%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "2.25", %%% date = "27 September 2023", %%% time = "17:02:58 MDT", %%% filename = "sigplan2010.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% FAX = "+1 801 581 4148", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "08201 139718 750618 7192531", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography, BibTeX, programming languages, %%% SIGPLAN", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE bibliography of ACM SIGPLAN %%% Notices, covering 2010--2019. %%% %%% There are World Wide Web sites for the %%% journal at %%% %%% http://www.acm.org/sigplan/ %%% https://dl.acm.org/loi/sigplan %%% %%% and %%% %%% http://www.rowan.edu/sigplan/ %%% %%% and coverage of about a dozen volumes can be found at %%% %%% http://ftp.informatik.rwth-aachen.de/dblp/db/journals/sigplan/index.html %%% %%% Several conference proceedings are published %%% as volumes of SIGPLAN Notices. Many of them %%% can also be found via the ACM proceedings Web %%% sites: %%% %%% http://www.acm.org/pubs/contents/proceedings/ %%% http://www.acm.org/pubs/contents/proceedings/asplos/ %%% http://www.acm.org/pubs/contents/proceedings/plan/ %%% http://www.acm.org/pubs/contents/proceedings/pldi/ %%% %%% At version 2.25, the COMPLETE year coverage %%% looks like this: %%% %%% 2010 ( 355) 2013 ( 377) 2016 ( 378) %%% 2011 ( 370) 2014 ( 354) 2017 ( 343) %%% 2012 ( 375) 2015 ( 389) 2018 ( 247) %%% %%% Article: 3188 %%% %%% Total entries: 3188 %%% %%% Some of the bibliography entries in this %%% file contain abstracts. These are governed %%% by the ACM Copyright Notice for ACM SIGPLAN %%% Notices, which says: %%% %%% ``Permission to copy without fee all %%% or part of this material is granted %%% provided that the copies are not made %%% or distributed for commercial %%% advantage, the ACM copyright notice %%% and the title of the publication and %%% its date appear, and notice is given %%% that copying is by permission of the %%% Association for Computing Machinery. %%% To copy otherwise, or to republish, %%% requires a fee and/or specific %%% permission.'' %%% %%% Inasmuch as this bibliography, and its %%% companion files in the master collection, %%% is freely distributed without charge, %%% inclusion of article abstracts clearly %%% falls within the copyright permissions, and %%% this author considers that ACM has given %%% the required permission under the terms of %%% the above Copyright Notice. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using bibsort -byvolume. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. 
This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ==================================================================== @Preamble{ "\input bibnames.sty " # "\input path.sty " # "\def \TM {${}^{\sc TM}$} " # "\ifx \undefined \circled \def \circled #1{(#1)} \fi" # "\ifx \undefined \reg \def \reg {\circled{R}} \fi" # "\hyphenation{ }" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, FAX: +1 801 581 4148, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-SIGPLAN = "ACM SIG{\-}PLAN Notices"} %%% ==================================================================== %%% Publisher abbreviations: @String{pub-ACM = "ACM Press"} @String{pub-ACM:adr = "New York, NY, USA"} @String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"} @String{pub-AW:adr = "Reading, MA, USA"} %%% ==================================================================== %%% Series abbreviations: @String{ser-SIGPLAN = "ACM SIG{\-}PLAN Notices"} %%% ==================================================================== %%% Bibliography entries, in publication order: @Article{Gershenfeld:2010:RAL, author = "Neil Gershenfeld and David Dalrymple and Kailiang Chen and Ara Knaian and Forrest Green and Erik D. Demaine and Scott Greenwald and Peter Schmidt-Nielsen", title = "Reconfigurable asynchronous logic automata: {(RALA)}", journal = j-SIGPLAN, volume = "45", number = "1", pages = "1--6", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Atig:2010:VPW, author = "Mohamed Faouzi Atig and Ahmed Bouajjani and Sebastian Burckhardt and Madanlal Musuvathi", title = "On the verification problem for weak memory models", journal = j-SIGPLAN, volume = "45", number = "1", pages = "7--18", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Koskinen:2010:CGT, author = "Eric Koskinen and Matthew Parkinson and Maurice Herlihy", title = "Coarse-grained transactions", journal = j-SIGPLAN, volume = "45", number = "1", pages = "19--30", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Attiya:2010:SVS, author = "H. Attiya and G. Ramalingam and N. 
Rinetzky", title = "Sequential verification of serializability", journal = j-SIGPLAN, volume = "45", number = "1", pages = "31--42", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Godefroid:2010:CMM, author = "Patrice Godefroid and Aditya V. Nori and Sriram K. Rajamani and Sai Deep Tetali", title = "Compositional may-must program analysis: unleashing the power of alternation", journal = j-SIGPLAN, volume = "45", number = "1", pages = "43--56", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chaudhuri:2010:CAP, author = "Swarat Chaudhuri and Sumit Gulwani and Roberto Lublinerman", title = "Continuity analysis of programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "57--70", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Harris:2010:PAS, author = "William R. 
Harris and Sriram Sankaranarayanan and Franjo Ivan{\v{c}}i{\'c} and Aarti Gupta", title = "Program analysis via satisfiability modulo path programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "71--82", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tristan:2010:SVV, author = "Jean-Baptiste Tristan and Xavier Leroy", title = "A simple, verified validator for software pipelining", journal = j-SIGPLAN, volume = "45", number = "1", pages = "83--92", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chlipala:2010:VCI, author = "Adam Chlipala", title = "A verified compiler for an impure functional language", journal = j-SIGPLAN, volume = "45", number = "1", pages = "93--106", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Myreen:2010:VJT, author = "Magnus O. 
Myreen", title = "Verified just-in-time compiler on {x86}", journal = j-SIGPLAN, volume = "45", number = "1", pages = "107--118", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Terauchi:2010:DTC, author = "Tachio Terauchi", title = "Dependent types from counterexamples", journal = j-SIGPLAN, volume = "45", number = "1", pages = "119--130", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rondon:2010:LLL, author = "Patrick Maxim Rondon and Ming Kawaguchi and Ranjit Jhala", title = "Low-level liquid types", journal = j-SIGPLAN, volume = "45", number = "1", pages = "131--144", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Schafer:2010:TID, author = "Max Sch{\"a}fer and Oege de Moor", title = "Type inference for datalog with complex type hierarchies", journal = j-SIGPLAN, volume = "45", number = "1", pages = "145--156", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Henzinger:2010:BQN, author = "Thomas A. Henzinger", title = "From {Boolean} to quantitative notions of correctness", journal = j-SIGPLAN, volume = "45", number = "1", pages = "157--158", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pitts:2010:NS, author = "Andrew M. Pitts", title = "Nominal system {T}", journal = j-SIGPLAN, volume = "45", number = "1", pages = "159--170", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hobor:2010:TIA, author = "Aquinas Hobor and Robert Dockins and Andrew W. 
Appel", title = "A theory of indirection via approximation", journal = j-SIGPLAN, volume = "45", number = "1", pages = "171--184", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dreyer:2010:RML, author = "Derek Dreyer and Georg Neis and Andreas Rossberg and Lars Birkedal", title = "A relational modal logic for higher-order stateful {ADTs}", journal = j-SIGPLAN, volume = "45", number = "1", pages = "185--198", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Suter:2010:DPA, author = "Philippe Suter and Mirco Dotta and Viktor Kuncak", title = "Decision procedures for algebraic data types with abstractions", journal = j-SIGPLAN, volume = "45", number = "1", pages = "199--210", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Magill:2010:ANA, author = "Stephen Magill and Ming-Hsien Tsai and Peter Lee and Yih-Kuen Tsay", title = "Automatic numeric abstractions for heap-manipulating programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "211--222", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jost:2010:SDQ, author = "Steffen Jost and Kevin Hammond and Hans-Wolfgang Loidl and Martin Hofmann", title = "Static determination of quantitative resource usage for higher-order programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "223--236", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Malecha:2010:TVR, author = "Gregory Malecha and Greg Morrisett and Avraham Shinnar and Ryan Wisnesky", title = "Toward a verified relational database management system", journal = j-SIGPLAN, volume = "45", number = "1", pages = "237--248", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Podelski:2010:CGF, author = "Andreas Podelski and Thomas Wies", title = "Counterexample-guided focus", journal = j-SIGPLAN, volume = "45", number = "1", pages = "249--260", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nanevski:2010:SVH, author = "Aleksandar Nanevski and Viktor Vafeiadis and Josh Berdine", title = "Structuring the verification of heap-manipulating programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "261--274", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jia:2010:DTP, author = "Limin Jia and Jianzhou Zhao and Vilhelm Sj{\"o}berg and Stephanie Weirich", title = "Dependent types and program equivalence", journal = j-SIGPLAN, volume = "45", number = "1", pages = "275--286", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hutchins:2010:PSS, author = "DeLesley S. Hutchins", title = "Pure subtype systems", journal = j-SIGPLAN, volume = "45", number = "1", pages = "287--298", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gay:2010:MST, author = "Simon J. Gay and Vasco T. Vasconcelos and Ant{\'o}nio Ravara and Nils Gesbert and Alexandre Z. Caldeira", title = "Modular session types for distributed object-oriented programming", journal = j-SIGPLAN, volume = "45", number = "1", pages = "299--312", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Srivastava:2010:PVP, author = "Saurabh Srivastava and Sumit Gulwani and Jeffrey S. 
Foster", title = "From program verification to program synthesis", journal = j-SIGPLAN, volume = "45", number = "1", pages = "313--326", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Vechev:2010:AGS, author = "Martin Vechev and Eran Yahav and Greta Yorsh", title = "Abstraction-guided synthesis of synchronization", journal = j-SIGPLAN, volume = "45", number = "1", pages = "327--338", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bodik:2010:PAN, author = "Rastislav Bodik and Satish Chandra and Joel Galenson and Doug Kimelman and Nicholas Tung and Shaon Barman and Casey Rodarmor", title = "Programming with angelic nondeterminism", journal = j-SIGPLAN, volume = "45", number = "1", pages = "339--352", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Greenberg:2010:CMM, author = "Michael Greenberg and Benjamin C. Pierce and Stephanie Weirich", title = "Contracts made manifest", journal = j-SIGPLAN, volume = "45", number = "1", pages = "353--364", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Siek:2010:TB, author = "Jeremy G. 
Siek and Philip Wadler", title = "Threesomes, with and without blame", journal = j-SIGPLAN, volume = "45", number = "1", pages = "365--376", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wrigstad:2010:ITU, author = "Tobias Wrigstad and Francesco Zappa Nardelli and Sylvain Lebresne and Johan {\"O}stlund and Jan Vitek", title = "Integrating typed and untyped code in a scripting language", journal = j-SIGPLAN, volume = "45", number = "1", pages = "377--388", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tate:2010:GCO, author = "Ross Tate and Michael Stepp and Sorin Lerner", title = "Generating compiler optimizations from proofs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "389--402", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dias:2010:AGI, author = "Jo{\~a}o Dias and Norman Ramsey", title = "Automatically generating instruction selectors using declarative machine descriptions", journal = j-SIGPLAN, volume = "45", number = "1", pages = "403--416", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jim:2010:SAD, author = "Trevor Jim and Yitzhak Mandelbaum and David Walker", title = "Semantics and algorithms for data-dependent grammars", journal = j-SIGPLAN, volume = "45", number = "1", pages = "417--430", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Broberg:2010:PRB, author = "Niklas Broberg and David Sands", title = "{Paralocks}: role-based information flow control and beyond", journal = j-SIGPLAN, volume = "45", number = "1", pages = "431--444", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN 
Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bhargavan:2010:MVS, author = "Karthikeyan Bhargavan and C{\'e}dric Fournet and Andrew D. Gordon", title = "Modular verification of security protocol code by typing", journal = j-SIGPLAN, volume = "45", number = "1", pages = "445--456", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Martin:2010:DCO, author = "Jean-Phillipe Martin and Michael Hicks and Manuel Costa and Periklis Akritidis and Miguel Castro", title = "Dynamically checking ownership policies in concurrent {C}\slash {C++} programs", journal = j-SIGPLAN, volume = "45", number = "1", pages = "457--470", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Heizmann:2010:NI, author = "Matthias Heizmann and Jochen Hoenicke and Andreas Podelski", title = "Nested interpolants", journal = j-SIGPLAN, volume = "45", number = "1", pages = "471--482", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Filinski:2010:MA, author = "Andrzej Filinski", title = "Monads in action", journal = j-SIGPLAN, volume = "45", number = "1", pages = "483--494", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kobayashi:2010:HOM, author = "Naoki Kobayashi and Naoshi Tabuchi and Hiroshi Unno", title = "Higher-order multi-parameter tree transducers and recursion schemes for program verification", journal = j-SIGPLAN, volume = "45", number = "1", pages = "495--508", month = jan, year = "2010", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Mar 15 19:13:16 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nikhil:2010:UGP, author = "Rishiyur S. 
Nikhil", title = "Using {GPCE} principles for hardware systems and accelerators: (bridging the gap to {HW} design)", journal = j-SIGPLAN, volume = "45", number = "2", pages = "1--2", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621608", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Moore's Law has precipitated a crisis in the creation of hardware systems (ASICs and FPGAs)-how to design such enormously complex concurrent systems quickly, reliably and affordably? At the same time, portable devices, the energy crisis, and high performance computing present a related challenge-how to move complex and high-performance algorithms from software into hardware (for more speed and/or energy efficiency)?\par In this talk I will start with a brief technical introduction to BSV, a language that directly addresses these concerns. It uses ideas from Guarded Atomic Actions (cf. Term Rewriting Systems, TLA+, Unity, and EventB) to address complex concurrency with scalability. It borrows from Haskell (types, type classes, higher-order functions) for robustness and powerful program generation (a.k.a. 'static elaboration' to HW designers). And it is fully synthesizable (compilable) into high-quality RTL (Verilog/VHDL). I will then describe some of the remarkable projects that BSV has enabled in industry and academia today.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "Bluespec Systemverilog; BSV; energy efficient computing; FPGA; hardware accelerators; hardware/software codesign; Haskell; high level synthesis; high performance computing; hybrid computing; term rewriting systems", } @Article{Cordy:2010:EOO, author = "James R. Cordy", title = "Eating our own dog food: {DSLs} for generative and transformational engineering", journal = j-SIGPLAN, volume = "45", number = "2", pages = "3--4", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621609", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Languages and systems to support generative and transformational solutions have been around a long time. Systems such as XVCL, DMS, ASF+SDF, Stratego and TXL have proven mature, efficient and effective in a wide range of applications. Even so, adoption remains a serious issue - almost all successful production applications of these systems in practice either involve help from the original authors or years of experience to get rolling. While work on accessibility is active, with efforts such as ETXL, Stratego XT, Rascal and Colm, the fundamental big step remains - it's not obvious how to apply a general purpose transformational system to any given generation or transformation problem, and the real power is in the paradigms of use, not the languages themselves.\par In this talk I will propose an agenda for addressing this problem by taking our own advice - designing and implementing domain specific languages (DSLs) for specific generative, transformational and analysis problem domains. We widely advise end users of the need for DSLs for their kinds of problems - why not for our kinds? 
And we use our tools for implementing their DSLs---why not our own? I will outline a general method for using transformational techniques to implement transformational and generative DSLs, and review applications of the method to implementing example text-based DSLs for model-based code generation and static code analysis. Finally, I will outline some first steps in implementing model transformation DSLs using the same idea---retaining the maturity and efficiency of our existing tools while bringing them to the masses by 'eating our own dogfood'.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "domain-specific languages; generative programming; model driven engineering; source transformation systems", } @Article{Willcock:2010:RGP, author = "Jeremiah James Willcock and Andrew Lumsdaine and Daniel J. Quinlan", title = "Reusable, generic program analyses and transformations", journal = j-SIGPLAN, volume = "45", number = "2", pages = "5--14", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621611", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The optimizations in modern compilers are constructed for a predetermined set of primitive types. As a result, programmers are unable to exploit optimizations for user-defined types where these optimizations would be correct and beneficial. Moreover, because the set of optimizations is also fixed, programmers are unable to incorporate new optimizations into the compiler. To address these limitations, we apply the reuse methodologies from generic programming to compiler analyses and optimizations. To enable compilers to apply optimizations to classes of types rather than particular types, we define optimizations in terms of generic interface descriptions (similar to C++ concepts or Haskell type classes). By extending these interface descriptions to include associated program analysis and transformation fragments, we enable compilers to incorporate user-defined transformations and analyses. Since these transformations are explicitly associated with interface descriptions, they can be applied in generic fashion by the compiler. We demonstrate that classical compiler optimizations, when generalized using this framework, can apply to a broad range of types, both built-in and user-defined.
Finally, we present an initial implementation, the principles of which are generalizable to other compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler optimization; generic programming", } @Article{Bagge:2010:ASB, author = "Anya Helene Bagge and Valentin David and Magne Haveraaen", title = "The axioms strike back: testing with concepts and axioms in {C++}", journal = j-SIGPLAN, volume = "45", number = "2", pages = "15--24", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621612", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern development practices encourage extensive testing of code while it is still under development, using unit tests to check individual code units in isolation. Such tests are typically case-based, checking a likely error scenario or an error that has previously been identified and fixed. Coming up with good test cases is challenging, and focusing on individual tests can distract from creating tests that cover the full functionality.\par Axioms, known from program specification, allow for an alternative way of generating test cases, where the intended functionality is described as rules or equations that can be checked automatically. Axioms are proposed as part of the {\em concept\/} feature of the upcoming C++0x standard.\par In this paper, we describe how tests may be generated automatically from axioms in C++ concepts, and supplied with appropriate test data to form effective automated unit tests.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "algebraic specification; axiom-based testing; axioms; C++; C++0x; concepts; generative programming; mouldable programming; program transformation; test generation; unit testing", } @Article{Garcia:2010:TFT, author = "Ronald Garcia and Andrew Lumsdaine", title = "Toward foundations for type-reflective metaprogramming", journal = j-SIGPLAN, volume = "45", number = "2", pages = "25--34", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621613", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C++ template metaprogramming has been used with great success to build software applications and libraries. In practice, however, template metaprogramming suffers usability, reliability, and capability shortcomings, and it is not well understood in theory. Template metaprogramming has these problems because it relies on emergent properties of disparate language features that were tailored to other purposes. As a step toward solid and sound language support for metaprogramming, this paper establishes firm semantic foundations for select capabilities of template metaprogramming.\par We analyze C++ and the idioms of template metaprogramming and isolate, in a language-neutral fashion, fundamental capabilities of C++ that enable metaprogramming.
Guided by this analysis, we present a design for a core calculus that directly expresses fundamental metaprogramming capabilities, including static computation, code generation, and type reflection. We prove a type-safety property for compile-time evaluation of metaprograms. To formally connect the core calculus to programming practice, we present a more convenient surface language for metaprogramming. Its semantics are captured by type-directed translation to the core calculus. We prove that this translation preserves well-typing.\par This idealized presentation averts some of the shortcomings of C++ template metaprogramming and provides a framework for further study.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "c++; metaprogramming; reflection; semantics", } @Article{Sadat-Mohtasham:2010:TPD, author = "Hossein Sadat-Mohtasham and H. James Hoover", title = "Transactional pointcuts: designation reification and advice of interrelated join points", journal = j-SIGPLAN, volume = "45", number = "2", pages = "35--44", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621615", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aspect-oriented mechanisms are characterized by their join point models. A join point model has three components: join points, which are elements of language semantics; 'a means of identifying join points'; and 'a means of affecting the behaviour at those join points.' A pointcut-advice model is a dynamic join point model in which join points are points in program execution. Pointcuts select a set of join points, and advice affects the behaviour of the selected join points. In this model, join points are typically selected and advised independently of each other. That is, the relationships between join points are not taken into account in join point selection and advice. In practice, join points are often not independent. Instead, they form part of a higher-level operation that implements the intent of the developer ({\em e.g.\/} managing a resource). There are natural situations in which join points should be selected only if they play a specific role in that operation.\par We propose a new join point model that takes join point interrelationships into account and allows the designation of more complex computations as join points. Based on the new model, we have designed an aspect-oriented construct called a {\em transactional pointcut (transcut)}. Transcuts select sets of interrelated join points and reify them into higher-level join points that can be advised. They share much of the machinery and intuition of pointcuts, and can be viewed as their natural extension. We have implemented a transcuts prototype as an extension to the AspectJ language and integrated it into the abc compiler.
We present an example where a transcut is applied to implement recommended resource handling practices in the presence of exceptions within method boundaries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "aspect-oriented programming; join point model; transactional pointcut", } @Article{Akai:2010:EAS, author = "Shumpei Akai and Shigeru Chiba", title = "Extending {AspectJ} for separating regions", journal = j-SIGPLAN, volume = "45", number = "2", pages = "45--54", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621616", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Synchronization is a good candidate for an aspect in aspect-oriented programming (AOP) since programmers have to choose the best granularity of synchronization for the underlying hardware to obtain the best execution performance. If synchronization is an aspect, programmers can change the synchronization code independently of the rest of the program when the program runs on different hardware. However, existing AOP languages such as AspectJ have problems. They cannot select an arbitrary code region as a join point. Moreover, they cannot enforce weaving of a synchronization aspect. Since it is an alternative feature in feature modeling, at least one of the available synchronization aspects must be woven. Otherwise, the program would be thread-unsafe. Since an aspect in AspectJ is inherently optional, programmers must be responsible for weaving it. To solve these problems, this paper proposes two new constructs for AspectJ, {\em regioncut\/} and {\em assertions for advice}. Regioncut selects an arbitrary code region as a join point, and assertion for advice enforces weaving of a mandatory advice. We implemented these constructs by extending the AspectBench compiler. We evaluated the design of our constructs by applying them to two open-source software products, Javassist and Hadoop.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "aspect-oriented programming; feature-oriented programming; region; synchronization", } @Article{Liu:2010:LFI, author = "Yanhong A. Liu and Michael Gorbovitski and Scott D. Stoller", title = "A language and framework for invariant-driven transformations", journal = j-SIGPLAN, volume = "45", number = "2", pages = "55--64", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621617", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a language and framework that allow coordinated transformations driven by invariants to be specified declaratively, as invariant rules, and applied automatically. The framework supports incremental maintenance of invariants for program design and optimization, as well as general transformations for instrumentation, refactoring, and other purposes.
This paper also describes our implementations for transforming Python and C programs and experiments with successful applications of the systems in generating efficient implementations from clear and modular specifications, in instrumenting programs for runtime verification, profiling, and debugging, and in code refactoring.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "incremental maintenance; invariants; program optimization; program transformation; runtime invariant checking", } @Article{Wehr:2010:JBP, author = "Stefan Wehr and Peter Thiemann", title = "{JavaGI} in the battlefield: practical experience with generalized interfaces", journal = j-SIGPLAN, volume = "45", number = "2", pages = "65--74", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621619", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Generalized interfaces are an extension of the interface concept found in object-oriented languages such as Java or C\#. The extension is inspired by Haskell's type classes. It supports retroactive and type-conditional interface implementations, binary methods, symmetric multimethods, interfaces over families of types, and static interface methods.\par This article reports practical experience with generalized interfaces as implemented in the JavaGI language. Several real-world case studies demonstrate how generalized interfaces provide solutions to extension and integration problems with components in binary form, how they make certain design patterns redundant, and how they eliminate various run-time errors. In each case study, the use of JavaGI results in elegant and highly readable code.\par Furthermore, the article discusses the implementation of a compiler and a run-time system for JavaGI. Benchmarks show that our implementation offers acceptable performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "case studies; external methods; JavaGI; multimethods; retroactive interface implementation", } @Article{McGachey:2010:CJC, author = "Phil McGachey and Antony L. Hosking and J. Eliot B. Moss", title = "Classifying {Java} class transformations for pervasive virtualized access", journal = j-SIGPLAN, volume = "45", number = "2", pages = "75--84", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621620", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The indirection of object accesses is a common theme for target domains as diverse as transparent distribution, persistence, and program instrumentation. Virtualizing accesses to fields and methods (by redirecting calls through accessor and indirection methods) allows interposition of arbitrary code, extending the functionality of an application beyond that intended by the original developer.\par We present class modifications performed by our RuggedJ transparent distribution platform for standard Java virtual machines. RuggedJ abstracts over the location of objects by implementing a single object model for local and remote objects. 
However, the implementation of this model is complicated by the presence of native and system code; classes loaded by Java's bootstrap class loader can be rewritten only in a limited manner, and so cannot be modified to conform to RuggedJ's complex object model. We observe that system code comprises the majority of a given Java application: an average of 76\% in the applications we study. We consider the constraints imposed upon pervasive class transformation within Java, and present a framework for systematically rewriting arbitrary applications. Our system accommodates all system classes, allowing both user and system classes alike to be referenced using a single object model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "Java; object model; program transformation", } @Article{Villazon:2010:ARA, author = "Alex Villaz{\'o}n and Walter Binder and Danilo Ansaloni and Philippe Moret", title = "Advanced runtime adaptation for {Java}", journal = j-SIGPLAN, volume = "45", number = "2", pages = "85--94", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621621", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic aspect-oriented programming (AOP) enables runtime adaptation of aspects, which is important for building sophisticated, aspect-based software engineering tools, such as adaptive profilers or debuggers that dynamically modify instrumentation code in response to user interactions. Today, many AOP frameworks for Java, notably AspectJ, focus on aspect weaving at compile-time or at load-time, and offer only limited support for aspect adaptation and reweaving at runtime. In this paper, we introduce HotWave, an AOP framework based on AspectJ for standard Java Virtual Machines (JVMs). HotWave supports dynamic (re)weaving of previously loaded classes, and it ensures that all classes loaded in a JVM can be (re)woven, including the classes of the standard Java class library. HotWave features a novel mechanism for inter-advice communication, enabling efficient data passing between advices that are woven into the same method. We explain HotWave's programming model and discuss our implementation techniques.
As a case study, we present an adaptive, aspect-based profiler that leverages HotWave's distinguishing features.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "AspectJ; bytecode instrumentation; code hotswapping; dynamic aspect-oriented programming; Java Virtual Machine; runtime aspect adaptation and (re)weaving", } @Article{Villazon:2010:HCA, author = "Alex Villaz{\'o}n and Walter Binder and Danilo Ansaloni and Philippe Moret", title = "{HotWave}: creating adaptive tools with dynamic aspect-oriented programming in {Java}", journal = j-SIGPLAN, volume = "45", number = "2", pages = "95--98", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621622", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing tools for profiling, debugging, testing, and reverse engineering is error-prone, time-consuming, and therefore costly when using low-level techniques, such as bytecode instrumentation. As a solution to these problems, we promote tool development in Java using high-level aspect-oriented programming (AOP). We demonstrate that the use of aspects yields compact tools that are easy to develop and extend. As an enabling technology, we rely on HotWave, a new tool for dynamic and comprehensive aspect weaving. HotWave reconciles compatibility with existing virtual machine and AOP technologies. It provides support for runtime adaptation of aspects and reweaving of previously loaded code, as well as the ability to weave aspects into all methods executing in a Java Virtual Machine, including methods in the standard Java class library. HotWave also features a new mechanism for efficiently passing data between advices that are woven into the same method. We demonstrate the benefits of HotWave's distinguishing features with two case studies in the area of profiling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "AspectJ; bytecode instrumentation; code hotswapping; dynamic aspect-oriented programming; Java Virtual Machine; profiling; runtime weaving", } @Article{Heidenreich:2010:GST, author = "Florian Heidenreich and Jendrik Johannes and Mirko Seifert and Christian Wende and Marcel B{\"o}hme", title = "Generating safe template languages", journal = j-SIGPLAN, volume = "45", number = "2", pages = "99--108", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621624", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Template languages are widely used within generative programming, because they provide intuitive means to generate software artefacts expressed in a specific object language. However, most template languages perform template instantiation on the level of string literals, which allows neither syntax checks nor semantic analysis. To make sure that generated artefacts always conform to the object language, we propose to perform static analysis at template design time.
In addition, the increasing popularity of domain-specific languages (DSLs) demands an approach that allows reuse of both the concepts of template languages and the corresponding tools.\par In this paper we address the issues mentioned above by presenting how existing languages can be automatically extended with generic template concepts (e.g., placeholders, loops, conditions) to obtain safe template languages. These languages provide means for syntax checking and static semantic analysis w.r.t. the object language at template design time. We discuss the prerequisites for this extension, analyse the types of correctness properties that can be assured at template design time, and exemplify the key benefits of this approach on a textual DSL and Java.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "generative programming; language extension; safe authoring; template language", } @Article{Kong:2010:APT, author = "Soonho Kong and Wontae Choi and Kwangkeun Yi", title = "Abstract parsing for two-staged languages with concatenation", journal = j-SIGPLAN, volume = "45", number = "2", pages = "109--116", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621625", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This article, based on Doh, Kim, and Schmidt's 'abstract parsing' technique, presents an abstract interpretation for statically checking the syntax of generated code in two-staged programs. Abstract parsing is a static analysis technique for checking the syntax of generated strings. We adopt this technique for two-staged programming languages and formulate it in the abstract interpretation framework. We parameterize our analysis with the abstract domain so that one can choose the abstract domain as long as it satisfies the condition we provide. We also present an instance of the abstract domain, namely an abstract parse stack and its widening with k-cutting.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "abstract interpretation; multi-staged languages; parsing; program analysis", } @Article{Nedunuri:2010:SFP, author = "Srinivas Nedunuri and William R. Cook", title = "Synthesis of fast programs for maximum segment sum problems", journal = j-SIGPLAN, volume = "45", number = "2", pages = "117--126", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621626", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is well-known that a naive algorithm can often be turned into an efficient program by applying appropriate semantics-preserving transformations. This technique has been used to derive programs to solve a variety of maximum-sum problems. One problem with this approach is that each problem variation requires a new set of transformations to be derived. An alternative approach to generation combines problem specifications with flexible algorithm theories to derive efficient algorithms. We show how this approach can be implemented in Haskell and applied to solve constraint satisfaction problems.
We illustrate this technique by deriving programs for three varieties of maximum-weight-sum problem. The derivations of the different programs are similar, and the resulting programs are asymptotically faster in practice than the programs created by transformation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "algorithms; branch-and-bound; formal methods; program synthesis; segment-sum problems", } @Article{Radermacher:2010:GEI, author = "Ansgar Radermacher and Arnaud Cuccuru and Sebastien Gerard and Fran{\c{c}}ois Terrier", title = "Generating execution infrastructures for component-oriented specifications with a model driven toolchain: a case study for {MARTE}'s {GCM} and real-time annotations", journal = j-SIGPLAN, volume = "45", number = "2", pages = "127--136", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621628", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The development of embedded systems becomes more and more complex. Model driven engineering can help to manage this complexity by specifying real-time properties in a declarative way and automating the deployment. The UML profile MARTE is an OMG standard that allows modeling of real-time properties. However, there is no execution infrastructure that supports MARTE's generic component model (GCM) and the application modeling (HLAM).\par The contribution of the paper is twofold: it presents a proposition of a component model with flexible interaction support that allows tailoring code generation to domain and target requirements. Second, it will show how MARTE's GCM concepts can be implemented by means of the proposed component model. The proposed component model has been largely developed in the context of the French national project Flex-eWare with the intention to unify major component models, notably the CORBA component model (CCM) and Fractal. The paper explains the major elements of this model in detail and shows how specific connectors and containers can implement MARTE specifications. We present the tool support that is integrated into a UML modeler and based on model-to-model and model-to-text transformations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "component models; connectors; MARTE; middleware; model-driven engineering", } @Article{Cassou:2010:GPA, author = "Damien Cassou and Benjamin Bertran and Nicolas Loriant and Charles Consel", title = "A generative programming approach to developing pervasive computing systems", journal = j-SIGPLAN, volume = "45", number = "2", pages = "137--146", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621629", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing pervasive computing applications is a difficult task because it requires dealing with a wide range of issues: heterogeneous devices, entity distribution, entity coordination, low-level hardware knowledge.
\ldots{} Besides requiring various areas of expertise, programming such applications involves writing a lot of administrative code to glue technologies together and to interface with both hardware and software components.\par This paper proposes a generative programming approach to providing programming, execution and simulation support dedicated to the pervasive computing domain. This approach relies on a domain-specific language, named DiaSpec, dedicated to the description of pervasive computing systems. Our generative approach factors out features of distributed systems technologies, making DiaSpec-specified software systems portable.\par The DiaSpec compiler is implemented and has been used to generate dedicated programming frameworks for a variety of pervasive computing applications, including detailed ones to manage the building of an engineering school.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "DSL; generative programming; pervasive computing", } @Article{Jarvi:2010:AUI, author = "Jaakko J{\"a}rvi and Mat Marcus and Sean Parent and John Freeman and Jacob Smith", title = "Algorithms for user interfaces", journal = j-SIGPLAN, volume = "45", number = "2", pages = "147--156", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621630", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "User interfaces for modern applications must support a rich set of interactive features. It is commonplace to find applications with dependencies between values manipulated by user interface elements, conditionally enabled controls, and script record-ability and playback against different documents. A significant fraction of the application programming effort is devoted to implementing such functionality, and the resulting code is typically not reusable.\par This paper extends our 'property models' approach to programming user interfaces. Property models allow a large part of the functionality of a user interface to be implemented in reusable libraries, reducing application specific code to a set of declarative rules. We describe how, as a by-product of computations that maintain the values of user interface elements, property models obtain accurate information of the currently active dependencies among those elements. This information enables further expanding the class of user interface functionality that we can encode as generic algorithms. In particular, we describe automating the decisions for the enablement of user interface widgets and activation of command widgets. 
Failing to disable or deactivate widgets correctly is a common source of user-interface defects, which our approach largely removes.\par We report on the increased reuse, reduced defect rates, and improved user interface design turnarounds in a commercial software development effort as a result of adopting our approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "component software; constraint systems; declarative specifications; software reuse; user interfaces", } @Article{Kastner:2010:MRP, author = "Christian K{\"a}stner and Sven Apel and Martin Kuhlemann", title = "A model of refactoring physically and virtually separated features", journal = j-SIGPLAN, volume = "45", number = "2", pages = "157--166", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621632", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Physical separation with class refinements and method refinements {\`a} la AHEAD and virtual separation using annotations {\`a} la {\em \#ifdef\/} or CIDE are two competing implementation approaches for software product lines with complementary advantages. Although both approaches have been mainly discussed in isolation, we strive for an integration to leverage the respective advantages. In this paper, we lay the foundation for such an integration by providing a model that supports both physical and virtual separation and by describing refactorings in both directions. We prove the refactorings complete, so every virtually separated product line can be automatically transformed into a physically separated one (replacing annotations by refinements) and vice versa. To demonstrate the feasibility of our approach, we have implemented the refactorings in our tool CIDE and conducted four case studies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "AHEAD; CIDE; FeatureHouse; preprocessor; refinements; separation of concerns; software product lines", } @Article{Sanen:2010:MPS, author = "Frans Sanen and Eddy Truyen and Wouter Joosen", title = "Mapping problem-space to solution-space features: a feature interaction approach", journal = j-SIGPLAN, volume = "45", number = "2", pages = "167--176", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837852.1621633", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mapping problem-space features into solution-space features is a fundamental configuration problem in software product line engineering. A configuration problem is defined as generating the most optimal combination of software features given a requirements specification and given a set of configuration rules. Current approaches however provide little support for expressing complex configuration rules between problem and solution space that support incomplete requirements specifications. In this paper, we propose an approach to model complex configuration rules based on a generalization of the concept of problem-solution feature interactions. 
These are interactions between solution-space features that only arise in specific problem contexts. The use of an existing tool to support our approach is also discussed: we use the DLV answer set solver to express a particular configuration problem as a logic program whose answer set corresponds to the optimal combinations of solution-space features. We motivate and illustrate our approach with a case study in the field of managing dynamic adaptations in distributed software, where the goal is to generate an optimal protocol for accommodating a given adaptation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "configuration knowledge; default logic; distributed runtime adaptation; DLV; problem-solution feature interactions; software product line engineering", } @Article{Kuhlemann:2010:SCN, author = "Martin Kuhlemann and Don Batory and Christian K{\"a}stner", title = "Safe composition of non-monotonic features", journal = j-SIGPLAN, volume = "45", number = "2", pages = "177--186", month = feb, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1621607.1621634", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:37:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programs can be composed from features. We want to verify automatically that all legal combinations of features can be composed safely without errors. Prior work on this problem assumed that features add code monotonically. We generalize prior work to enable features to add {\em and remove\/} code, describe our analyses and implementation, and review case studies. We observe that more expressive features increase the complexity of developed programs rapidly -- up to the point where tools and automated concepts as presented in this paper are indispensable for verification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "AHEAD; feature-oriented programming; refactoring; safe composition", } @Article{Brewer:2010:TDR, author = "Eric A. Brewer", title = "Technology for developing regions: {Moore}'s law is not enough", journal = j-SIGPLAN, volume = "45", number = "3", pages = "1--2", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736021", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The historic focus of development has rightfully been on macroeconomics and good governance, but technology has an increasingly large role to play. In this talk, I review several novel technologies that we have deployed in India and Africa, and discuss the challenges and opportunities of this new subfield of EECS research. Working with the Aravind Eye Hospital, we are currently supporting doctor / patient videoconferencing in 30+ rural villages; more than 25,000 people have had their blindness cured due to these exams.\par Although Moore's Law has led to great cost reductions and thus enabled new technologies, we have reached essentially the low point for cost: the computing is essentially free compared to the rest of the system. 
The premium is thus on a combination of (1) deeper integration (fewer components), (2) shared usage models (even phones are shared), and (3) lower operating costs in terms of power and connectivity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "developing regions; ictd; it for development.", } @Article{Ipek:2010:DRM, author = "Engin Ipek and Jeremy Condit and Edmund B. Nightingale and Doug Burger and Thomas Moscibroda", title = "Dynamically replicated memory: building reliable systems from nanoscale resistive memories", journal = j-SIGPLAN, volume = "45", number = "3", pages = "3--14", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736023", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "DRAM is facing severe scalability challenges in sub-45nm technology nodes due to precise charge placement and sensing hurdles in deep-submicron geometries. Resistive memories, such as phase-change memory (PCM), already scale well beyond DRAM and are a promising DRAM replacement. Unfortunately, PCM is write-limited, and current approaches to managing writes must decommission pages of PCM when the first bit fails.\par This paper presents {\em dynamically replicated memory\/} (DRM), the first hardware and operating system interface designed for PCM that allows {\em continued operation through graceful degradation\/} when hard faults occur. DRM reuses memory pages that contain hard faults by dynamically forming pairs of complementary pages that act as a single page of storage. No changes are required to the processor cores, the cache hierarchy, or the operating system's page tables. By changing the memory controller, the TLBs, and the operating system to be DRM-aware, we can improve the lifetime of PCM by up to 40x over conventional error-detection techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "phase-change memory; write endurance", } @Article{Kirman:2010:PEA, author = "Nevin Kirman and Jos{\'e} F. Mart{\'\i}nez", title = "A power-efficient all-optical on-chip interconnect using wavelength-based oblivious routing", journal = j-SIGPLAN, volume = "45", number = "3", pages = "15--28", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736024", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an all-optical approach to constructing data networks on chip that combines the following key features: (1) Wavelength-based routing, where the route followed by a packet depends solely on the wavelength of its carrier signal, and not on information either contained in the packet or traveling along with it. (2) Oblivious routing, by which the wavelength (and thus the route) employed to connect a source-destination pair is invariant for that pair, and does not depend on ongoing transmissions by other nodes, thereby simplifying design and operation.
And (3) passive optical wavelength routers, whose routing pattern is set at design time, which allows for area and power optimizations not generally available to solutions that use dynamic routing. Compared to prior proposals, our evaluation shows that our solution is significantly more power efficient at a similar level of performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "nanophotonics; on-chip network; optical network; wavelength-based oblivious routing", } @Article{Neelakantam:2010:RSE, author = "Naveen Neelakantam and David R. Ditzel and Craig Zilles", title = "A real system evaluation of hardware atomicity for software speculation", journal = j-SIGPLAN, volume = "45", number = "3", pages = "29--38", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736026", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we evaluate the atomic region compiler abstraction by incorporating it into a commercial system. We find that atomic regions are simple and intuitive to integrate into an x86 binary-translation system. Furthermore, doing so trivially enables additional optimization opportunities beyond that achievable by a high-performance dynamic optimizer, which already implements superblocks.\par We show that atomic regions can suffer from severe performance penalties if misspeculations are left uncontrolled, but that a simple software control mechanism is sufficient to rein in all detrimental side-effects. We evaluate using full reference runs of the SPEC CPU2000 integer benchmarks and find that atomic regions enable up to a 9\% (3\% on average) improvement beyond the performance of a tuned product.\par These performance improvements are achieved without any negative side effects. Performance side effects such as code bloat are absent with atomic regions; in fact, static code size is reduced. The hardware necessary is synergistic with other needs and was already available on the commercial product used in our evaluation. Finally, the software complexity is minimal as a single developer was able to incorporate atomic regions into a sophisticated 300,000 line code base in three months, despite never having seen the translator source code beforehand.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "atomicity; checkpoint; dynamic translation; optimization; speculation", } @Article{Harris:2010:DFM, author = "Tim Harris and Sa{\v{s}}a Tomic and Adri{\'a}n Cristal and Osman Unsal", title = "Dynamic filtering: multi-purpose architecture support for language runtime systems", journal = j-SIGPLAN, volume = "45", number = "3", pages = "39--52", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736027", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a new abstraction to accelerate the read-barriers and write-barriers used by language runtime systems.
We exploit the fact that, dynamically, many barrier executions perform checks but no real work -- e.g., in generational garbage collection (GC), frequent checks are needed to detect the creation of inter-generational references, even though such references occur rarely in many workloads. We introduce a form of dynamic filtering that identifies redundant checks by (i) recording checks that have recently been executed, and (ii) detecting when a barrier is repeating one of these checks. We show how this technique can be applied to a variety of algorithms for GC, transactional memory, and language-based security. By supporting dynamic filtering in the instruction set, we show that the fast-paths of these barriers can be streamlined, reducing the impact on the quality of surrounding code. We show how we accelerate the barriers used for generational GC and transactional memory in the Bartok research compiler. With a 2048-entry filter, dynamic filtering eliminates almost all the overhead of the GC write-barriers. Dynamic filtering eliminates around half the overhead of STM over a non-synchronized baseline -- even when used with an STM that is already designed for low overhead, and which employs static analyses to avoid redundant operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "garbage collection; runtime systems; transactional memory", } @Article{Bergan:2010:CCR, author = "Tom Bergan and Owen Anderson and Joseph Devietti and Luis Ceze and Dan Grossman", title = "{CoreDet}: a compiler and runtime system for deterministic multithreaded execution", journal = j-SIGPLAN, volume = "45", number = "3", pages = "53--64", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736029", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The behavior of a multithreaded program does not depend only on its inputs. Scheduling, memory reordering, timing, and low-level hardware effects all introduce nondeterminism in the execution of multithreaded programs. This severely complicates many tasks, including debugging, testing, and automatic replication. In this work, we avoid these complications by eliminating their root cause: we develop a compiler and runtime system that runs arbitrary multithreaded C/C++ POSIX Threads programs deterministically.\par A trivial nonperformant approach to providing determinism is simply deterministically serializing execution. Instead, we present a compiler and runtime infrastructure that ensures determinism but resorts to serialization rarely, for handling interthread communication and synchronization. We develop two basic approaches, both of which are largely dynamic with performance improved by some static compiler optimizations. First, an ownership-based approach detects interthread communication via an evolving table that tracks ownership of memory regions by threads. Second, a buffering approach uses versioned memory and employs a deterministic commit protocol to make changes visible to other threads. While buffering has larger single-threaded overhead than ownership, it tends to scale better (serializing less often). A hybrid system sometimes performs and scales better than either approach individually.\par Our implementation is based on the LLVM compiler infrastructure. 
It needs neither programmer annotations nor special hardware. Our empirical evaluation uses the PARSEC and SPLASH2 benchmarks and shows that our approach scales comparably to nondeterministic execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compilers; determinism; multicore; multithreading", } @Article{Raman:2010:SPU, author = "Arun Raman and Hanjun Kim and Thomas R. Mason and Thomas B. Jablin and David I. August", title = "Speculative parallelization using software multi-threaded transactions", journal = j-SIGPLAN, volume = "45", number = "3", pages = "65--76", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736030", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the right techniques, multicore architectures may be able to continue the exponential performance trend that elevated the performance of applications of all types for decades. While many scientific programs can be parallelized without speculative techniques, speculative parallelism appears to be the key to continuing this trend for general-purpose applications. Recently-proposed code parallelization techniques, such as those by Bridges et al. and by Thies et al., demonstrate scalable performance on multiple cores by using speculation to divide code into atomic units (transactions) that span multiple threads in order to expose data parallelism. Unfortunately, most software and hardware Thread-Level Speculation (TLS) memory systems and transactional memories are not sufficient because they only support single-threaded atomic units. Multi-threaded Transactions (MTXs) address this problem, but they require expensive hardware support as currently proposed in the literature. This paper proposes a Software MTX (SMTX) system that captures the {\em applicability\/} and {\em performance\/} of hardware MTX, but on {\em existing multicore machines}. The SMTX system yields a harmonic mean speedup of 13.36x on native hardware with four 6-core processors (24 cores in total) running speculatively parallelized applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "automatic parallelization; loop-level parallelism; multi-threaded transactions; pipelined parallelism; software transactional memory; thread-level speculation", } @Article{Lee:2010:REO, author = "Dongyoon Lee and Benjamin Wester and Kaushik Veeraraghavan and Satish Narayanasamy and Peter M. Chen and Jason Flinn", title = "{Respec}: efficient online multiprocessor replay via speculation and external determinism", journal = j-SIGPLAN, volume = "45", number = "3", pages = "77--90", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736031", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deterministic replay systems record and reproduce the execution of a hardware or software system. While it is well known how to replay uniprocessor systems, replaying shared memory multiprocessor systems at low overhead on commodity hardware is still an open problem. 
This paper presents Respec, a new way to support deterministic replay of shared memory multithreaded programs on commodity multiprocessor hardware. Respec targets online replay in which the recorded and replayed processes execute concurrently.\par Respec uses two strategies to reduce overhead while still ensuring correctness: speculative logging and externally deterministic replay. Speculative logging optimistically logs less information about shared memory dependencies than is needed to guarantee deterministic replay, then recovers and retries if the replayed process diverges from the recorded process. Externally deterministic replay relaxes the degree to which the two executions must match by requiring only their system output and final program states match. We show that the combination of these two techniques results in low recording and replay overhead for the common case of data-race-free execution intervals and still ensures correct replay for execution intervals that have data races.\par We modified the Linux kernel to implement our techniques. Our software system adds on average about 18\% overhead to the execution time for recording and replaying programs with two threads and 55\% overhead for programs with four threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "external determinism; replay; speculative execution", } @Article{Eyerman:2010:PJS, author = "Stijn Eyerman and Lieven Eeckhout", title = "Probabilistic job symbiosis modeling for {SMT} processor scheduling", journal = j-SIGPLAN, volume = "45", number = "3", pages = "91--102", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736033", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Symbiotic job scheduling boosts simultaneous multithreading (SMT) processor performance by co-scheduling jobs that have `compatible' demands on the processor's shared resources. Existing approaches however require a sampling phase, evaluate a limited number of possible co-schedules, use heuristics to gauge symbiosis, are rigid in their optimization target, and do not preserve system-level priorities/shares.\par This paper proposes probabilistic job symbiosis modeling, which predicts whether jobs will create positive or negative symbiosis when co-scheduled without requiring the co-schedule to be evaluated. The model, which uses per-thread cycle stacks computed through a previously proposed cycle accounting architecture, is simple enough to be used in system software. Probabilistic job symbiosis modeling provides six key innovations over prior work in symbiotic job scheduling: (i) it does not require a sampling phase, (ii) it readjusts the job co-schedule continuously, (iii) it evaluates a large number of possible co-schedules at very low overhead, (iv) it is not driven by heuristics, (v) it can optimize a performance target of interest (e.g., system throughput or job turnaround time), and (vi) it preserves system-level priorities/shares. 
These innovations make symbiotic job scheduling both practical and effective.\par Our experimental evaluation, which assumes a realistic scenario in which jobs come and go, reports an average 16\% (and up to 35\%) reduction in job turnaround time compared to the previously proposed SOS (sample, optimize, symbios) approach for a two-thread SMT processor, and an average 19\% (and up to 45\%) reduction in job turnaround time for a four-thread SMT processor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "performance modeling; simultaneous multi-threading (SMT); symbiotic job scheduling", } @Article{Shen:2010:RBV, author = "Kai Shen", title = "Request behavior variations", journal = j-SIGPLAN, volume = "45", number = "3", pages = "103--116", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736034", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A large number of user requests execute (often concurrently) within a server system. A single request may exhibit fluctuating hardware characteristics (such as instruction completion rate and on-chip resource usage) over the course of its execution, due to inherent variations in application execution semantics as well as dynamic resource competition on resource-sharing processors like multicores. Understanding such behavior variations can assist fine-grained request modeling and adaptive resource management.\par This paper presents operating system management to track request behavior variations online. In addition to metric sample collection during periodic interrupts, we exploit the frequent system calls in server applications to perform low-cost in-kernel sampling. We utilize identified behavior variations to support or enhance request modeling in request classification, anomaly analysis, and online request signature construction. A foundation of our request modeling is the ability to quantify the difference between two requests' time series behaviors. We evaluate several differencing measures and enhance the classic dynamic time warping technique with additional penalties for asynchronous warp steps. Finally, motivated by fluctuating request resource usage and the resulting contention, we implement contention-easing CPU scheduling on multicore platforms and demonstrate its effectiveness in improving the worst-case request performance.\par Experiments in this paper are based on five server applications -- Apache web server, TPCC, TPCH, RUBiS online auction benchmark, and a user-content-driven online teaching application called WeBWorK.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "hardware counter; multicore; operating system adaptation; request modeling; server system", } @Article{Johnson:2010:DCM, author = "F. Ryan Johnson and Radu Stoica and Anastasia Ailamaki and Todd C. 
Mowry", title = "Decoupling contention management from scheduling", journal = j-SIGPLAN, volume = "45", number = "3", pages = "117--128", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736035", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many parallel applications exhibit unpredictable communication between threads, leading to contention for shared objects. The choice of contention management strategy impacts strongly the performance and scalability of these applications: spinning provides maximum performance but wastes significant processor resources, while blocking-based approaches conserve processor resources but introduce high overheads on the critical path of computation. Under situations of high or changing load, the operating system complicates matters further with arbitrary scheduling decisions which often preempt lock holders, leading to long serialization delays until the preempted thread resumes execution.\par We observe that contention management is orthogonal to the problems of scheduling and load management and propose to decouple them so each may be solved independently and effectively. To this end, we propose a load control mechanism which manages the number of active threads in the system separately from any contention which may exist. By isolating contention management from damaging interactions with the OS scheduler, we combine the efficiency of spinning with the robustness of blocking. The proposed load control mechanism results in stable, high performance for both lightly and heavily loaded systems, requires no special privileges or modifications at the OS level, and can be implemented as a library which benefits existing code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "blocking; concurrency control; contention; load management; multicore; scheduling; spinning; threads", } @Article{Zhuravlev:2010:ASR, author = "Sergey Zhuravlev and Sergey Blagodurov and Alexandra Fedorova", title = "Addressing shared resource contention in multicore processors via scheduling", journal = j-SIGPLAN, volume = "45", number = "3", pages = "129--142", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736036", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Contention for shared resources on multicore processors remains an unsolved problem in existing systems despite significant research efforts dedicated to this problem in the past. Previous solutions focused primarily on hardware techniques and software page coloring to mitigate this problem. Our goal is to investigate how and to what extent contention for shared resource can be mitigated via thread scheduling. Scheduling is an attractive tool, because it does not require extra hardware and is relatively easy to integrate into the system. Our study is the first to provide a comprehensive analysis of contention-mitigating techniques that use only scheduling. 
The most difficult part of the problem is to find a classification scheme for threads, which would determine how they affect each other when competing for shared resources. We provide a comprehensive analysis of such classification schemes using a newly proposed methodology that enables us to evaluate these schemes separately from the scheduling algorithm itself and to compare them to the optimal. As a result of this analysis we discovered a classification scheme that addresses not only contention for cache space, but contention for other shared resources, such as the memory controller, memory bus and prefetching hardware. To show the applicability of our analysis we design a new scheduling algorithm, which we prototype at user level, and demonstrate that it performs within 2\% of the optimal. We also conclude that the highest impact of contention-aware scheduling techniques is not in improving performance of a workload as a whole but in improving quality of service or performance isolation for individual applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "multicore processors; scheduling; shared resource contention", } @Article{Yuan:2010:SED, author = "Ding Yuan and Haohui Mai and Weiwei Xiong and Lin Tan and Yuanyuan Zhou and Shankar Pasupathy", title = "{SherLog}: error diagnosis by connecting clues from run-time logs", journal = j-SIGPLAN, volume = "45", number = "3", pages = "143--154", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736038", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computer systems often fail due to many factors such as software bugs or administrator errors. Diagnosing such production run failures is an important but challenging task since it is difficult to reproduce them in house due to various reasons: (1) unavailability of users' inputs and file content due to privacy concerns; (2) difficulty in building the exact same execution environment; and (3) non-determinism of concurrent executions on multi-processors.\par Therefore, programmers often have to diagnose a production run failure based on logs collected back from customers and the corresponding source code. Such diagnosis requires expert knowledge and is also too time-consuming and tedious to narrow down root causes. To address this problem, we propose a tool, called SherLog, that analyzes source code by leveraging information provided by run-time logs to infer what must or may have happened during the failed production run. It requires neither re-execution of the program nor knowledge on the log's semantics. It infers both control and data value information regarding the failed execution.\par We evaluate SherLog with 8 representative {\em real world\/} software failures (6 software bugs and 2 configuration errors) from 7 applications including 3 servers. Information inferred by SherLog is very useful for programmers to diagnose these evaluated failures.
Our results also show that SherLog can analyze large server applications such as Apache with thousands of logging messages within only 40 minutes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "failure diagnostics; log; static analysis", } @Article{Weeratunge:2010:AMD, author = "Dasarath Weeratunge and Xiangyu Zhang and Suresh Jagannathan", title = "Analyzing multicore dumps to facilitate concurrency bug reproduction", journal = j-SIGPLAN, volume = "45", number = "3", pages = "155--166", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736039", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging concurrent programs is difficult. This is primarily because the inherent non-determinism that arises because of scheduler interleavings makes it hard to easily reproduce bugs that may manifest only under certain interleavings. The problem is exacerbated in multi-core environments where there are multiple schedulers, one for each core. In this paper, we propose a reproduction technique for concurrent programs that execute on multi-core platforms. Our technique performs a lightweight analysis of a failing execution that occurs in a multi-core environment, and uses the result of the analysis to enable reproduction of the bug in a single-core system, under the control of a deterministic scheduler.\par More specifically, our approach automatically identifies the execution point in the re-execution that corresponds to the failure point. It does so by analyzing the failure core dump and leveraging a technique called {\em execution indexing\/} that identifies a related point in the re-execution. By generating a core dump at this point, and comparing the differences between the two dumps, we are able to guide a search algorithm to efficiently generate a failure inducing schedule. Our experiments show that our technique is highly effective and has reasonable overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency bugs; execution indexing; multi-core; reproduction", } @Article{Burckhardt:2010:RSP, author = "Sebastian Burckhardt and Pravesh Kothari and Madanlal Musuvathi and Santosh Nagarakatte", title = "A randomized scheduler with probabilistic guarantees of finding bugs", journal = j-SIGPLAN, volume = "45", number = "3", pages = "167--178", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a randomized scheduler for finding concurrency bugs. Like current stress-testing methods, it repeatedly runs a given test program with supplied inputs. However, it improves on stress-testing by finding buggy schedules more effectively and by quantifying the probability of missing concurrency bugs. Key to its design is the characterization of the depth of a concurrency bug as the minimum number of scheduling constraints required to find it. 
In a single run of a program with {\em n\/} threads and {\em k\/} steps, our scheduler detects a concurrency bug of depth {\em d\/} with probability at least 1/{\em nk\/}$^{d-1}$. We hypothesize that in practice, many concurrency bugs (including well-known types such as ordering errors, atomicity violations, and deadlocks) have small bug-depths, and we confirm the efficiency of our schedule randomization by detecting previously unknown and known concurrency bugs in several production-scale concurrent programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency; race conditions; randomized algorithms; testing", } @Article{Zhang:2010:CDS, author = "Wei Zhang and Chong Sun and Shan Lu", title = "{ConMem}: detecting severe concurrency bugs through an effect-oriented approach", journal = j-SIGPLAN, volume = "45", number = "3", pages = "179--192", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736041", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multicore technology is making concurrent programs increasingly pervasive. Unfortunately, it is difficult to deliver reliable concurrent programs, because of the huge and non-deterministic interleaving space. In reality, without the resources to thoroughly check the interleaving space, critical concurrency bugs can slip into production runs and cause failures in the field. Approaches to making the best use of the limited resources and exposing severe concurrency bugs before software release would be desirable.\par Unlike previous work that focuses on bugs caused by specific interleavings (e.g., races and atomicity-violations), this paper targets concurrency bugs that result in one type of severe effects: program crashes. Our study of the error-propagation process of real-world concurrency bugs reveals a common pattern (50\% in our non-deadlock concurrency bug set) that is highly correlated with program crashes. We call this pattern concurrency-memory bugs: buggy interleavings directly cause memory bugs (NULL-pointer-dereference, dangling-pointer, buffer-overflow, uninitialized-read) on shared memory objects.\par Guided by this study, we built ConMem to monitor program execution, analyze memory accesses and synchronizations, and predictively detect these common and severe concurrency-memory bugs. We also built a validator ConMem-v to automatically prune false positives by enforcing potential bug-triggering interleavings.\par We evaluated ConMem using 7 open-source programs with 9 real-world severe concurrency bugs. ConMem detects more tested bugs (8 out of 9 bugs) than a lock-set-based race detector and an unserializable-interleaving detector that detect 4 and 5 bugs respectively, with a false positive rate about one tenth of the compared tools. ConMem-v further prunes out all the false positives. ConMem has reasonable overhead suitable for development usage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency bugs; software testing", } @Article{Mesa-Martinez:2010:CPT, author = "Francisco Javier Mesa-Martinez and Ehsan K.
Ardestani and Jose Renau", title = "Characterizing processor thermal behavior", journal = j-SIGPLAN, volume = "45", number = "3", pages = "193--204", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736043", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Temperature is a dominant factor in the performance, reliability, and leakage power consumption of modern processors. As a result, increasing numbers of researchers evaluate thermal characteristics in their proposals. In this paper, we measure a real processor focusing on its thermal characterization executing diverse workloads.\par Our results show that in real designs, thermal transients operate at larger scales than their performance and power counterparts. Conventional thermal simulation methodologies based on profile-based simulation or statistical sampling, such as Simpoint, tend to explore very limited execution spans. Short simulation times can lead to reduced matchings between performance and thermal phases. To illustrate these issues we characterize and classify from a thermal standpoint SPEC00 and SPEC06 applications, which are traditionally used in the evaluation of architectural proposals. This paper concludes with a list of recommendations regarding thermal modeling considerations based on our experimental insights.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "microarchitecture; temperature; thermal simulation", } @Article{Venkatesh:2010:CCR, author = "Ganesh Venkatesh and Jack Sampson and Nathan Goulding and Saturnino Garcia and Vladyslav Bryksin and Jose Lugo-Martinez and Steven Swanson and Michael Bedford Taylor", title = "Conservation cores: reducing the energy of mature computations", journal = j-SIGPLAN, volume = "45", number = "3", pages = "205--218", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736044", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Growing transistor counts, limited power budgets, and the breakdown of voltage scaling are currently conspiring to create a {\em utilization wall\/} that limits the fraction of a chip that can run at full speed at one time. In this regime, specialized, energy-efficient processors can increase parallelism by reducing the per-computation power requirements and allowing more computations to execute under the same power budget. To pursue this goal, this paper introduces {\em conservation cores}. Conservation cores, or {\em c-cores}, are specialized processors that focus on reducing energy and energy-delay instead of increasing performance. This focus on energy makes c-cores an excellent match for many applications that would be poor candidates for hardware acceleration (e.g., irregular integer codes). We present a toolchain for automatically synthesizing c-cores from application source code and demonstrate that they can significantly reduce energy and energy-delay for a wide range of applications. The c-cores support patching, a form of targeted reconfigurability, that allows them to adapt to new versions of the software they target. 
Our results show that conservation cores can reduce energy consumption by up to 16.0x for functions and by up to 2.1x for whole applications, while patching can extend the useful lifetime of individual c-cores to match that of conventional processors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "conservation core; heterogeneous many-core; patching; utilization wall", } @Article{Sudan:2010:MPI, author = "Kshitij Sudan and Niladrish Chatterjee and David Nellans and Manu Awasthi and Rajeev Balasubramonian and Al Davis", title = "Micro-pages: increasing {DRAM} efficiency with locality-aware data placement", journal = j-SIGPLAN, volume = "45", number = "3", pages = "219--230", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736045", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Power consumption and DRAM latencies are serious concerns in modern chip-multiprocessor (CMP or multi-core) based compute systems. The management of the DRAM row buffer can significantly impact both power consumption and latency. Modern DRAM systems read data from cell arrays and populate a row buffer as large as 8 KB on a memory request. But only a small fraction of these bits are ever returned back to the CPU. This ends up wasting energy and time to read (and subsequently write back) bits which are used rarely. Traditionally, an open-page policy has been used for uni-processor systems and it has worked well because of spatial and temporal locality in the access stream. In future multi-core processors, the possibly independent access streams of each core are interleaved, thus destroying the available locality and significantly under-utilizing the contents of the row buffer. In this work, we attempt to improve row-buffer utilization for future multi-core systems.\par The schemes presented here are motivated by our observations that a large number of accesses within heavily accessed OS pages are to small, contiguous 'chunks' of cache blocks. Thus, the co-location of chunks (from different OS pages) in a row-buffer will improve the overall utilization of the row buffer contents, and consequently reduce memory energy consumption and access time. Such co-location can be achieved in many ways, notably involving a reduction in OS page size and software or hardware assisted migration of data within DRAM. We explore these mechanisms and discuss the trade-offs involved along with energy and performance improvements from each scheme. On average, for applications with room for improvement, our best performing scheme increases performance by 9\% (max. 18\%) and reduces memory energy consumption by 15\% (max. 70\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data placement; dram row-buffer management", } @Article{Pelley:2010:PRD, author = "Steven Pelley and David Meisner and Pooya Zandevakili and Thomas F. 
Wenisch and Jack Underwood", title = "Power routing: dynamic power provisioning in the data center", journal = j-SIGPLAN, volume = "45", number = "3", pages = "231--242", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735971.1736047", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data center power infrastructure incurs massive capital costs, which typically exceed energy costs over the life of the facility. To squeeze maximum value from the infrastructure, researchers have proposed over-subscribing power circuits, relying on the observation that peak loads are rare. To ensure availability, these proposals employ power capping, which throttles server performance during utilization spikes to enforce safe power budgets. However, because budgets must be enforced locally -- at each power distribution unit (PDU) -- local utilization spikes may force throttling even when power delivery capacity is available elsewhere. Moreover, the need to maintain reserve capacity for fault tolerance on power delivery paths magnifies the impact of utilization spikes.\par In this paper, we develop mechanisms to better utilize installed power infrastructure, reducing reserve capacity margins and avoiding performance throttling. Unlike conventional high-availability data centers, where collocated servers share identical primary and secondary power feeds, we reorganize power feeds to create shuffled power distribution topologies. Shuffled topologies spread secondary power feeds over numerous PDUs, reducing reserve capacity requirements to tolerate a single PDU failure. Second, we propose Power Routing, which schedules IT load dynamically across redundant power feeds to: (1) shift slack to servers with growing power demands, and (2) balance power draw across AC phases to reduce heating and improve electrical stability. We describe efficient heuristics for scheduling servers to PDUs (an NP-complete problem). Using data collected from nearly 1000 servers in three production facilities, we demonstrate that these mechanisms can reduce the required power infrastructure capacity relative to conventional high-availability data centers by 32\% without performance degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data centers; power infrastructure", } @Article{Ahmad:2010:JOI, author = "Faraz Ahmad and T. N. Vijaykumar", title = "Joint optimization of idle and cooling power in data centers while maintaining response time", journal = j-SIGPLAN, volume = "45", number = "3", pages = "243--256", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735971.1736048", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Server power and cooling power amount to a significant fraction of modern data centers' recurring costs. While data centers provision enough servers to guarantee response times under the maximum loading, data centers operate under much less loading most of the times (e.g., 30-70\% of the maximum loading). 
Previous server-power proposals exploit this under-utilization to reduce the server idle power by keeping active only as many servers as necessary and putting the rest into low-power standby modes. However, these proposals incur higher cooling power due to hot spots created by concentrating the data center loading on fewer active servers, or degrade response times due to standby-to-active transition delays, or both. Other proposals optimize the cooling power but incur considerable idle power. To address the first issue of power, we propose {\em PowerTrade}, which trades-off idle power and cooling power for each other, thereby reducing the total power. To address the second issue of response time, we propose {\em SurgeGuard\/} to overprovision the number of active servers beyond that needed by the current loading so as to absorb future increases in the loading. SurgeGuard is a two-tier scheme which uses well-known over-provisioning at coarse time granularities (e.g., one hour) to absorb the common, smooth increases in the loading, and a novel fine-grain replenishment of the over-provisioned reserves at fine time granularities (e.g., five minutes) to handle the uncommon, abrupt loading surges. Using real-world traces, we show that combining PowerTrade and SurgeGuard reduces total power by 30\% compared to previous low-power schemes while maintaining response times within 1.7\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cooling power; data center; idle power; power management; response time", } @Article{Goodstein:2010:BAA, author = "Michelle L. Goodstein and Evangelos Vlachos and Shimin Chen and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry", title = "Butterfly analysis: adapting dataflow analysis to dynamic parallel monitoring", journal = j-SIGPLAN, volume = "45", number = "3", pages = "257--270", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735971.1736050", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Online program monitoring is an effective technique for detecting bugs and security attacks in running applications. Extending these tools to monitor parallel programs is challenging because the tools must account for inter-thread dependences and relaxed memory consistency models. Existing tools assume sequential consistency and often slow down the monitored program by orders of magnitude. In this paper, we present a novel approach that avoids these pitfalls by not relying on strong consistency models or detailed inter-thread dependence tracking. Instead, we only assume that events in the distant past on all threads have become visible; we make no assumptions on (and avoid the overheads of tracking) the relative ordering of more recent events on other threads. To overcome the potential state explosion of considering all the possible orderings among recent events, we adapt two techniques from static dataflow analysis, reaching definitions and reaching expressions, to this new domain of dynamic parallel monitoring. Significant modifications to these techniques are proposed to ensure the correctness and efficiency of our approach. We show how our adapted analysis can be used in two popular memory and security tools. 
We prove that our approach does not miss errors, and sacrifices precision only due to the lack of a relative ordering among recent events. Moreover, our simulation study on a collection of Splash-2 and Parsec 2.0 benchmarks running a memory-checking tool on a hardware-assisted logging platform demonstrates the potential benefits in trading off a very low false positive rate for (i) reduced overhead and (ii) the ability to run on relaxed consistency models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data flow analysis; dynamic program monitoring; parallel programming; static analysis", } @Article{Vlachos:2010:PEA, author = "Evangelos Vlachos and Michelle L. Goodstein and Michael A. Kozuch and Shimin Chen and Babak Falsafi and Phillip B. Gibbons and Todd C. Mowry", title = "{ParaLog}: enabling and accelerating online parallel monitoring of multithreaded applications", journal = j-SIGPLAN, volume = "45", number = "3", pages = "271--284", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736051", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "{\em Instruction-grain lifeguards\/} monitor the events of a running application at the level of individual instructions in order to identify and help mitigate application bugs and security exploits. Because such lifeguards impose a 10-100X slowdown on existing platforms, previous studies have proposed hardware designs to accelerate lifeguard processing. However, these accelerators are either tailored to a specific class of lifeguards or suitable only for monitoring single-threaded programs.\par We present ParaLog, the first design of a system enabling fast online parallel monitoring of multithreaded parallel applications. ParaLog supports a broad class of software-defined lifeguards. We show how three existing accelerators can be enhanced to support online multithreaded monitoring, dramatically reducing lifeguard overheads. We identify and solve several challenges in monitoring parallel applications and/or parallelizing these accelerators, including (i) enforcing inter-thread data dependences, (ii) dealing with inter-thread effects that are not reflected in coherence traffic, (iii) dealing with unmonitored operating system activity, and (iv) ensuring lifeguards can access shared metadata with negligible synchronization overheads. We present our system design for both Sequentially Consistent and Total Store Ordering processors. We implement and evaluate our design on a 16 core simulated CMP, using benchmarks from SPLASH-2 and PARSEC and two lifeguards: a data-flow tracking lifeguard and a memory-access checker lifeguard. Our results show that (i) our parallel accelerators improve performance by 2-9X and 1.13-3.4X for our two lifeguards, respectively, (ii) we are 5-126X faster than the time-slicing approach required by existing techniques, and (iii) our average overheads for applications with eight threads are 51\% and 28\% for the two lifeguards, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "hardware support for debugging; instruction-grain lifeguards; online parallel monitoring", } @Article{Hormati:2010:MMS, author = "Amir H. 
Hormati and Yoonseo Choi and Mark Woh and Manjunath Kudlur and Rodric Rabbah and Trevor Mudge and Scott Mahlke", title = "{MacroSS}: macro-{SIMD}ization of streaming applications", journal = j-SIGPLAN, volume = "45", number = "3", pages = "285--296", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736053", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "SIMD (Single Instruction, Multiple Data) engines are an essential part of the processors in various computing markets, from servers to the embedded domain. Although SIMD-enabled architectures have the capability of boosting the performance of many application domains by exploiting data-level parallelism, it is very challenging for compilers and also programmers to identify and transform parts of a program that will benefit from a particular SIMD engine. The focus of this paper is on the problem of SIMDization for the growing application domain of streaming. Streaming applications are an ideal solution for targeting multi-core architectures, such as shared/distributed memory systems, tiled architectures, and single-core systems. Since these architectures, in most cases, provide SIMD acceleration units as well, it is highly beneficial to generate SIMD code from streaming programs. Specifically, we introduce MacroSS, which is capable of performing macro-SIMDization on high-level streaming graphs. Macro-SIMDization uses high-level information such as execution rates of actors and communication patterns between them to transform the graph structure, vectorize actors of a streaming program, and generate intermediate code. We also propose low-overhead architectural modifications that accelerate shuffling of data elements between the scalar and vectorized parts of a streaming program. Our experiments show that MacroSS is capable of generating code that, on average, outperforms scalar code compiled with the current state-of-art auto-vectorizing compilers by 54\%. Using the low-overhead data shuffling hardware, performance is improved by an additional 8\% with less than 1\% area overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler; optimization; SIMD architecture; streaming", } @Article{Woo:2010:CPD, author = "Dong Hyuk Woo and Hsien-Hsin S. Lee", title = "{COMPASS}: a programmable data prefetcher using idle {GPU} shaders", journal = j-SIGPLAN, volume = "45", number = "3", pages = "297--310", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735971.1736054", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A traditional fixed-function graphics accelerator has evolved into a programmable general-purpose graphics processing unit over the last few years. These powerful computing cores are mainly used for accelerating graphics applications or enabling low-cost scientific computing. To further reduce the cost and form factor, an emerging trend is to integrate GPU along with the memory controllers onto the same die with the processor cores. 
However, given such a system-on-chip, the GPU, while occupying a substantial part of the silicon, will sit idle and contribute nothing to the overall system performance when running non-graphics workloads or applications lacking data-level parallelism. In this paper, we propose COMPASS, a compute shader-assisted data prefetching scheme, to leverage the GPU resource for improving single-threaded performance on an integrated system. By harnessing the GPU shader cores with very lightweight architectural support, COMPASS can emulate the functionality of a hardware-based prefetcher using the idle GPU and successfully improve the memory performance of single-thread applications. Moreover, thanks to its flexibility and programmability, one can implement the best performing prefetch scheme to improve each specific application as demonstrated in this paper. With COMPASS, we envision that a future application vendor can provide a custom-designed COMPASS shader bundled with its software to be loaded at runtime to optimize the performance. Our simulation results show that COMPASS can improve the single-thread performance of memory-intensive applications by 68\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compute shader; GPU; prefetch", } @Article{Sanchez:2010:FAS, author = "Daniel Sanchez and Richard M. Yoo and Christos Kozyrakis", title = "Flexible architectural support for fine-grain scheduling", journal = j-SIGPLAN, volume = "45", number = "3", pages = "311--322", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736055", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To make efficient use of CMPs with tens to hundreds of cores, it is often necessary to exploit fine-grain parallelism. However, managing tasks of a few thousand instructions is particularly challenging, as the runtime must ensure load balance without compromising locality and introducing small overheads. Software-only schedulers can implement various scheduling algorithms that match the characteristics of different applications and programming models, but suffer significant overheads as they synchronize and communicate task information over the deep cache hierarchy of a large-scale CMP. To reduce these costs, hardware-only schedulers like Carbon, which implement task queuing and scheduling in hardware, have been proposed. However, a hardware-only solution fixes the scheduling algorithm and leaves no room for other uses of the custom hardware.\par This paper presents a combined hardware-software approach to build fine-grain schedulers that retain the flexibility of software schedulers while being as fast and scalable as hardware ones. We propose asynchronous direct messages (ADM), a simple architectural extension that provides direct exchange of asynchronous, short messages between threads in the CMP without going through the memory hierarchy. ADM is sufficient to implement a family of novel, software-mostly schedulers that rely on low-overhead messaging to efficiently coordinate scheduling and transfer task information. These schedulers match and often exceed the performance and scalability of Carbon when using the same scheduling algorithm.
When the ADM runtime tailors its scheduling algorithm to application characteristics, it outperforms Carbon by up to 70\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "chip-multiprocessors; fine-grain scheduling; many-core; messaging; scheduling; work-stealing", } @Article{Romanescu:2010:SDV, author = "Bogdan F. Romanescu and Alvin R. Lebeck and Daniel J. Sorin", title = "Specifying and dynamically verifying address translation-aware memory consistency", journal = j-SIGPLAN, volume = "45", number = "3", pages = "323--334", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736057", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computer systems with virtual memory are susceptible to design bugs and runtime faults in their address translation (AT) systems. Detecting bugs and faults requires a clear specification of correct behavior. To address this need, we develop a framework for AT-aware memory consistency models. We expand and divide memory consistency into the physical address memory consistency (PAMC) model that defines the behavior of operations on physical addresses and the virtual address memory consistency (VAMC) model that defines the behavior of operations on virtual addresses. As part of this expansion, we show what AT features are required to bridge the gap between PAMC and VAMC. Based on our AT-aware memory consistency specifications, we design efficient dynamic verification hardware that can detect violations of VAMC and thus detect the effects of design bugs and runtime faults, including most AT related bugs in published errata.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "address translation; dynamic verification; memory consistency; virtual memory", } @Article{Ebrahimi:2010:FST, author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and Yale N. Patt", title = "Fairness via source throttling: a configurable and high-performance fairness substrate for multi-core memory systems", journal = j-SIGPLAN, volume = "45", number = "3", pages = "335--346", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736058", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cores in a chip-multiprocessor (CMP) system share multiple hardware resources in the memory subsystem. If resource sharing is unfair, some applications can be delayed significantly while others are unfairly prioritized. Previous research proposed separate fairness mechanisms in each individual resource. Such resource-based fairness mechanisms implemented independently in each resource can make contradictory decisions, leading to low fairness and loss of performance. Therefore, a coordinated mechanism that provides fairness in the entire shared memory system is desirable.\par This paper proposes a new approach that provides fairness in the {\em entire shared memory system}, thereby eliminating the need for and complexity of developing fairness mechanisms for each individual resource. 
Our technique, Fairness via Source Throttling (FST), estimates the unfairness in the entire shared memory system. If the estimated unfairness is above a threshold set by system software, FST throttles down cores causing unfairness by limiting the number of requests they can inject into the system and the frequency at which they do. As such, our {\em source-based\/} fairness control ensures fairness decisions are made in tandem in the entire memory system. FST also enforces thread priorities/weights, and enables system software to enforce different fairness objectives and fairness-performance tradeoffs in the memory system.\par Our evaluations show that FST provides the best system fairness and performance compared to four systems with no fairness control and with state-of-the-art fairness mechanisms implemented in both shared caches and memory controllers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "fairness; multi-core systems; shared memory systems; system performance", } @Article{Gelado:2010:ADS, author = "Isaac Gelado and Javier Cabezas and Nacho Navarro and John E. Stone and Sanjay Patel and Wen-mei W. Hwu", title = "An asymmetric distributed shared memory model for heterogeneous parallel systems", journal = j-SIGPLAN, volume = "45", number = "3", pages = "347--358", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736059", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous computing combines general purpose CPUs with accelerators to efficiently execute both sequential control-intensive and data-parallel phases of applications. Existing programming models for heterogeneous computing rely on programmers to explicitly manage data transfers between the CPU system memory and accelerator memory.\par This paper presents a new programming model for heterogeneous computing, called Asymmetric Distributed Shared Memory (ADSM), that maintains a shared logical memory space for CPUs to access objects in the accelerator physical memory but not vice versa. The asymmetry allows light-weight implementations that avoid common pitfalls of symmetrical distributed shared memory systems. ADSM allows programmers to assign data objects to performance critical methods. When a method is selected for accelerator execution, its associated data objects are allocated within the shared logical memory space, which is hosted in the accelerator physical memory and transparently accessible by the methods executed on CPUs.\par We argue that ADSM reduces programming efforts for heterogeneous computing systems and enhances application portability. We present a software implementation of ADSM, called GMAC, on top of CUDA in a GNU/Linux environment. We show that applications written in ADSM and running on top of GMAC achieve performance comparable to their counterparts using programmer-managed data transfers. This paper presents the GMAC system and evaluates different design choices. 
We further suggest additional architectural support that will likely allow GMAC to achieve higher application performance than the current CUDA model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "asymmetric distributed shared memory; data-centric programming models; heterogeneous systems", } @Article{Bhattacharjee:2010:ICC, author = "Abhishek Bhattacharjee and Margaret Martonosi", title = "Inter-core cooperative {TLB} for chip multiprocessors", journal = j-SIGPLAN, volume = "45", number = "3", pages = "359--370", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736060", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Translation Lookaside Buffers (TLBs) are commonly employed in modern processor designs and have considerable impact on overall system performance. A number of past works have studied TLB designs to lower access times and miss rates, specifically for uniprocessors. With the growing dominance of chip multiprocessors (CMPs), it is necessary to examine TLB performance in the context of parallel workloads.\par This work is the first to present TLB prefetchers that exploit commonality in TLB miss patterns across cores in CMPs. We propose and evaluate two Inter-Core Cooperative (ICC) TLB prefetching mechanisms, assessing their effectiveness at eliminating TLB misses both individually and together. Our results show these approaches require at most modest hardware and can collectively eliminate 19\% to 90\% of data TLB (D-TLB) misses across the surveyed parallel workloads.\par We also compare performance improvements across a range of hardware and software implementation possibilities. We find that while a fully-hardware implementation results in average performance improvements of 8-46\% for a range of TLB sizes, a hardware/software approach yields improvements of 4-32\%. Overall, our work shows that TLB prefetchers exploiting inter-core correlations can effectively eliminate TLB misses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "parallelism; prefetching; translation lookaside buffer", } @Article{Huang:2010:OES, author = "Ruirui Huang and Daniel Y. Deng and G. Edward Suh", title = "{Orthrus}: efficient software integrity protection on multi-cores", journal = j-SIGPLAN, volume = "45", number = "3", pages = "371--384", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736062", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes an efficient hardware/software system that significantly enhances software security through diversified replication on multi-cores. Recent studies show that a large class of software attacks can be detected by running multiple versions of a program simultaneously and checking the consistency of their behaviors. However, execution of multiple replicas incurs significant overheads on today's computing platforms, especially with fine-grained comparisons necessary for high security. 
Orthrus exploits similarities in automatically generated replicas to enable simultaneous execution of those replicas with minimal overheads; the architecture reduces memory and bandwidth overheads by compressing multiple memory spaces together, and additional power consumption and silicon area by eliminating redundant computations. Utilizing the hardware architecture, Orthrus implements a fine-grained memory layout diversification with the LLVM compiler and can detect corruptions in both pointers and critical data. Experiments indicate that the Orthrus architecture incurs minimal overheads and provides a protection against a broad range of attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "memory protection; multi-core architecture; replication-aware architecture; software diversity and redundancy; software security", } @Article{Feng:2010:SPS, author = "Shuguang Feng and Shantanu Gupta and Amin Ansari and Scott Mahlke", title = "Shoestring: probabilistic soft error reliability on the cheap", journal = j-SIGPLAN, volume = "45", number = "3", pages = "385--396", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736063", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aggressive technology scaling provides designers with an ever increasing budget of cheaper and faster transistors. Unfortunately, this trend is accompanied by a decline in individual device reliability as transistors become increasingly susceptible to soft errors. We are quickly approaching a new era where resilience to soft errors is no longer a luxury that can be reserved for just processors in high-reliability, mission-critical domains. Even processors used in mainstream computing will soon require protection. However, due to tighter profit margins, reliable operation for these devices must come at little or no cost. This paper presents Shoestring, a minimally invasive software solution that provides high soft error coverage with very little overhead, enabling its deployment even in commodity processors with 'shoestring' reliability budgets. Leveraging intelligent analysis at compile time, and exploiting low-cost, symptom-based error detection, Shoestring is able to focus its efforts on protecting statistically-vulnerable portions of program code. Shoestring effectively applies instruction duplication to protect only those segments of code that, when subjected to a soft error, are likely to result in user-visible faults without first exhibiting symptomatic behavior. Shoestring is able to recover from an additional 33.9\% of soft errors that are undetected by a symptom-only approach, achieving an overall user-visible failure rate of 1.6\%. 
This reliability improvement comes at a modest performance overhead of 15.8\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler analysis; error detection; fault injection", } @Article{Yoon:2010:VFE, author = "Doe Hyun Yoon and Mattan Erez", title = "Virtualized and flexible {ECC} for main memory", journal = j-SIGPLAN, volume = "45", number = "3", pages = "397--408", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1736020.1736064", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a general scheme for virtualizing main memory error-correction mechanisms, which map redundant information needed to correct errors into the memory namespace itself. We rely on this basic idea, which increases flexibility to increase error protection capabilities, improve power efficiency, and reduce system cost; with only small performance overheads. We augment the virtual memory system architecture to detach the physical mapping of data from the physical mapping of its associated ECC information. We then use this mechanism to develop two-tiered error protection techniques that separate the process of detecting errors from the rare need to also correct errors, and thus save energy. We describe how to provide strong chipkill and double-chip kill protection using existing DRAM and packaging technology. We show how to maintain access granularity and redundancy overheads, even when using $\times 8$ DRAM chips. We also evaluate error correction for systems that do not use ECC DIMMs. Overall, analysis of demanding SPEC CPU 2006 and PARSEC benchmarks indicates that performance overhead is only 1\% with ECC DIMMs and less than 10\% using standard Non-ECC DIMM configurations, that DRAM power savings can be as high as 27\%, and that the system energy-delay product is improved by 12\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "error correction; fault tolerance; memory systems; reliability", } @Article{Li:2010:AAB, author = "Minming Li and Chun Jason Xue and Tiantian Liu and Yingchao Zhao", title = "Analysis and approximation for bank selection instruction minimization on partitioned memory architecture", journal = j-SIGPLAN, volume = "45", number = "4", pages = "1--8", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755890", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A large number of embedded systems include 8-bit microcontrollers for their energy efficiency and low cost. Multi-bank memory architecture is commonly applied in 8-bit microcontrollers to increase the size of memory without extending address buses. To switch among different memory banks, a special instruction, Bank Selection, is used. How to minimize the number of bank selection instructions inserted is important to reduce code size for embedded systems.\par In this paper, we consider how to insert the minimum number of bank selection instructions in a program to achieve feasibility. 
A program can be represented by a control flow graph (CFG). We prove that it is NP-hard to insert the minimum number of bank selection instructions if all the variables are pre-assigned to memory banks. Therefore, we introduce a 2-approximation algorithm using a rounding method. When the CFG is a tree or the out-degree of each node in the CFG is at most two, we show that we can insert the bank selection instructions optimally in polynomial time. We then consider the case when there are some nodes that do not access any memory bank and design a dynamic programming method to compute the optimal insertion strategy when the CFG is a tree. Experimental results show that the proposed techniques can reduce bank selection instructions significantly on partitioned memory architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "bank selection instruction minimization; partitioned memory architecture", } @Article{Pyka:2010:VSL, author = "Robert Pyka and Felipe Klein and Peter Marwedel and Stylianos Mamagkakis", title = "Versatile system-level memory-aware platform description approach for embedded {MPSoCs}", journal = j-SIGPLAN, volume = "45", number = "4", pages = "9--16", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755891", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we present a novel system modeling language which targets primarily the development of source-level multiprocessor memory aware optimizations.\par In contrast to previous system modeling approaches this approach tries to model the whole system and especially the memory hierarchy in a structural and semantically accessible way. Previous approaches primarily support generation of simulators or retargetable code selectors and thus concentrate on pure behavioral models or describe only the processor instruction set in a semantically accessible way. A simple, database-like interface is offered to the optimization developer, which in conjunction with the MACCv2 framework enables rapid development of source-level architecture independent optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "architecture description; channel; component; configuration; definition; energy models; framework", } @Article{Kim:2010:ODM, author = "Yongjoo Kim and Jongeun Lee and Aviral Shrivastava and Yunheung Paek", title = "Operation and data mapping for {CGRAs} with multi-bank memory", journal = j-SIGPLAN, volume = "45", number = "4", pages = "17--26", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755892", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Coarse Grain Reconfigurable Architectures (CGRAs) promise high performance at high power efficiency. They fulfill this promise by keeping the hardware extremely simple, and moving the complexity to application mapping. One major challenge comes in the form of data mapping.
For reasons of power-efficiency and complexity, CGRAs use multi-bank local memory, and a row of PEs share memory access. In order for each row of the PEs to access any memory bank, there is a hardware arbiter between the memory requests generated by the PEs and the banks of the local memory. However, a fundamental restriction remains that a bank cannot be accessed by two different PEs at the same time. We propose to meet this challenge by mapping application operations onto PEs and data into memory banks in a way that avoids such conflicts. Our experimental results on kernels from multimedia benchmarks demonstrate that our local memory-aware compilation approach can generate mappings that are up to 40\% better in performance (17.3\% on average) compared to a memory-unaware scheduler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "arbiter; bank conflict; coarse-grained reconfigurable architecture; compilation; multi-bank memory", } @Article{Foroozannejad:2010:LDB, author = "Mohammad H. Foroozannejad and Matin Hashemi and Trevor L. Hodges and Soheil Ghiasi", title = "Look into details: the benefits of fine-grain streaming buffer analysis", journal = j-SIGPLAN, volume = "45", number = "4", pages = "27--36", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755894", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many embedded applications demand processing of a seemingly endless stream of input data in real-time. Productive development of such applications is typically carried out by synthesizing software from high-level specifications, such as data-flow graphs. In this context, we study the problem of inter-actor buffer allocation, which is a critical step during compilation of streaming applications. We argue that fine-grain analysis of buffers' spatio-temporal characteristics, as opposed to conventional live range analysis, enables dramatic improvements in buffer sharing. Improved sharing translates to reduction of the compiled binary memory footprint, which is of prime concern in many embedded systems. We transform the buffer allocation problem to two-dimensional packing using complex polygons. We develop an evolutionary packing algorithm, which readily yields buffer allocations. 
Experimental results show an average of over 7X and 2X improvement in total buffer size, compared to baseline and conventional live range analysis schemes, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "buffer management; optimization; software synthesis; streaming applications; synchronous data flow", } @Article{Perathoner:2010:MSE, author = "Simon Perathoner and Tobias Rein and Lothar Thiele and Kai Lampka and Jonas Rox", title = "Modeling structured event streams in system level performance analysis", journal = j-SIGPLAN, volume = "45", number = "4", pages = "37--46", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755895", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper extends the methodology of analytic real-time analysis of distributed embedded systems towards merging and extracting sub-streams based on event type information. For example, one may first merge a set of given event streams, then process them jointly and finally decompose them into separate streams again. In other words, data streams can be hierarchically composed into higher level event streams and decomposed later on again. The proposed technique is strictly compositional, hence highly suited for being embedded into well known performance evaluation frameworks such as Symta/S and MPA (Modular Performance Analysis). It is based on a novel characterization of structured event streams which we denote as Event Count Curves. They characterize the structure of event streams in which the individual events belong to a finite number of classes. This new concept avoids the explicit maintenance of stream-individual information when routing a composed stream through a network of system components. Nevertheless it allows an arbitrary composition and decomposition of sub-streams at any stage of the distributed event processing. For evaluating our approach we analyze a realistic case-study and compare the obtained results with other existing techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "event count curves; performance analysis; real-time calculus", } @Article{Brandt:2010:TCA, author = "Jens Brandt and Klaus Schneider and Sandeep K. Shukla", title = "Translating concurrent action oriented specifications to synchronous guarded actions", journal = j-SIGPLAN, volume = "45", number = "4", pages = "47--56", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755896", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent Action-Oriented Specifications (CAOS) model the behavior of a synchronous hardware circuit as asynchronous guarded actions at an abstraction level higher than the Register Transfer Level (RTL). Previous approaches always considered the compilation of CAOS, which includes a transformation of the under-lying model of computation and the scheduling of guarded actions per clock cycle, as a tightly integrated step. 
In this paper, we present a new compilation procedure, which separates these two tasks and translates CAOS models to synchronous guarded actions with an explicit interface to a scheduler. This separation of concerns has many advantages, including better analyses and integration of custom schedulers. Our method also generates assertions that each scheduler must obey that can be fulfilled by algorithms for scheduler synthesis like those developed in supervisory control. We present our translation procedure in detail and illustrate it by various examples. We also show that our method simplifies formal verification of hardware synthesized from CAOS specifications over previously known formal verification approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "code generation; concurrent action-oriented specifications; guarded commands; synchronous languages", } @Article{Delaval:2010:CMD, author = "Gwena{\"e}l Delaval and Herv{\'e} Marchand and Eric Rutten", title = "Contracts for modular discrete controller synthesis", journal = j-SIGPLAN, volume = "45", number = "4", pages = "57--66", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755898", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe the extension of a reactive programming language with a behavioral contract construct. It is dedicated to the programming of reactive control of applications in embedded systems, and involves principles of the supervisory control of discrete event systems. Our contribution is in a language approach where modular discrete controller synthesis (DCS) is integrated, and it is concretized in the encapsulation of DCS into a compilation process. From transition system specifications of possible behaviors, DCS automatically produces controllers that make the controlled system satisfy the property given as objective. Our language features and compiling technique provide correctness-by-construction in that sense, and enhance reliability and verifiability. Our application domain is adaptive and reconfigurable systems: closed-loop adaptation mechanisms enable flexible execution of functionalities w.r.t. changing resource and environment conditions. Our language can serve programming such adaption controllers. This paper particularly describes the compilation of the language. We present a method for the modular application of discrete controller synthesis on synchronous programs, and its integration in the BZR language. We consider structured programs, as a composition of nodes, and first apply DCS on particular nodes of the program, in order to reduce the complexity of the controller computation; then, we allow the abstraction of parts of the program for this computation; and finally, we show how to recompose the different controllers computed from different abstractions for their correct co-execution with the initial program. 
Our work is illustrated with examples, and we present quantitative results about its implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "adaptive and reconfigurable systems; components; contracts; discrete controller synthesis; modularity; reactive systems; synchronous programming", } @Article{Schlickling:2010:SAD, author = "Marc Schlickling and Markus Pister", title = "Semi-automatic derivation of timing models for {WCET} analysis", journal = j-SIGPLAN, volume = "45", number = "4", pages = "67--76", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755899", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded systems are widely used for supporting our everyday life. In the area of safety-critical systems human life often depends on the system's correct behavior. Many of such systems are hard real-time systems, so that the notion of correctness not only means functional correctness. They additionally have to obey stringent timing constraints, i.e. timely task completion under all circumstances is essential. An example for such a safety-critical system is the flight control computer in an airplane, which is responsible for stability, attitude and path control.\par In order to derive guarantees on the timing behavior of hard real-time systems, the worst-case execution time (WCET) of each task in the system has to be determined. Saarland University and AbsInt GmbH have successfully developed the aiT WCET analyzer for computing safe upper bounds on the WCET of a task. The computation is mainly based on abstract interpretation of timing models of the processor and its periphery. Such timing models are currently hand-crafted by human experts. Therefore their implementation is a time-consuming and error-prone process.\par Modern processors or system controllers are automatically synthesized out of formal hardware specifications like VHDL or Verilog. Besides the system's functional behavior, such specifications provide all information needed for the creation of a timing model. But due to their size and complexity, manually examining the sources is even more complex than only looking at the processor manuals. Moreover, this would not reduce the effort nor the probability of implementation errors.\par To face this problem, this paper proposes a method for semi-automatically deriving suitable timing models out of formal hardware specifications in VHDL that fit to the tool chain of the aiT WCET analyzer.
By this, we reduce the creation time of timing models from months to weeks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "hard real-time; vhdl; worst-case execution time", } @Article{Viskic:2010:DEA, author = "Ines Viskic and Lochi Yu and Daniel Gajski", title = "Design exploration and automatic generation of {MPSoC} platform {TLMs} from {Kahn Process Network} applications", journal = j-SIGPLAN, volume = "45", number = "4", pages = "77--84", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755900", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With increasingly more complex Multi-Processor Systems on Chip (MPSoC) and shortening time-to-market projections, Transaction Level Modeling and Platform Aware Design are seen as promising approaches to efficient MPSoC design.\par In this paper, we present an automatized 3-phase process of Platform Aware Design and apply it to Kahn Process Networks (KPN) applications, a widely used model of computation for data-flow applications. We start with the KPN application and an abstract platform template and automatically generate an executable TLM with estimated timing that accurately reflects the system platform. We support homogeneous and heterogeneous multi-master platform models with shared memory or direct communication paradigm. The communication in heterogeneous platform modules is enabled with the transducer unit (TX) for protocol translation. TX units also act as message routers to support Network on Chip (NoC) communication.\par We evaluate our approach with the case study of the H.264 Encoder design process, in which the specification compliant design was reached from the KPN application in less than 2 hours. The example demonstrates that automatic generation of platform aware TLMs enables a fast, efficient and error resilient design process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "automatic generation; Kahn Process Network; process mapping; transaction level model", } @Article{Ozturk:2010:CDN, author = "Ozcan Ozturk and Mahmut Kandemir and Mary J. Irwin and Sri H. K. Narayanan", title = "Compiler directed network-on-chip reliability enhancement for chip multiprocessors", journal = j-SIGPLAN, volume = "45", number = "4", pages = "85--94", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755902", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Chip multiprocessors (CMPs) are expected to be the building blocks for future computer systems. While architecting these emerging CMPs is a challenging problem on its own, programming them is even more challenging. As the number of cores accommodated in chip multiprocessors increases, network-on-chip (NoC) type communication fabrics are expected to replace traditional point-to-point buses. Most of the prior software related work so far targeting CMPs focus on performance and power aspects.
However, as technology scales, components of a CMP are being increasingly exposed to both transient and permanent hardware failures. This paper presents and evaluates a compiler-directed power-performance aware reliability enhancement scheme for network-on-chip (NoC) based chip multiprocessors (CMPs). The proposed scheme improves on-chip communication reliability by duplicating messages traveling across CMP nodes such that, for each original message, its duplicate uses a different set of communication links as much as possible (to satisfy performance constraint). In addition, our approach tries to reuse communication links across the different phases of the program to maximize link shutdown opportunities for the NoC (to satisfy power constraint). Our results show that the proposed approach is very effective in improving on-chip network reliability, without causing excessive power or performance degradation. In our experiments, we also evaluate the performance oriented and energy oriented versions of our compiler-directed reliability enhancement scheme, and compare it to two pure hardware based fault tolerant routing schemes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "chip multiprocessors; compiler; noc; reliability", } @Article{Kulkarni:2010:IBP, author = "Prasad A. Kulkarni and Michael R. Jantz and David B. Whalley", title = "Improving both the performance benefits and speed of optimization phase sequence searches", journal = j-SIGPLAN, volume = "45", number = "4", pages = "95--104", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755903", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The issues of compiler optimization phase ordering and selection present important challenges to compiler developers in several domains, and in particular to the speed, code size, power, and cost-constrained domain of embedded systems. Different sequences of optimization phases have been observed to provide the best performance for different applications. Compiler writers and embedded systems developers have recently addressed this problem by conducting iterative empirical searches using machine-learning based heuristic algorithms in an attempt to find the phase sequences that are most effective for each application. Such searches are generally performed at the program level, although a few studies have been performed at the function level. The finer granularity of function-level searches has the potential to provide greater overall performance benefits, but only at the cost of slower searches caused by a greater number of performance evaluations that often require expensive program simulations. In this paper, we evaluate the performance benefits and search time increases of function-level approaches as compared to their program-level counterparts. We, then, present a novel search algorithm that conducts distinct function-level searches simultaneously, but requires only a single program simulation for evaluating the performance of potentially unique sequences for each function. 
Thus, our new hybrid search strategy provides the enhanced performance benefits of function-level searches with a search-time cost that is comparable to or less than program-level searches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "genetic algorithms; phase ordering", } @Article{Li:2010:ECU, author = "Weijia Li and Youtao Zhang", title = "An efficient code update scheme for {DSP} applications in mobile embedded systems", journal = j-SIGPLAN, volume = "45", number = "4", pages = "105--114", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755904", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "DSP processors usually provide dedicated address generation units (AGUs) to assist address computation. By carefully allocating variables in the memory, DSP compilers take advantage of AGUs and generate efficient code with compact size and improved performance. However, DSP applications running on mobile embedded systems often need to be updated after their initial releases. Studies showed that small changes at the source code level may significantly change the variable layout in the memory and thus the binary code, which causes large energy overheads to mobile embedded systems that patch through wireless or satellite communication, and often pecuniary burden to the users.\par In this paper, we propose an update-conscious code update scheme to effectively reduce patch size. It first performs incremental offset assignment based on a recent variable coalescing heuristic, and then summarizes the code difference using two types of update primitives. Our experimental results showed that using update-conscious code update can greatly improve code similarity and thus reduce the update script sizes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "context-aware script; context-unaware script; incremental coalescing general offset assignment (icgoa); incremental coalescing simple offset assignment (icsoa)", } @Article{Wernsing:2010:ECF, author = "John Robert Wernsing and Greg Stitt", title = "Elastic computing: a framework for transparent, portable, and adaptive multi-core heterogeneous computing", journal = j-SIGPLAN, volume = "45", number = "4", pages = "115--124", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755906", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past decade, system architectures have started on a clear trend towards increased parallelism and heterogeneity, often resulting in speedups of 10x to 100x. Despite numerous compiler and high-level synthesis studies, usage of such systems has largely been limited to device experts, due to significantly increased application design complexity. 
To reduce application design complexity, we introduce elastic computing - a framework that separates functionality from implementation details by enabling designers to use specialized functions, called elastic functions, which enable an optimization framework to explore thousands of possible implementations, even ones using different algorithms. Elastic functions allow designers to execute the same application code efficiently on potentially any architecture and for different runtime parameters such as input size, battery life, etc. In this paper, we present an initial elastic computing framework that transparently optimizes application code onto diverse systems, achieving significant speedups ranging from 1.3x to 46x on a hyper-threaded Xeon system with an FPGA accelerator, a 16-CPU Opteron system, and a quad-core Xeon system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "elastic computing; fpga; heterogeneous architectures; multi-core; speedup", } @Article{Biehl:2010:ISA, author = "Matthias Biehl and Chen DeJiu and Martin T{\"o}rngren", title = "Integrating safety analysis into the model-based development toolchain of automotive embedded systems", journal = j-SIGPLAN, volume = "45", number = "4", pages = "125--132", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755907", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The automotive industry has a growing demand for the seamless integration of safety analysis tools into the model-based development toolchain for embedded systems. This requires translating concepts of the automotive domain to the safety domain. We automate such a translation between the automotive architecture description language EAST-ADL2 and the safety analysis tool HiP-HOPS by using model transformations and by leveraging the advantages of different model transformation techniques. Through this integration, the analysis can be conducted early in the development process, when the system can be redesigned to fulfill safety goals with relatively low effort and cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "architecture description language; model-based development; safety analysis; tool integration", } @Article{Fischmeister:2010:SBP, author = "Sebastian Fischmeister and Yanmeng Ba", title = "Sampling-based program execution monitoring", journal = j-SIGPLAN, volume = "45", number = "4", pages = "133--142", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755908", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For its high overall cost during product development, program debugging is an important aspect of system development. Debugging is a hard and complex activity, especially in time-sensitive systems which have limited resources and demanding timing constraints. System tracing is a frequently used technique for debugging embedded systems. A specific use of system tracing is to monitor and debug control-flow problems in programs. 
However, it is difficult to implement because of the potentially high overhead it might introduce to the system and the changes which can occur to the system behavior due to tracing. To solve the above problems, in this work, we present a sampling-based approach to execution monitoring which specifically helps developers debug time-sensitive systems such as real-time applications. We build the system model and propose three theorems to determine the sampling period in different scenarios. We also design seven heuristics and an instrumentation framework to extend the sampling period, which can reduce the monitoring overhead and achieve an optimal tradeoff between accuracy and overhead introduced by instrumentation. Using this monitoring framework, we can use the information extracted through sampling to reconstruct the system state and execution paths to locate the deviation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "debugging; embedded system; monitoring; sampling; tracing", } @Article{Shrivastava:2010:CVE, author = "Aviral Shrivastava and Jongeun Lee and Reiley Jeyapaul", title = "Cache vulnerability equations for protecting data in embedded processor caches from soft errors", journal = j-SIGPLAN, volume = "45", number = "4", pages = "143--152", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755910", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Continuous technology scaling has brought us to a point where transistors have become extremely susceptible to cosmic radiation strikes, or soft errors. Inside the processor, caches are most vulnerable to soft errors, and techniques at various levels of design abstraction, e.g., fabrication, gate design, circuit design, and microarchitecture-level, have been developed to protect data in caches. However, no work has been done to investigate the effect of code transformations on the vulnerability of data in caches. Data is vulnerable to soft errors in the cache only if it will be read by the processor, and not if it will be overwritten. Since code transformations can change the read-write pattern of program variables, they significantly affect the soft error vulnerability of program variables in the cache. We observe that an opportunity often exists to significantly reduce the soft error vulnerability of cache data by trading off a little performance. However, even if one wanted to exploit this trade-off, it is difficult, since there are no efficient techniques to estimate the vulnerability of data in caches. To this end, this paper develops an efficient static analysis method to estimate program vulnerability in caches, which enables the compiler to exploit the performance-vulnerability trade-offs in applications.
Finally, as compared to simulation-based estimation, static analysis techniques provide insights into vulnerability calculations that suggest some simple schemes to reduce program vulnerability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cache vulnerability; code transformation; compiler technique; embedded processors; soft errors; static analysis", } @Article{Altmeyer:2010:RAT, author = "Sebastian Altmeyer and Claire Maiza and Jan Reineke", title = "Resilience analysis: tightening the {CRPD} bound for set-associative caches", journal = j-SIGPLAN, volume = "45", number = "4", pages = "153--162", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755888.1755911", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In preemptive real-time systems, scheduling analyses need --- in addition to the worst-case execution time --- the context-switch cost. In case of preemption, the preempted and the preempting task may interfere on the cache memory.\par This interference leads to additional cache misses in the preempted task. The delay due to these cache misses is referred to as the cache-related preemption delay (CRPD), which constitutes the major part of the context-switch cost.\par In this paper, we present a new approach to compute tight bounds on the CRPD for LRU set-associative caches, based on analyses of both the preempted and the preempting task. Previous approaches analyzing both the preempted and the preempting task were either imprecise or unsound.\par As the basis of our approach we introduce the notion of resilience: The resilience of a memory block of the preempted task is the maximal number of memory accesses a preempting task could perform without causing an additional miss to this block. By computing lower bounds on the resilience of blocks and an upper bound on the number of accesses by a preempting task, one can guarantee that some blocks may not contribute to the CRPD. The CRPD analysis based on resilience considerably outperforms previous approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cache-related preemption delay; lru caches; timing analysis", } @Article{Wang:2010:RRA, author = "Yi Wang and Duo Liu and Meng Wang and Zhiwei Qin and Zili Shao and Yong Guan", title = "{RNFTL}: a reuse-aware {NAND} flash translation layer for flash memory", journal = j-SIGPLAN, volume = "45", number = "4", pages = "163--172", month = apr, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1755951.1755912", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Apr 15 12:45:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we propose a hybrid-level flash translation layer (FTL) called RNFTL (Reuse-Aware NFTL) to improve the endurance and space utilization of NAND flash memory. Our basic idea is to prevent a primary block with many free pages from being erased in a merge operation. The preserved primary blocks are further reused as replacement blocks. In such a way, the space utilization and the number of erase counts for each block in NAND flash can be enhanced.
To the best of our knowledge, this is the first work to employ a reuse-aware strategy in FTL for improving the space utilization and endurance of NAND flash. We conduct experiments on a set of traces collected from real workloads in daily life. The experimental results show that our technique achieves significant improvements in space utilization, block lifetime, and wear-leveling compared with previous work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "endurance; flash memory; reuse; space utilization; wear-leveling", } @Article{Agerwala:2010:ECC, author = "Tilak Agerwala", title = "Exascale computing: the challenges and opportunities in the next decade", journal = j-SIGPLAN, volume = "45", number = "5", pages = "1--2", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693454", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Supercomputing systems have made great strides in recent years as the extensive computing needs of cutting-edge engineering work and scientific discovery have driven the development of more powerful systems. In 2008, the first petaflop machine was released, and historic trends indicate that in ten years, we should be at the exascale level. Indeed, various agencies are targeting a computer system capable of 1 Exaop ($10^{18}$ ops) of computation within the next decade. We believe that applications in many industries will be materially transformed by exascale computers.\par Meeting the exascale challenge will require significant innovation in technology, architecture and programmability. Power is a fundamental problem at all levels; traditional memory cost and performance are not keeping pace with compute potential; the storage hierarchy will have to be re-architected; networks will be a much bigger part of the system cost; reliability at exascale levels will require a holistic approach to architecture design, and programmability and ease-of-use will be an essential component to extract the promised performance at the exascale level.\par In this talk, I will discuss the importance of exascale computing and address the major challenges, touching on the areas of technology, architecture, reliability and usability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "architecture; challenges; exascale", } @Article{Mendez-Lojo:2010:SDO, author = "Mario M{\'e}ndez-Lojo and Donald Nguyen and Dimitrios Prountzos and Xin Sui and M. Amber Hassaan and Milind Kulkarni and Martin Burtscher and Keshav Pingali", title = "Structure-driven optimizations for amorphous data-parallel programs", journal = j-SIGPLAN, volume = "45", number = "5", pages = "3--14", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693457", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Irregular algorithms are organized around pointer-based data structures such as graphs and trees, and they are ubiquitous in applications.
Recent work by the Galois project has provided a systematic approach for parallelizing irregular applications based on the idea of optimistic or speculative execution of programs. However, the overhead of optimistic parallel execution can be substantial. In this paper, we show that many irregular algorithms have structure that can be exploited and present three key optimizations that take advantage of algorithmic structure to reduce speculative overheads. We describe the implementation of these optimizations in the Galois system and present experimental results to demonstrate their benefits. To the best of our knowledge, this is the first system to exploit algorithmic structure to optimize the execution of irregular programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "amorphous data-parallelism; cautious operator implementations; irregular programs; iteration coalescing; one-shot optimization; optimistic parallelization; synchronization overheads", } @Article{Coons:2010:GEU, author = "Katherine E. Coons and Sebastian Burckhardt and Madanlal Musuvathi", title = "{GAMBIT}: effective unit testing for concurrency libraries", journal = j-SIGPLAN, volume = "45", number = "5", pages = "15--24", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693458", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As concurrent programming becomes prevalent, software providers are investing in concurrency libraries to improve programmer productivity. Concurrency libraries improve productivity by hiding error-prone, low-level synchronization from programmers and providing higher-level concurrent abstractions. Testing such libraries is difficult, however, because concurrency failures often manifest only under particular scheduling circumstances. Current best testing practices are often inadequate: heuristic-guided fuzzing is not systematic, systematic schedule enumeration does not find bugs quickly, and stress testing is neither systematic nor fast.\par To address these shortcomings, we propose a prioritized search technique called GAMBIT that combines the speed benefits of heuristic-guided fuzzing with the soundness, progress, and reproducibility guarantees of stateless model checking. GAMBIT combines known techniques such as partial-order reduction and preemption-bounding with a generalized best-first search framework that prioritizes schedules likely to expose bugs. We evaluate GAMBIT's effectiveness on newly released concurrency libraries for Microsoft's {.NET} framework. Our experiments show that GAMBIT finds bugs more quickly than prior stateless model checking techniques without compromising coverage guarantees or reproducibility.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency; model checking; multithreading; partial-order reduction; preemption bound; software testing", } @Article{Lee:2010:FXC, author = "Jonathan K.
Lee and Jens Palsberg", title = "Featherweight {X10}: a core calculus for async-finish parallelism", journal = j-SIGPLAN, volume = "45", number = "5", pages = "25--36", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693459", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a core calculus with two of X10's key constructs for parallelism, namely async and finish. Our calculus forms a convenient basis for type systems and static analyses for languages with async-finish parallelism, and for tractable proofs of correctness. For example, we give a short proof of the deadlock-freedom theorem of Saraswat and Jagadeesan. Our main contribution is a type system that solves the open problem of context-sensitive may-happen-in-parallel analysis for languages with async-finish parallelism. We prove the correctness of our type system and we report experimental results of performing type inference on 13,000 lines of X10 code. Our analysis runs in polynomial time, takes a total of 28 seconds on our benchmarks, and produces a low number of false positives, which suggests that our analysis is a good basis for other analyses such as race detectors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "operational semantics; parallelism; static analysis", } @Article{Mannarswamy:2010:CAS, author = "Sandya Mannarswamy and Dhruva R. Chakrabarti and Kaushik Rajan and Sujoy Saraswati", title = "Compiler aided selective lock assignment for improving the performance of software transactional memory", journal = j-SIGPLAN, volume = "45", number = "5", pages = "37--46", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693460", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Atomic sections have been recently introduced as a language construct to improve the programmability of concurrent software. They simplify programming by not requiring the explicit specification of locks for shared data. Typically atomic sections are supported in software either through the use of optimistic concurrency by using transactional memory or through the use of pessimistic concurrency using compiler-assigned locks. As a software transactional memory (STM) system does not take advantage of the specific memory access patterns of an application it often suffers from false conflicts and high validation overheads. On the other hand, the compiler usually ends up assigning coarse grain locks as it relies on whole program points-to analysis which is conservative by nature. This adversely affects performance by limiting concurrency. In order to mitigate the disadvantages associated with STM's lock assignment scheme, we propose a hybrid approach which combines STM's lock assignment with a compiler aided selective lock assignment scheme (referred to as SCLA-STM). 
SCLA-STM overcomes the inefficiencies associated with a purely compile-time lock assignment approach by (i) using the underlying STM for shared variables where only a conservative analysis is possible by the compiler (e.g., in the presence of may-alias points to information) and (ii) being selective about the shared data chosen for the compiler-aided lock assignment. We describe our prototype SCLA-STM scheme implemented in the HP-UX IA-64 C/C++ compiler, using TL2 as our STM implementation. We show that SCLA-STM improves application performance for certain STAMP benchmarks from 1.68\% to 37.13\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compilers; multithreading; parallelization; performance", } @Article{Rossbach:2010:TPA, author = "Christopher J. Rossbach and Owen S. Hofmann and Emmett Witchel", title = "Is transactional programming actually easier?", journal = j-SIGPLAN, volume = "45", number = "5", pages = "47--56", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693462", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Chip multi-processors (CMPs) have become ubiquitous, while tools that ease concurrent programming have not. The promise of increased performance for all applications through ever more parallel hardware requires good tools for concurrent programming, especially for average programmers. Transactional memory (TM) has enjoyed recent interest as a tool that can help programmers program concurrently.\par The transactional memory (TM) research community is heavily invested in the claim that programming with transactional memory is easier than alternatives (like locks), but evidence for or against the veracity of this claim is scant. In this paper, we describe a user-study in which 237 undergraduate students in an operating systems course implement the same programs using coarse and fine-grain locks, monitors, and transactions. We surveyed the students after the assignment, and examined their code to determine the types and frequency of programming errors for each synchronization technique. Inexperienced programmers found baroque syntax a barrier to entry for transactional programming. On average, subjective evaluation showed that students found transactions harder to use than coarse-grain locks, but slightly easier to use than fine-grained locks. Detailed examination of synchronization errors in the students' code tells a rather different story. Overwhelmingly, the number and types of programming errors the students made was much lower for transactions than for locks. On a similar programming problem, over 70\% of students made errors with fine-grained locking, while less than 10\% made errors with transactions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "optimistic concurrency; synchronization; transactional memory", } @Article{Zyulkyarov:2010:DPU, author = "Ferad Zyulkyarov and Tim Harris and Osman S. 
Unsal and Adr{\'\i}an Cristal and Mateo Valero", title = "Debugging programs that use atomic blocks and transactional memory", journal = j-SIGPLAN, volume = "45", number = "5", pages = "57--66", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693463", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the emergence of research prototypes, programming using atomic blocks and transactional memory (TM) is becoming more attractive. This paper describes our experience building and using a debugger for programs written with these abstractions. We introduce three approaches: ({\em i\/}) debugging at the level of atomic blocks, where the programmer is shielded from implementation details (such as exactly what kind of TM is used, or indeed whether lock inference is used instead), ({\em ii\/}) debugging at the level of transactions, where conflict rates, read sets, write sets, and other TM internals are visible, and ({\em iii\/}) debug-time transactions, which let the programmer manipulate synchronization from within the debugger - e.g., enlarging the scope of an atomic block to try to identify a bug.\par In this paper we explain the rationale behind the new debugging approaches that we propose. We describe the design and implementation of an extension to the WinDbg debugger, enabling support for C\# programs using atomic blocks and TM. We also demonstrate the design of a 'conflict point discovery' technique for identifying program statements that introduce contention between transactions. We illustrate how these techniques can be used by optimizing a C\# version of the Genome application from STAMP TM benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "debugging; transactional memory", } @Article{Dalessandro:2010:NSS, author = "Luke Dalessandro and Michael F. Spear and Michael L. Scott", title = "{NOrec}: streamlining {STM} by abolishing ownership records", journal = j-SIGPLAN, volume = "45", number = "5", pages = "67--78", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693464", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Drawing inspiration from several previous projects, we present an ownership-record-free software transactional memory (STM) system that combines extremely low overhead with unusually clean semantics. While unlikely to scale to hundreds of active threads, this 'NOrec' system offers many appealing features: very low fast-path latency--as low as any system we know of that admits concurrent updates; publication and privatization safety; livelock freedom; a small, constant amount of global metadata, and full compatibility with existing data structure layouts; no false conflicts due to hash collisions; compatibility with both managed and unmanaged languages, and both static and dynamic compilation; and easy accommodation of closed nesting, inevitable (irrevocable) transactions, and starvation avoidance mechanisms. 
To the best of our knowledge, no extant STM system combines this set of features.\par While transactional memory for processors with hundreds of cores is likely to require hardware support, software implementations will be required for backward compatibility with current and near-future processors with 2--64 cores, as well as for fall-back in future machines when hardware resources are exhausted. Our experience suggests that NOrec may be an ideal candidate for such a software system. We also observe that it has considerable appeal for use within the operating system, and in systems that require both closed nesting and publication safety.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "ownership records; software transactional memory; transactional memory; transactional memory models", } @Article{Maldonado:2010:SST, author = "Walther Maldonado and Patrick Marlier and Pascal Felber and Adi Suissa and Danny Hendler and Alexandra Fedorova and Julia L. Lawall and Gilles Muller", title = "Scheduling support for transactional memory contention management", journal = j-SIGPLAN, volume = "45", number = "5", pages = "79--90", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693465", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional Memory (TM) is considered as one of the most promising paradigms for developing concurrent applications. TM has been shown to scale well on multiple cores when the data access pattern behaves 'well,' i.e., when few conflicts are induced. In contrast, data patterns with frequent write sharing, with long transactions, or when many threads contend for a smaller number of cores, result in numerous conflicts. Until recently, TM implementations had little control of transactional threads, which remained under the supervision of the kernel's transaction-ignorant scheduler. Conflicts are thus traditionally resolved by consulting an STM-level {\em contention manager}. Consequently, the contention managers of these 'conventional' TM implementations suffer from a lack of precision and often fail to ensure reasonable performance in high-contention workloads.\par Recently, scheduling-based TM contention-management has been proposed for increasing TM efficiency under high-contention [2, 5, 19]. However, only user-level schedulers have been considered. In this work, we propose, implement and evaluate several novel kernel-level scheduling support mechanisms for TM contention management. We also investigate different strategies for efficient communication between the kernel and the user-level TM library. To the best of our knowledge, our work is the first to investigate kernel-level support for TM contention management.\par We have introduced kernel-level TM scheduling support into both the Linux and Solaris kernels.
Our experimental evaluation demonstrates that lightweight kernel-level scheduling support significantly reduces the number of aborts while improving transaction throughput on various workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "contention management; scheduling; transactional memory", } @Article{Barreto:2010:LPN, author = "Jo{\~a}o Barreto and Aleksandar Dragojevi{\'c} and Paulo Ferreira and Rachid Guerraoui and Michal Kapalka", title = "Leveraging parallel nesting in transactional memory", journal = j-SIGPLAN, volume = "45", number = "5", pages = "91--100", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693466", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Exploiting the emerging reality of affordable multi-core architectures goes through providing programmers with simple abstractions that would enable them to easily turn their sequential programs into concurrent ones that expose as much parallelism as possible. While transactional memory promises to make concurrent programming easy for a wide programmer community, current implementations either disallow nested transactions to run in parallel or do not scale to arbitrary parallel nesting depths. This is an important obstacle to the central goal of transactional memory, as programmers can only start parallel threads in restricted parts of their code.\par This paper addresses the intrinsic difficulty behind the support for parallel nesting in transactional memory, and proposes a novel solution that, to the best of our knowledge, is the first practical solution to meet the lowest theoretical upper bound known for the problem.\par Using a synthetic workload configured to test parallel transactions on a multi-core machine, a practical implementation of our algorithm yields substantial speed-ups (up to 22x with 33 threads) relative to serial nesting, and shows that the time to start and commit transactions, as well as to detect conflicts, is independent of nesting depth.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "fork-join; nested parallel programs; transactional memory; work-stealing", } @Article{Torrellas:2010:ESC, author = "Josep Torrellas and Bill Gropp and Jaime Moreno and Kunle Olukotun and Vivek Sarkar", title = "Extreme scale computing: challenges and opportunities", journal = j-SIGPLAN, volume = "45", number = "5", pages = "101--102", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693468", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "architecture; challenges; exascale", } @Article{Arvind:2010:HI, author = "Arvind", title = "Is hardware innovation over?", journal = j-SIGPLAN, volume = "45", number = "5", pages = "103--104", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693455", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L =
"0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "My colleagues, promotion committees, research funding agencies and business people often wonder if there is need for any architecture research. There seems to be no room to dislodge Intel IA-32. Even the number of new Application-Specific Integrated Circuits (ASICs) seems to be declining each year, because of the ever-increasing development cost.\par This viewpoint ignores another reality which is that the future will be dominated by mobile devices such as smart phones and the infrastructure needed to support consumer services on these devices. This is already restructuring the IT industry. To the first-order, in the mobile world functionality is determined by what can be supported within a 3W power budget. The only way to reduce power by one to two orders of magnitude is via functionally specialized hardware blocks. A fundamental shift is needed in the current design flow of systems-on-a-chip (SoCs) to produce them in a less-risky and cost-effective manner.\par In this talk we will present, via examples, a method of designing systems that facilitates the synthesis of complex SoCs from reusable 'IP' modules. The technical challenge is to provide a method for connecting modules in a parallel setting so that the functionality and the performance of the composite are predictable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "hardware innovation; system-on-chip", } @Article{Baghsorkhi:2010:APM, author = "Sara S. Baghsorkhi and Matthieu Delahaye and Sanjay J. Patel and William D. Gropp and Wen-mei W. Hwu", title = "An adaptive performance modeling tool for {GPU} architectures", journal = j-SIGPLAN, volume = "45", number = "5", pages = "105--114", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693470", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an analytical model to predict the performance of\par general-purpose applications on a GPU architecture. The model is designed to provide performance information to an auto-tuning compiler and assist it in narrowing down the search to the more promising implementations. It can also be incorporated into a tool to help programmers better assess the performance bottlenecks in their code. We analyze each GPU kernel and identify how the kernel exercises major GPU microarchitecture features. To identify the performance bottlenecks accurately, we introduce an abstract interpretation of a GPU kernel, {\em work flow graph}, based on which we estimate the execution time of a GPU kernel. We validated our performance model on the NVIDIA GPUs using CUDA (Compute Unified Device Architecture). For this purpose, we used data parallel benchmarks that stress different GPU microarchitecture events such as uncoalesced memory accesses, scratch-pad memory bank conflicts, and control flow divergence, which must be accurately modeled but represent challenges to the analytical performance models. The proposed model captures full system complexity and shows high accuracy in predicting the performance trends of different optimized kernel implementations. 
We also describe our approach to extracting the performance model automatically from kernel code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "analytical model; GPU; parallel programming; performance estimation", } @Article{Choi:2010:MDA, author = "Jee W. Choi and Amik Singh and Richard W. Vuduc", title = "Model-driven autotuning of sparse matrix-vector multiply on {GPUs}", journal = j-SIGPLAN, volume = "45", number = "5", pages = "115--126", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693471", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a performance model-driven framework for automated performance tuning (autotuning) of sparse matrix-vector multiply (SpMV) on systems accelerated by graphics processing units (GPU). Our study consists of two parts.\par First, we describe several carefully hand-tuned SpMV implementations for GPUs, identifying key GPU-specific performance limitations, enhancements, and tuning opportunities. These implementations, which include variants on classical blocked compressed sparse row (BCSR) and blocked ELLPACK (BELLPACK) storage formats, match or exceed state-of-the-art implementations. For instance, our best BELLPACK implementation achieves up to 29.0 Gflop/s in single-precision and 15.7 Gflop/s in double-precision on the NVIDIA T10P multiprocessor (C1060), enhancing prior state-of-the-art unblocked implementations (Bell and Garland, 2009) by up to $1.8\times$ and $1.5\times$ for single- and double-precision, respectively.\par However, achieving this level of performance requires input matrix-dependent parameter tuning. Thus, in the second part of this study, we develop a performance model that can guide tuning. Like prior autotuning models for CPUs (e.g., Im, Yelick, and Vuduc, 2004), this model requires offline measurements and run-time estimation, but more directly models the structure of multithreaded vector processors like GPUs. We show that our model can identify the implementations that achieve within 15\% of those found through exhaustive search.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "GPU; performance modeling; sparse matrix-vector multiplication", } @Article{Zhang:2010:FTS, author = "Yao Zhang and Jonathan Cohen and John D. Owens", title = "Fast tridiagonal solvers on the {GPU}", journal = j-SIGPLAN, volume = "45", number = "5", pages = "127--136", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693472", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the performance of three parallel algorithms and their hybrid variants for solving tridiagonal linear systems on a GPU: cyclic reduction (CR), parallel cyclic reduction (PCR) and recursive doubling (RD). We develop an approach to measure, analyze, and optimize the performance of GPU programs in terms of memory access, computation, and control overhead.
We find that CR enjoys linear algorithm complexity but suffers from more algorithmic steps and bank conflicts, while PCR and RD have fewer algorithmic steps but do more work each step. To combine the benefits of the basic algorithms, we propose hybrid CR+PCR and CR+RD algorithms, which improve the performance of PCR, RD and CR by 21\%, 31\% and 61\% respectively. Our GPU solvers achieve up to a 28x speedup over a sequential LAPACK solver, and a 12x speedup over a multi-threaded CPU solver.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "GPGPU; performance optimization; tridiagonal linear system", } @Article{Sandes:2010:CUG, author = "Edans Flavius O. Sandes and Alba Cristina M. A. de Melo", title = "{CUDAlign}: using {GPU} to accelerate the comparison of megabase genomic sequences", journal = j-SIGPLAN, volume = "45", number = "5", pages = "137--146", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693473", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Biological sequence comparison is a very important operation in Bioinformatics. Even though there do exist exact methods to compare biological sequences, these methods are often neglected due to their quadratic time and space complexity. In order to accelerate these methods, many GPU algorithms were proposed in the literature. Nevertheless, all of them restrict the size of the smallest sequence in such a way that Megabase genome comparison is prevented. In this paper, we propose and evaluate CUDAlign, a GPU algorithm that is able to compare Megabase biological sequences with an exact Smith--Waterman affine gap variant. CUDAlign was implemented in CUDA and tested in two GPU boards, separately. For real sequences whose size range from 1MBP (Megabase Pairs) to 47MBP, a close to uniform GCUPS (Giga Cells Updates per Second) was obtained, showing the potential scalability of our approach. Also, CUDAlign was able to compare the human chromosome 21 and the chimpanzee chromosome 22. This operation took 21 hours on GeForce GTX 280, resulting in a peak performance of 20.375 GCUPS. As far as we know, this is the first time such huge chromosomes are compared with an exact method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "biological sequence comparison; GPU; Smith--Waterman", } @Article{Hofmeyr:2010:LBS, author = "Steven Hofmeyr and Costin Iancu and Filip Blagojevi{\'c}", title = "Load balancing on speed", journal = j-SIGPLAN, volume = "45", number = "5", pages = "147--158", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693475", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To fully exploit multicore processors, applications are expected to provide a large degree of thread-level parallelism. While adequate for low core counts and their typical workloads, the current load balancing support in operating systems may not be able to achieve efficient hardware utilization for parallel workloads. 
Balancing run queue length globally ignores the needs of parallel applications where threads are required to make equal progress. In this paper we present a load balancing technique designed specifically for parallel applications running on multicore systems. Instead of balancing run queue length, our algorithm balances the time a thread has executed on ``faster'' and ``slower'' cores. We provide a user level implementation of speed balancing on UMA and NUMA multi-socket architectures running Linux and discuss behavior across a variety of workloads, usage scenarios and programming models. Our results indicate that speed balancing when compared to the native Linux load balancing improves performance and provides good performance isolation in all cases considered. Speed balancing is also able to provide comparable or better performance than DWRR, a fair multi-processor scheduling implementation inside the Linux kernel. Furthermore, parallel application performance is often determined by the implementation of synchronization operations and speed balancing alleviates the need for tuning the implementations of such primitives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "load balancing; operating systems; parallel applications", } @Article{Hoefler:2010:SCP, author = "Torsten Hoefler and Christian Siebert and Andrew Lumsdaine", title = "Scalable communication protocols for dynamic sparse data exchange", journal = j-SIGPLAN, volume = "45", number = "5", pages = "159--168", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693476", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many large-scale parallel programs follow a bulk synchronous parallel (BSP) structure with distinct computation and communication phases. Although the communication phase in such programs may involve all (or large numbers) of the participating processes, the actual communication operations are usually sparse in nature. As a result, communication phases are typically expressed explicitly using point-to-point communication operations or collective operations. We define the dynamic sparse data-exchange (DSDE) problem and derive bounds in the well known LogGP model. While current approaches work well with static applications, they run into limitations as modern applications grow in scale, and as the problems that are being solved become increasingly irregular and dynamic.\par To enable the compact and efficient expression of the communication phase, we develop suitable sparse communication protocols for irregular applications at large scale. We discuss different irregular applications and show the sparsity in the communication for real-world input data. We discuss the time and memory complexity of commonly used protocols for the DSDE problem and develop {\em NBX\/} --a novel fast algorithm with constant memory overhead for solving it. Algorithm {\em NBX\/} improves the runtime of a sparse data-exchange among 8,192 processors on BlueGene/P by a factor of 5.6. 
In an application study, we show improvements of up to a factor of 28.9 for a parallel breadth-first search on 8,192 BlueGene/P processors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "alltoall; distributed termination; irregular algorithms; nonblocking collective operations; sparse data exchange", } @Article{Romein:2010:LCI, author = "John W. Romein and P. Chris Broekema and Jan David Mol and Rob V. van Nieuwpoort", title = "The {LOFAR} correlator: implementation and performance analysis", journal = j-SIGPLAN, volume = "45", number = "5", pages = "169--178", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693477", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "LOFAR is the first of a new generation of radio telescopes. Rather than using expensive dishes, it forms a distributed sensor network that combines the signals from many thousands of simple antennas. Its revolutionary design allows observations in a frequency range that has hardly been studied before.\par Another novel feature of LOFAR is the elaborate use of {\em software\/} to process data, where traditional telescopes use customized hardware. This dramatically increases flexibility and substantially reduces costs, but the high processing and bandwidth requirements compel the use of a supercomputer. The antenna signals are centrally combined, filtered, optionally beam-formed, and correlated by an IBM Blue Gene/P.\par This paper describes the implementation of the so-called correlator. To meet the real-time requirements, the application is highly optimized, and reaches exceptionally high computational and I/O efficiencies. Additionally, we study the scalability of the system, and show that it scales well beyond the requirements. The optimizations allow us to use only half the planned amount of resources, {\em and\/} process 50\% more telescope data, significantly improving the effectiveness of the entire telescope.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "correlator; IBM Blue Gene/P; LOFAR", } @Article{Tzannes:2010:LBS, author = "Alexandros Tzannes and George C. Caragea and Rajeev Barua and Uzi Vishkin", title = "Lazy binary-splitting: a run-time adaptive work-stealing scheduler", journal = j-SIGPLAN, volume = "45", number = "5", pages = "179--190", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693479", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Lazy Binary Splitting (LBS), a user-level scheduler of nested parallelism for shared-memory multiprocessors that builds on existing Eager Binary Splitting work-stealing (EBS) implemented in Intel's Threading Building Blocks (TBB), but improves performance and ease-of-programming. In its simplest form (SP), EBS requires manual tuning by repeatedly running the application under carefully controlled conditions to determine a {\em stop-splitting-threshold (sst)\/} for every do-all loop in the code.
This threshold limits the parallelism and prevents excessive overheads for fine-grain parallelism. Besides being tedious, this tuning also over-fits the code to some particular dataset, platform and calling context of the do-all loop, resulting in poor performance portability for the code. LBS overcomes both the performance portability and ease-of-programming pitfalls of a manually fixed threshold by adapting dynamically to run-time conditions without requiring tuning.\par We compare LBS to Auto-Partitioner (AP), the latest default scheduler of TBB, which does not require manual tuning either but lacks context portability, and outperform it by 38.9\% using TBB's default AP configuration, and by 16.2\% after we tuned AP to our experimental platform. We also compare LBS to SP by manually finding SP's sst using a training dataset and then running both on a different execution dataset. LBS outperforms SP by 19.5\% on average, while allowing for improved performance portability without requiring tedious manual tuning. LBS also outperforms SP with {\em sst=1}, its default value when undefined, by 56.7\%, and serializing work-stealing (SWS), another work-stealer, by 54.7\%. Finally, compared to serializing inner parallelism (SI), which has been used by OpenMP, LBS is 54.2\% faster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic scheduling; load balancing; nested parallelism; thread scheduling; work stealing", } @Article{Radojkovic:2010:TSB, author = "Petar Radojkovi{\'c} and Vladimir {\v{C}}akarevi{\'c} and Javier Verd{\'u} and Alex Pajuelo and Francisco J. Cazorla and Mario Nemirovsky and Mateo Valero", title = "Thread to strand binding of parallel network applications in massive multi-threaded systems", journal = j-SIGPLAN, volume = "45", number = "5", pages = "191--202", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693480", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In processors with several levels of hardware resource sharing, like CMPs in which each core is an SMT, the scheduling process becomes more complex than in processors with a single level of resource sharing, such as pure-SMT or pure-CMP processors. Once the operating system selects the set of applications to simultaneously schedule on the processor (workload), each application/thread must be assigned to one of the hardware contexts (strands). We call this last scheduling step the Thread to Strand Binding or TSB. In this paper, we show that the TSB impact on the performance of processors with several levels of shared resources is high. We measure a variation of up to 59\% between different TSBs of real multithreaded network applications running on the UltraSPARC T2 processor, which has three levels of resource sharing. In our view, this problem is going to be more acute in future multithreaded architectures comprising more cores, more contexts per core, and more levels of resource sharing.\par We propose a resource-sharing aware TSB algorithm (TSBSched) that significantly facilitates the problem of thread to strand binding for software-pipelined applications, representative of multithreaded network applications.
Our systematic approach encapsulates both the characteristics of the multithreaded processors under study and the structure of the software-pipelined applications. Once calibrated for a given processor architecture, our proposal does not require hardware knowledge on the side of the programmer, nor extensive profiling of the application. We validate our algorithm on the UltraSPARC T2 processor running a set of real multithreaded network applications on which we report improvements of up to 46\% compared to the current state-of-the-art dynamic schedulers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "CMT; process scheduling; simultaneous multithreading; UltraSPARC T2", } @Article{Zhang:2010:DCS, author = "Eddy Z. Zhang and Yunlian Jiang and Xipeng Shen", title = "Does cache sharing on modern {CMP} matter to the performance of contemporary multithreaded programs?", journal = j-SIGPLAN, volume = "45", number = "5", pages = "203--212", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693482", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most modern Chip Multiprocessors (CMP) feature shared cache on chip. For multithreaded applications, the sharing reduces communication latency among co-running threads, but also results in cache contention.\par A number of studies have examined the influence of cache sharing on multithreaded applications, but most of them have concentrated on the design or management of shared cache, rather than a systematic measurement of the influence. Consequently, prior measurements have been constrained by the reliance on simulators, the use of out-of-date benchmarks, and the limited coverage of deciding factors. The influence of CMP cache sharing on contemporary multithreaded applications remains preliminarily understood.\par In this work, we conduct a systematic measurement of the influence on two kinds of commodity CMP machines, using a recently released CMP benchmark suite, PARSEC, with a number of potentially important factors on program, OS, and architecture levels considered. The measurement shows some surprising results. Contrary to the commonly perceived importance of cache sharing, neither positive nor negative effects from the cache sharing are significant for most of the program executions, regardless of the types of parallelism, input datasets, architectures, numbers of threads, and assignments of threads to cores. After a detailed analysis, we find that the main reason is the mismatch of current development and compilation of multithreaded applications and CMP architectures.
By transforming the programs in a cache-sharing-aware manner, we observe up to 36\% performance increase when the threads are placed on cores appropriately.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "chip multiprocessors; parallel program optimizations; shared cache; thread scheduling", } @Article{Liu:2010:IPL, author = "Lixia Liu and Zhiyuan Li", title = "Improving parallelism and locality with asynchronous algorithms", journal = j-SIGPLAN, volume = "45", number = "5", pages = "213--222", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693483", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As multicore chips become the main building blocks for high performance computers, many numerical applications face a performance impediment due to the limited hardware capacity to move data between the CPU and the off-chip memory. This is especially true for large computing problems solved by iterative algorithms because of the large data set typically used. Loop tiling, also known as loop blocking, was shown previously to be an effective way to enhance data locality, and hence to reduce the memory bandwidth pressure, for a class of iterative algorithms executed on a single processor. Unfortunately, the tiled programs suffer from reduced parallelism because only the loop iterations within a single tile can be easily parallelized. In this work, we propose to use the asynchronous model to enable effective loop tiling such that both parallelism and locality can be attained simultaneously. Asynchronous algorithms were previously proposed to reduce the communication cost and synchronization overhead between processors. Our new discovery is that carefully controlled asynchrony and loop tiling can significantly improve the performance of parallel iterative algorithms on multicore processors due to simultaneously attained data locality and loop-level parallelism. We present supporting evidence from experiments with three well-known numerical kernels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "asynchronous algorithms; data locality; loop tiling; memory performance; parallel numerical programs", } @Article{Castaldo:2010:SLP, author = "Anthony M. Castaldo and R. Clint Whaley", title = "Scaling {LAPACK} panel operations using parallel cache assignment", journal = j-SIGPLAN, volume = "45", number = "5", pages = "223--232", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693484", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In LAPACK many matrix operations are cast as block algorithms which iteratively process a panel using an unblocked algorithm and then update a remainder matrix using the high performance Level 3 BLAS. The Level 3 BLAS have excellent weak scaling, but panel processing tends to be bus bound, and thus scales with bus speed rather than the number of processors ({\em p\/}). 
Amdahl's law therefore ensures that as {\em p\/} grows, the panel computation will become the dominant cost of these LAPACK routines. Our contribution is a novel parallel cache assignment approach which we show scales well with {\em p}. We apply this general approach to the QR and LU panel factorizations on two commodity 8-core platforms with very different cache structures, and demonstrate superlinear panel factorization speedups on both machines. Other approaches to this problem demand complicated reformulations of the computational approach, new kernels to be tuned, new mathematics, an inflation of the high-order flop count, and do not perform as well. By demonstrating a straight-forward alternative that avoids all of these contortions and scales with {\em p}, we address a critical stumbling block for dense linear algebra in the age of massive parallelism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "Atlas; factorization; GPU; LAPACK; LU; multicore; multi-core; parallel; QR", } @Article{Sutherland:2010:CTC, author = "Dean F. Sutherland and William L. Scherlis", title = "Composable thread coloring", journal = j-SIGPLAN, volume = "45", number = "5", pages = "233--244", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693485", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces the language-independent concept of ``thread usage policy.'' Many multi-threaded software systems contain policies that regulate associations among threads, executable code, and potentially shared state. A system, for example, may constrain which threads are permitted to execute particular code segments, usually as a means to constrain those threads from accessing or writing particular elements of state. These policies ensure properties such as state confinement or reader/writer constraints, often without recourse to locking or transaction discipline.\par Our approach allows developers to concisely document their thread usage policies in a manner that enables the use of sound scalable analysis to assess consistency of policy and as-written code. This paper identifies the key semantic concepts of our thread coloring language and illustrates how to use its succinct source-level annotations to express models of thread usage policies, following established annotation conventions for Java.\par We have built a prototype static analysis tool, implemented as an integrated development environment plug-in (for the Eclipse IDE), that notifies developers of discrepancies between policy annotations and as-written code. Our analysis technique uses several underlying algorithms based on abstract interpretation, call-graphs, and type inference. The resulting overall analysis is both sound and composable. We have used this prototype analysis tool in case studies to model and analyze more than a million lines of code.\par Our validation process included field trials on a wide variety of complex large-scale production code selected by the host organizations. Our in-field experience led us to focus on potential adoptability by real-world developers. We have developed techniques that can reduce annotation density to less than one line per thousand lines of code (KLOC). 
In addition, the prototype analysis tool supports an incremental and iterative approach to modeling and analysis. This approach enabled field trial partners to directly target areas of greatest concern and to achieve useful results within a few hours.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "annotation; Java; state consistency; multicore; race conditions; state confinement; thread policy", } @Article{Agrawal:2010:HLF, author = "Kunal Agrawal and Charles E. Leiserson and Jim Sukha", title = "Helper locks for fork-join parallel programming", journal = j-SIGPLAN, volume = "45", number = "5", pages = "245--256", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693487", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Helper locks allow programs with large parallel critical sections, called parallel regions, to execute more efficiently by enlisting processors that might otherwise be waiting on the helper lock to aid in the execution of the parallel region. Suppose that a processor {\em p\/} is executing a parallel region {\em A\/} after having acquired the lock {\em L\/} protecting {\em A}. If another processor {\em p\/}$'$ tries to acquire {\em L}, then instead of blocking and waiting for {\em p\/} to complete {\em A}, processor {\em p\/}$'$ joins {\em p\/} to help it complete {\em A}. Additional processors not blocked on {\em L\/} may also help to execute {\em A}.\par The HELPER runtime system can execute fork-join computations augmented with helper locks and parallel regions. HELPER supports the unbounded nesting of parallel regions. We provide theoretical completion-time and space-usage bounds for a design of HELPER based on work stealing. Specifically, let {\em V\/} be the number of parallel regions in a computation, let {\em T\/}$_1$ be its work, and let {\em T\/}$_\infty$ be its `aggregate span' --- the sum of the spans (critical-path lengths) of all its parallel regions. We prove that HELPER completes the computation in expected time {\em O\/}({\em T\/}$_1$/{\em P\/} + {\em T\/}$_\infty$ + {\em PV\/}) on {\em P\/} processors. This bound indicates that programs with a small number of highly parallel critical sections can attain linear speedup. For the space bound, we prove that HELPER completes a program using only $O(P S_1)$ stack space, where $S_1$ is the sum, over all regions, of the stack space used by each region in a serial execution. Finally, we describe a prototype of HELPER implemented by modifying the Cilk multithreaded runtime system. We used this prototype to implement a concurrent hash table with a resize operation protected by a helper lock.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "Cilk; dynamic multithreading; helper lock; nested parallelism; parallel region; scheduling; work stealing", } @Article{Bronson:2010:PCB, author = "Nathan G.
Bronson and Jared Casper and Hassan Chafi and Kunle Olukotun", title = "A practical concurrent binary search tree", journal = j-SIGPLAN, volume = "45", number = "5", pages = "257--268", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693488", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a concurrent relaxed balance AVL tree algorithm that is fast, scales well, and tolerates contention. It is based on optimistic techniques adapted from software transactional memory, but takes advantage of specific knowledge of the algorithm to reduce overheads and avoid unnecessary retries. We extend our algorithm with a fast linearizable clone operation, which can be used for consistent iteration of the tree. Experimental evidence shows that our algorithm outperforms a highly tuned concurrent skip list for many access patterns, with an average of 39\% higher single-threaded throughput and 32\% higher multi-threaded throughput over a range of contention levels and operation mixes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "optimistic concurrency; snapshot isolation", } @Article{Tallent:2010:ALC, author = "Nathan R. Tallent and John M. Mellor-Crummey and Allan Porterfield", title = "Analyzing lock contention in multithreaded applications", journal = j-SIGPLAN, volume = "45", number = "5", pages = "269--280", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693489", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many programs exploit shared-memory parallelism using multithreading. Threaded codes typically use locks to coordinate access to shared data. In many cases, contention for locks reduces parallel efficiency and hurts scalability. Being able to quantify and attribute lock contention is important for understanding where a multithreaded program needs improvement.\par This paper proposes and evaluates three strategies for gaining insight into performance losses due to lock contention. First, we consider using a straightforward strategy based on call stack profiling to attribute idle time and show that it fails to yield insight into lock contention. Second, we consider an approach that builds on a strategy previously used for analyzing idleness in work-stealing computations; we show that this strategy does not yield insight into lock contention. Finally, we propose a new technique for measurement and analysis of lock contention that uses data associated with locks to blame lock holders for the idleness of spinning threads. Our approach incurs $\leq$ 5\% overhead on a quantum chemistry application that makes extensive use of locking (65M distinct locks, a maximum of 340K live locks, and an average of 30K lock acquisitions per second per thread) and attributes lock contention to its full static and dynamic calling contexts. 
Our strategy, implemented in HPCToolkit, is fully distributed and should scale well to systems with large core counts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "HPCToolkit; lock contention; multithreading; performance analysis", } @Article{Upadhyaya:2010:UDS, author = "Gautam Upadhyaya and Samuel P. Midkiff and Vijay S. Pai", title = "Using data structure knowledge for efficient lock generation and strong atomicity", journal = j-SIGPLAN, volume = "45", number = "5", pages = "281--292", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693490", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To achieve high performance on multicore systems, shared-memory parallel languages must efficiently implement atomic operations. The commonly used and studied paradigms for atomicity are fine-grained locking, which is both difficult to program and error-prone; optimistic software transactions, which require substantial overhead to detect and recover from atomicity violations; and compiler-generation of locks from programmer-specified atomic sections, which leads to serialization whenever imprecise pointer analysis suggests the mere possibility of a conflicting operation. This paper presents a new strategy for compiler-generated locking that uses data structure knowledge to facilitate more precise alias and lock generation analyses and reduce unnecessary serialization. Implementing and evaluating these ideas in the Java language shows that the new strategy achieves eight-thread speedups of 0.83 to 5.9 for the five STAMP benchmarks studied, outperforming software transactions on all but one benchmark, and nearly matching programmer-specified fine-grained locks on all but one benchmark. The results also indicate that compiler knowledge of data structures improves the effectiveness of compiler analysis, boosting eight-thread performance by up to 300\%. Further, the new analysis allows for software support of strong atomicity with less than 1\% overhead for two benchmarks and less than 20\% for three others. The strategy also nearly matches the performance of programmer-specified fine-grained locks for the SPECjbb2000 benchmark, which has traditionally not been amenable to static analyses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "automatic lock generation; parallel programming; transactional memory", } @Article{Ali:2010:MAC, author = "Qasim Ali and Samuel Pratt Midkiff and Vijay S. Pai", title = "Modeling advanced collective communication algorithms on {Cell}-based systems", journal = j-SIGPLAN, volume = "45", number = "5", pages = "293--304", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693492", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents and validates performance models for a variety of high-performance collective communication algorithms for systems with Cell processors.
The systems modeled include a single Cell processor, two Cell chips on a Cell Blade, and a cluster of Cell Blades. The models extend PLogP, the well-known point-to-point performance model, by accounting for the unique hardware characteristics of the Cell (e.g., heterogeneous interconnects and DMA engines) and by applying the model to collective communication. This paper also presents a micro-benchmark suite to accurately measure the extended PLogP parameters on the Cell Blade and then uses these parameters to model different algorithms for the {\em barrier, broadcast, reduce, all-reduce}, and {\em all-gather\/} collective operations. Out of 425 total performance predictions, 398 of them see less than 10\% error compared to the actual execution time and all of them see less than 15\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "algorithms; collective communication; modeling", } @Article{Zhai:2010:PPP, author = "Jidong Zhai and Wenguang Chen and Weimin Zheng", title = "{PHANTOM}: predicting performance of parallel applications on large-scale parallel machines using a single node", journal = j-SIGPLAN, volume = "45", number = "5", pages = "305--314", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693493", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For designers of large-scale parallel computers, it is greatly desired that performance of parallel applications can be predicted at the design phase. However, this is difficult because the execution time of parallel applications is determined by several factors, including sequential computation time in each process, communication time and their convolution. Despite previous efforts, it remains an open problem to estimate sequential computation time in each process accurately and efficiently for large-scale parallel applications on non-existing target machines.\par This paper proposes a novel approach to predict the sequential computation time accurately and efficiently. We assume that there is at least one node of the target platform but the whole target system need not be available. We make two main technical contributions. First, we employ deterministic replay techniques to execute any process of a parallel application on a single node at real speed. As a result, we can simply measure the real sequential computation time on a target node for each process one by one. Second, we observe that computation behavior of processes in parallel applications can be clustered into a few groups while processes in each group have similar computation behavior. This observation helps us reduce measurement time significantly because we only need to execute representative parallel processes instead of all of them.\par We have implemented a performance prediction framework, called PHANTOM, which integrates the above computation-time acquisition approach with a trace-driven network simulator. We validate our approach on several platforms. For ASCI Sweep3D, the error of our approach is less than 5\% on 1024 processor cores. 
Compared to a recent regression-based prediction approach, PHANTOM presents better prediction accuracy across different platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "deterministic replay; parallel application; performance prediction; trace-driven simulation", } @Article{Aleen:2010:IDD, author = "Farhana Aleen and Monirul Sharif and Santosh Pande", title = "Input-driven dynamic execution prediction of streaming applications", journal = j-SIGPLAN, volume = "45", number = "5", pages = "315--324", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693494", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Streaming applications are promising targets for effectively utilizing multicores because of their inherent amenability to pipelined parallelism. While existing methods of orchestrating streaming programs on multicores have mostly been static, real-world applications show ample variations in execution time that may cause the achieved speedup and throughput to be sub-optimal. One of the principal challenges for moving towards dynamic orchestration has been the lack of approaches that can predict or accurately estimate upcoming dynamic variations in execution efficiently, well before they occur.\par In this paper, we propose an automated dynamic execution behavior prediction approach that can be used to efficiently estimate the time that will be spent in different pipeline stages for upcoming inputs without requiring program execution. This enables dynamic balancing or scheduling of execution to achieve better speedup. Our approach first uses dynamic taint analysis to automatically generate an input-based execution characterization of the streaming program, which identifies the key control points where variation in execution might occur with the associated input elements that cause these variations. We then automatically generate a light-weight emulator from the program using this characterization that can simulate the execution paths taken for new streaming inputs and provide an estimate of execution time that will be spent in processing these inputs, enabling prediction of possible dynamic variations. We present experimental evidence that our technique can accurately and efficiently estimate execution behaviors for several benchmarks.
Our experiments show that dynamic orchestration using our predicted execution behavior can achieve considerably higher speedup than static orchestration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic execution; parallelization; software pipeline", } @Article{Lupei:2010:TST, author = "Daniel Lupei and Bogdan Simion and Don Pinto and Matthew Misler and Mihai Burcea and William Krick and Cristiana Amza", title = "Towards scalable and transparent parallelization of multiplayer games using transactional memory support", journal = j-SIGPLAN, volume = "45", number = "5", pages = "325--326", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693496", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This work addresses the problem of parallelizing multiplayer games using {\em software\/} Transactional Memory (STM) support. Using a realistic high impact application, we show that STM provides not only ease of programming, but also {\em better\/} performance than that achievable with state-of-the-art lock-based programming.\par Towards this goal, we use SynQuake, a game benchmark which extracts the main data structures and the essential features of the popular multiplayer game Quake, but can be driven with a synthetic workload generator that flexibly emulates client game actions and various hot-spot scenarios in the game world.\par We implement, evaluate and compare the STM version of SynQuake with a state-of-the-art lock-based parallelization of Quake, which we ported to SynQuake. While in STM-SynQuake support for maintaining the consistency of each potentially complex game action is automatic, conservative locking of surrounding objects within a bounding box for the duration of the game action is inherently needed in lock-based SynQuake. This leads to a higher scalability factor of STM-SynQuake versus lock-based SynQuake, due to a higher degree of false sharing in the latter.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "massively multiplayer games; scalability; software transactional memory; synchronization", } @Article{Perarnau:2010:KRC, author = "Swann Perarnau and Guillaume Huard", title = "{KRASH}: reproducible {CPU} load generation on many cores machines", journal = j-SIGPLAN, volume = "45", number = "5", pages = "327--328", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693497", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this article we present KRASH, a tool for reproducible generation of system-level CPU load. This tool is intended for use in shared memory machines equipped with multiple CPU cores which are usually exploited concurrently by several users. The objective of KRASH is to enable parallel application developers to validate their resources use strategies on a partially loaded machine by {\em replaying\/} an observed load in concurrence with their application. 
To reach this objective, we present a method for CPU load generation which behaves as realistically as possible: the resulting load is similar to the load that would be produced by concurrent processes run by other users. Nevertheless, contrary to a simple run of a CPU-intensive application, KRASH is not sensitive to system scheduling decisions. The main benefit brought by KRASH is this reproducibility: no matter how many processes are present in the system the load generated by our tool strictly respects a given load profile. To our knowledge, KRASH is the only tool that implements the generation of a dynamic load profile (a load varying with time). When used to generate a constant load, KRASH result is among the most realistic ones. Furthermore, KRASH provides more flexibility than other tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "CPU load generation; experimentation testbed; many cores", } @Article{Muralidhara:2010:IAS, author = "Sai Prashanth Muralidhara and Mahmut Kandemir and Padma Raghavan", title = "Intra-application shared cache partitioning for multithreaded applications", journal = j-SIGPLAN, volume = "45", number = "5", pages = "329--330", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693498", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we address the problem of partitioning a shared cache when the executing threads belong to the same application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cache; multicore; parallel applications", } @Article{Dash:2010:SPT, author = "Alokika Dash and Brian Demsky", title = "Symbolic prefetching in transactional distributed shared memory", journal = j-SIGPLAN, volume = "45", number = "5", pages = "331--332", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693499", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a static analysis for the automatic generation of symbolic prefetches in a transactional distributed shared memory. A symbolic prefetch specifies the first object to be prefetched followed by a list of field offsets or array indices that define a path through the heap. We also provide an object caching framework and language extensions to support our approach. To our knowledge, this is the first prefetching approach that can prefetch objects whose addresses have not been computed or predicted.\par Our approach makes aggressive use of both prefetching and caching of remote objects to hide network latency. It relies on the transaction commit mechanism to preserve the simple transactional consistency model that we present to the developer. We have evaluated this approach on several shared memory parallel benchmarks and a distributed gaming benchmark to observe speedups due to prefetching and caching. 
", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "distributed shared memory; symbolic prefetching; transactional memory", } @Article{Chakrabarti:2010:NAE, author = "Dhruva R. Chakrabarti", title = "New abstractions for effective performance analysis of {STM} programs", journal = j-SIGPLAN, volume = "45", number = "5", pages = "333--334", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693500", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the design and implementation of a dynamic conflict graph annotated with fine grain transaction characteristics and show that this is important information for effective performance analysis of a software transactional memory (STM) program. We show how to implement the necessary support in a compiler and an STM with minimal perturbation of the original behavior of the application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency; software transactional memory", } @Article{Zhang:2010:CSP, author = "Chao Zhang and Chen Ding and Xiaoming Gu and Kirk Kelsey and Tongxin Bai and Xiaobing Feng", title = "Continuous speculative program parallelization in software", journal = j-SIGPLAN, volume = "45", number = "5", pages = "335--336", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693501", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper addresses the problem of extracting coarse-grained parallelism from large sequential code. It builds on BOP, a system for software speculative parallelization. BOP lets a user mark possibly parallel regions (PPR) in a program and at run-time speculatively executes PPR instances using Unix processes. This short paper presents a new run-time support called continuous speculation, which fully utilizes available parallelism to tolerate differences in PPR task size and processor speed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "software speculative parallelization", } @Article{Marjanovic:2010:ECC, author = "Vladimir Marjanovic and Jes{\'u}s Labarta and Eduard Ayguad{\'e} and Mateo Valero", title = "Effective communication and computation overlap with hybrid {MPI\slash SMPSs}", journal = j-SIGPLAN, volume = "45", number = "5", pages = "337--338", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Communication overhead is one of the dominant factors affecting performance in high-performance computing systems. To reduce the negative impact of communication, programmers overlap communication and computation by using asynchronous communication primitives.
This increases code complexity, requiring more development effort and making less readable programs. This paper presents the hybrid use of MPI and SMPSs (SMP superscalar, a task-based shared-memory programming model) that allows the programmer to easily introduce the asynchrony necessary to overlap communication and computation. We demonstrate the hybrid use of MPI/SMPSs with the high-performance LINPACK benchmark (HPL), and compare it to the pure MPI implementation, which uses the look-ahead technique to overlap communication and computation. The hybrid MPI/SMPSs version significantly improves the performance of the pure MPI version, getting close to the asymptotic performance at medium problem sizes and still getting significant benefits at small/large problem sizes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "hybrid MPI/SMPSs; LINPACK; MPI; parallel programming model", } @Article{Cederman:2010:SLF, author = "Daniel Cederman and Philippas Tsigas", title = "Supporting lock-free composition of concurrent data objects", journal = j-SIGPLAN, volume = "45", number = "5", pages = "339--340", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693503", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lock-free data objects offer several advantages over their blocking counterparts, such as being immune to deadlocks and convoying and, more importantly, being highly concurrent. But they share a common disadvantage in that the operations they provide are difficult to compose into larger atomic operations while still guaranteeing lock-freedom. We present a lock-free methodology for composing highly concurrent linearizable objects together by unifying their linearization points. This makes it possible to relatively easily introduce atomic lock-free move operations to a wide range of concurrent objects. Experimental evaluation has shown that the operations originally supported by the data objects keep their performance behavior under our methodology.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "composition; data structures; lock-free", } @Article{Guo:2010:SSL, author = "Yi Guo and Jisheng Zhao and Vincent Cave and Vivek Sarkar", title = "{SLAW}: a scalable locality-aware adaptive work-stealing scheduler for multi-core systems", journal = j-SIGPLAN, volume = "45", number = "5", pages = "341--342", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693504", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This poster introduces SLAW, a Scalable Locality-aware Adaptive Work-stealing scheduler. The SLAW features an adaptive task scheduling algorithm combined with a locality-aware scheduling framework.\par Past work has demonstrated the pros and cons of using fixed scheduling policies, such as {\em work-first\/} and {\em help-first}, in different cases without a clear winner. Prior work also assumes the availability and successful execution of a serial version of the parallel program. 
This assumption can limit the expressiveness of dynamic task parallel languages.\par The SLAW scheduler supports both work-first and help-first policies simultaneously. It does so by using an {\em adaptive\/} approach that selects a scheduling policy on a per-task basis at runtime. The SLAW scheduler also establishes bounds on the stack usage and the heap space needed to store tasks. The experimental results for the benchmarks studied show that SLAW's adaptive scheduler achieves 0.98x - 9.2x speedup over the help-first scheduler and 0.97x - 4.5x speedup over the work-first scheduler for 64-thread executions, thereby establishing the robustness of using an adaptive approach instead of a fixed policy. In contrast, the help-first policy is 9.2x slower than work-first in the worst case for a fixed help-first policy, and the work-first policy is 3.7x slower than help-first in the worst case for a fixed work-first policy. Further, for large irregular recursive parallel computations, the adaptive scheduler runs with bounded stack usage and achieves performance (and supports data sizes) that cannot be delivered by the use of any single fixed policy.\par The SLAW scheduler is designed for programming models where locality hints are provided to the runtime by the programmer or compiler, and achieves {\em locality-awareness\/} by grouping workers into {\em places}. Locality awareness can lead to improved performance by increasing temporal data reuse within a worker and among workers in the same place. Our experimental results show that locality-aware scheduling can achieve up to 2.6x speedup over locality-oblivious scheduling, for the benchmarks studied.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "help-first; work-first; work-stealing", } @Article{Yang:2010:OCG, author = "Yi Yang and Ping Xiang and Jingfei Kong and Huiyang Zhou", title = "An optimizing compiler for {GPGPU} programs with input-data sharing", journal = j-SIGPLAN, volume = "45", number = "5", pages = "343--344", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693505", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing high performance GPGPU programs is challenging for application developers since the performance is dependent upon how well the code leverages the hardware features of specific graphics processors. To solve this problem and relieve application developers of low-level hardware-specific optimizations, we introduce a novel compiler to optimize GPGPU programs. Our compiler takes a naive GPU kernel function, which is functionally correct but without any consideration for performance optimization. The compiler then analyzes the code, identifies memory access patterns, and generates optimized code. The proposed compiler optimizations target at one category of scientific and media processing algorithms, which has the characteristics of input-data sharing when computing neighboring output pixels/elements. Many commonly used algorithms, such as matrix multiplication, convolution, etc., share such characteristics. For these algorithms, novel approaches are proposed to enforce memory coalescing and achieve effective data reuse. 
Data prefetching and hardware-specific tuning are also performed automatically with our compiler framework. The experimental results based on a set of applications show that our compiler achieves very high performance, either superior or very close to the highly fine-tuned library, NVIDIA CUBLAS 2.1.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler; GPGPU", } @Article{Chandramowlishwaran:2010:ACC, author = "Aparna Chandramowlishwaran and Kathleen Knobe and Richard Vuduc", title = "Applying the concurrent collections programming model to asynchronous parallel dense linear algebra", journal = j-SIGPLAN, volume = "45", number = "5", pages = "345--346", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693506", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This poster is a case study on the application of a novel programming model, called Concurrent Collections (CnC), to the implementation of an asynchronous-parallel algorithm for computing the Cholesky factorization of dense matrices. In CnC, the programmer expresses her computation in terms of application-specific operations, partially-ordered by semantic scheduling constraints. We demonstrate the performance potential of CnC in this poster, by showing that our Cholesky implementation nearly matches or exceeds competing vendor-tuned codes and alternative programming models. We conclude that the CnC model is well-suited for expressing asynchronous-parallel algorithms on emerging multicore systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "asynchronous algorithms; concurrent collections; dense linear algebra", } @Article{Hoffmann:2010:AHS, author = "Henry Hoffmann and Jonathan Eastep and Marco D. Santambrogio and Jason E. Miller and Anant Agarwal", title = "Application heartbeats for software performance and health", journal = j-SIGPLAN, volume = "45", number = "5", pages = "347--348", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693507", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Adaptive, or self-aware, computing has been proposed to help application programmers confront the growing complexity of multicore software development. However, existing approaches to adaptive systems are largely ad hoc and often do not manage to incorporate the true performance goals of the applications they are designed to support. This paper presents an enabling technology for adaptive computing systems: Application Heartbeats. The Application Heartbeats framework provides a simple, standard programming interface that applications can use to indicate their performance and system software (and hardware) can use to query an application's performance. 
The PARSEC benchmark suite is instrumented with Application Heartbeats to show the broad applicability of the interface, and an external resource scheduler demonstrates the use of the interface by assigning cores to an application to maintain a designated performance goal.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "adaptive algorithms", } @Article{Porter:2010:MTM, author = "Donald E. Porter and Emmett Witchel", title = "Modeling transactional memory workload performance", journal = j-SIGPLAN, volume = "45", number = "5", pages = "349--350", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory promises to make parallel programming easier than with fine-grained locking, while performing just as well. This performance claim is not always borne out because an application may violate a common-case assumption of the TM designer or because of external system effects. In order to help programmers assess the suitability of their code for transactional memory, this work introduces a formal model of transactional memory as well as a tool, called Syncchar. Syncchar can predict the speedup of a conversion from locks to transactions within 25\% for the STAMP benchmarks. Because getting good performance from transactions is more difficult than commonly appreciated, developers need tools to tune transactional performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "performance; Syncchar; transactional memory", } @Article{Carter:2010:PLN, author = "John D. Carter and William B. Gardner and Gary Grewal", title = "The {Pilot} library for novice {MPI} programmers", journal = j-SIGPLAN, volume = "45", number = "5", pages = "351--352", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Pilot library is a new method for programming MPI-enabled clusters in C, targeted at novice parallel programmers. Formal elements from Communicating Sequential Processes (CSP) are used to realize a process/channel model of parallel computation that reduces opportunities for deadlock and other communication errors. This simple model, plus an application programming interface (API) styled after C's formatted I/O, are designed to make the library easy to learn.
The Pilot library exists as a thin layer on top of any standard Message Passing Interface (MPI) implementation, preserving MPI's portability and efficiency, with little performance overhead arising as result of Pilot's additional features.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "C; cluster programming; collective operations; deadlock detection; high-performance computing; MPI", } @Article{Jang:2010:DTE, author = "Byunghyun Jang and Perhaad Mistry and Dana Schaa and Rodrigo Dominguez and David Kaeli", title = "Data transformations enabling loop vectorization on multithreaded data parallel architectures", journal = j-SIGPLAN, volume = "45", number = "5", pages = "353--354", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Loop vectorization, a key feature exploited to obtain high performance on Single Instruction Multiple Data (SIMD) vector architectures, is significantly hindered by irregular memory access patterns in the data stream. This paper describes data transformations that allow us to vectorize loops targeting massively multithreaded data parallel architectures. We present a mathematical model that captures loop-based memory access patterns and computes the most appropriate data transformations in order to enable vectorization. Our experimental results show that the proposed data transformations can significantly increase the number of loops that can be vectorized and enhance the data-level parallelism of applications. Our results also show that the overhead associated with our data transformations can be easily amortized as the size of the input data set increases. For the set of high performance benchmark kernels studied, we achieve consistent and significant performance improvements (up to 11.4X) by applying vectorization using our data transformation approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data transformation; GPGPU; loop vectorization", } @Article{Buehrer:2010:DPS, author = "Gregory Buehrer and Srinivasan Parthasarathy and Shirish Tatikonda", title = "A distributed placement service for graph-structured and tree-structured data", journal = j-SIGPLAN, volume = "45", number = "5", pages = "355--356", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effective data placement strategies can enhance the performance of data-intensive applications implemented on high end computing clusters. Such strategies can have a significant impact in localizing the computation, in minimizing synchronization (communication) costs, in enhancing reliability (via strategic replication policies), and in ensuring a balanced workload or enhancing the available bandwidth from massive storage devices (e.g. disk arrays).\par Existing work has largely targeted the placement of relatively simple data types or entities (e.g. elements, vectors, sets, and arrays). 
Here we investigate several hash-based distributed data placement methods targeting tree- and graph-structured data, and develop a locality enhancing placement service for large cluster systems. Target applications include the placement of a single large graph (e.g. Web graph), a single large tree (e.g. large XML file), a forest of graphs or trees (e.g. XML database) and other specialized graph data types - bi-partite (query-click graphs), directed acyclic graphs etc. We empirically evaluate our service by demonstrating its use in improving mining executions for pattern discovery, nearest neighbor searching, graph computations, and applications that combine link and content analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data placement; distributed computing; structured data", } @Article{Li:2010:SVC, author = "Guodong Li and Ganesh Gopalakrishnan and Robert M. Kirby and Dan Quinlan", title = "A symbolic verifier for {CUDA} programs", journal = j-SIGPLAN, volume = "45", number = "5", pages = "357--358", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a preliminary automated verifier based on mechanical decision procedures which is able to prove functional correctness of CUDA programs and guarantee to detect bugs such as race conditions. We also employ a symbolic partial order reduction (POR) technique to mitigate the interleaving explosion problem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cuda; formal verification; SPMD; symbolic analysis", } @Article{Richards:2010:ADB, author = "Gregor Richards and Sylvain Lebresne and Brian Burg and Jan Vitek", title = "An analysis of the dynamic behavior of {JavaScript} programs", journal = j-SIGPLAN, volume = "45", number = "6", pages = "1--12", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806598", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The JavaScript programming language is widely used for web programming and, increasingly, for general purpose computing. As such, improving the correctness, security and performance of JavaScript applications has been the driving force for research in type systems, static analysis and compiler techniques for this language. Many of these techniques aim to rein in some of the most dynamic features of the language, yet little seems to be known about how programmers actually utilize the language or these features. In this paper we perform an empirical study of the dynamic behavior of a corpus of widely-used JavaScript programs, and analyze how and why the dynamic features are used.
We report on the degree of dynamism that is exhibited by these JavaScript programs and compare that with assumptions commonly made in the literature and accepted industry benchmark suites.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic behavior; dynamic metrics; execution tracing; javascript; program analysis", } @Article{Bond:2010:BEC, author = "Michael D. Bond and Graham Z. Baker and Samuel Z. Guyer", title = "{Breadcrumbs}: efficient context sensitivity for dynamic bug detection analyses", journal = j-SIGPLAN, volume = "45", number = "6", pages = "13--24", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806599", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Calling context--the set of active methods on the stack--is critical for understanding the dynamic behavior of large programs. Dynamic program analysis tools, however, are almost exclusively context insensitive because of the prohibitive cost of representing calling contexts at run time. Deployable dynamic analyses, in particular, have been limited to reporting only static program locations.\par This paper presents Breadcrumbs, an efficient technique for recording and reporting dynamic calling contexts. It builds on an existing technique for computing a compact (one word) encoding of each calling context that client analyses can use in place of a program location. The key feature of our system is a search algorithm that can reconstruct a calling context from its encoding using only a static call graph and a small amount of dynamic information collected at cold (infrequently executed) callsites. Breadcrumbs requires no offline training or program modifications, and handles all language features, including dynamic class loading.\par We use Breadcrumbs to add context sensitivity to two dynamic analyses: a data-race detector and an analysis for diagnosing null pointer exceptions. On average, it adds 10\% to 20\% runtime overhead, depending on a tunable parameter that controls how much dynamic information is collected. Collecting less information lowers the overhead, but can result in a search space explosion. In some cases this causes reconstruction to fail, but in most cases Breadcrumbs produces non-trivial calling contexts that have the potential to significantly improve both the precision of the analyses and the quality of the bug reports.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "bug detection; context sensitivity; dynamic analysis", } @Article{Ruwase:2010:DLE, author = "Olatunji Ruwase and Shimin Chen and Phillip B. Gibbons and Todd C. Mowry", title = "Decoupled lifeguards: enabling path optimizations for dynamic correctness checking tools", journal = j-SIGPLAN, volume = "45", number = "6", pages = "25--35", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806600", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic correctness checking tools (a.k.a.
lifeguards) can detect a wide array of correctness issues, such as memory, security, and concurrency misbehavior, in unmodified executables at run time. However, lifeguards that are implemented using dynamic binary instrumentation (DBI) often slow down the monitored application by 10-50X, while proposals that replace DBI with hardware still see 3-8X slowdowns. The remaining overhead is the cost of performing the lifeguard analysis itself. In this paper, we explore compiler optimization techniques to reduce this overhead.\par The lifeguard software is typically structured as a set of event-driven handlers, where the events are individual instructions in the monitored application's dynamic instruction stream. We propose to {\em decouple\/} the lifeguard checking code from the application that it is monitoring so that the lifeguard analysis can be invoked at the granularity of {\em hot paths\/} in the monitored application. In this way, we are able to find many more opportunities for eliminating redundant work in the lifeguard analysis, even starting with well-optimized applications and hand-tuned lifeguard handlers. Experimental results with two lifeguard frameworks - one DBI-based and one hardware-assisted - show significant reduction in monitoring overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic code optimization; dynamic correctness checking; dynamic program analysis", } @Article{Lee:2010:JSD, author = "Byeongcheol Lee and Ben Wiedermann and Martin Hirzel and Robert Grimm and Kathryn S. McKinley", title = "{Jinn}: synthesizing dynamic bug detectors for foreign language interfaces", journal = j-SIGPLAN, volume = "45", number = "6", pages = "36--49", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806601", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming language specifications mandate static and dynamic analyses to preclude syntactic and semantic errors. Although individual languages are usually well-specified, composing languages is not, and this poor specification is a source of many errors in {\em multilingual\/} programs. For example, virtually all Java programs compose Java and C using the Java Native Interface (JNI). Since JNI is informally specified, developers have difficulty using it correctly, and current Java compilers and virtual machines (VMs) inconsistently check only a subset of JNI constraints.\par This paper's most significant contribution is to show how to synthesize dynamic analyses from state machines to detect foreign function interface (FFI) violations. We identify three classes of FFI constraints encoded by eleven state machines that capture thousands of JNI and Python/C FFI rules. We use a mapping function to specify which state machines, transitions, and program entities (threads, objects, references) to check at each FFI call and return. From this function, we synthesize a context-specific dynamic analysis to find FFI bugs. We build bug detection tools for JNI and Python/C using this approach. For JNI, we dynamically and transparently interpose the analysis on Java and C language transitions through the JVM tools interface. The resulting tool, called Jinn, is compiler and virtual machine {\em independent}. 
It detects and diagnoses a wide variety of FFI bugs that other tools miss. This approach greatly reduces the annotation burden by exploiting common FFI constraints: whereas the generated Jinn code is 22,000+ lines, we wrote only 1,400 lines of state machine and mapping code. Overall, this paper lays the foundation for a more principled approach to developing correct multilingual software and a more concise and automated approach to FFI specification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic analysis; ffi bugs; foreign function interfaces (FFI); java native interface (jni); multilingual programs; python/C; specification; specification generation", } @Article{Prabhu:2010:SPS, author = "Prakash Prabhu and Ganesan Ramalingam and Kapil Vaswani", title = "Safe programmable speculative parallelism", journal = j-SIGPLAN, volume = "45", number = "6", pages = "50--61", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806603", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Execution order constraints imposed by dependences can serialize computation, preventing parallelization of code and algorithms. Speculating on the value(s) carried by dependences is one way to break such critical dependences. Value speculation has been used effectively at a low level, by compilers and hardware. In this paper, we focus on the use of speculation {\em by programmers\/} as an algorithmic paradigm to parallelize seemingly sequential code.\par We propose two new language constructs, {\em speculative composition\/} and {\em speculative iteration}. These constructs enable programmers to declaratively express speculative parallelism in programs: to indicate when and how to speculate, increasing the parallelism in the program, without concerning themselves with mundane implementation details.\par We present a core language with speculation constructs and mutable state and present a formal operational semantics for the language. We use the semantics to define the notion of a correct speculative execution as one that is equivalent to a non-speculative execution. In general, speculation requires a runtime mechanism to undo the effects of speculative computation in the case of mispredictions. We describe a set of conditions under which such rollback can be avoided. We present a static analysis that checks if a given program satisfies these conditions. This allows us to implement speculation efficiently, without the overhead required for rollbacks.\par We have implemented the speculation constructs as a C\# library, along with the static checker for safety.
We present an empirical evaluation of the efficacy of this approach to parallelization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "purity; rollback freedom; safety; speculative parallelism; value speculation", } @Article{Tian:2010:SSP, author = "Chen Tian and Min Feng and Rajiv Gupta", title = "Supporting speculative parallelization in the presence of dynamic data structures", journal = j-SIGPLAN, volume = "45", number = "6", pages = "62--73", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806604", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The availability of multicore processors has led to significant interest in compiler techniques for speculative parallelization of sequential programs. Isolation of speculative state from non-speculative state forms the basis of such speculative techniques as this separation enables recovery from misspeculations. In our prior work on CorD [35,36] we showed that for array and scalar variable based programs copying of data between speculative and non-speculative memory can be highly optimized to support state separation that yields significant speedups on multicore machines available today. However, we observe that in context of heap-intensive programs that operate on linked dynamic data structures, state separation based speculative parallelization poses many challenges. The copying of data structures from non-speculative to speculative state (copy-in operation) can be very expensive due to the large sizes of dynamic data structures. The copying of updated data structures from speculative state to non-speculative state (copy-out operation) is made complex due to the changes in the shape and size of the dynamic data structure made by the speculative computation. In addition, we must contend with the need to translate pointers internal to dynamic data structures between their non-speculative and speculative memory addresses. In this paper we develop an augmented design for the representation of dynamic data structures such that all of the above operations can be performed efficiently. Our experiments demonstrate significant speedups on a real machine for a set of programs that make extensive use of heap based dynamic data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "multicore processors; speculative parallelization", } @Article{Kandemir:2010:CTA, author = "Mahmut Kandemir and Taylan Yemliha and SaiPrashanth Muralidhara and Shekhar Srikantaiah and Mary Jane Irwin and Yuanrui Zhnag", title = "Cache topology aware computation mapping for multicores", journal = j-SIGPLAN, volume = "45", number = "6", pages = "74--85", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806605", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The main contribution of this paper is a compiler based, cache topology aware code optimization scheme for emerging multicore systems. 
This scheme distributes the iterations of a loop to be executed in parallel across the cores of a target multicore machine and schedules the iterations assigned to each core. Our goal is to improve the utilization of the on-chip multi-layer cache hierarchy and to maximize overall application performance. We evaluate our cache topology aware approach using a set of twelve applications and three different commercial multicore machines. In addition, to study some of our experimental parameters in detail and to explore future multicore machines (with higher core counts and deeper on-chip cache hierarchies), we also conduct a simulation based study. The results collected from our experiments with three Intel multicore machines show that the proposed compiler-based approach is very effective in enhancing performance. In addition, our simulation results indicate that optimizing for the on-chip cache hierarchy will be even more important in future multicores with increasing numbers of cores and cache levels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cache; compiler; multicore; multi-level; topology-aware", } @Article{Yang:2010:GCM, author = "Yi Yang and Ping Xiang and Jingfei Kong and Huiyang Zhou", title = "A {GPGPU} compiler for memory optimization and parallelism management", journal = j-SIGPLAN, volume = "45", number = "6", pages = "86--97", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806606", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel optimizing compiler for general purpose computation on graphics processing units (GPGPU). It addresses two major challenges of developing high performance GPGPU programs: effective utilization of GPU memory hierarchy and judicious management of parallelism.\par The input to our compiler is a na{\"\i}ve GPU kernel function, which is functionally correct but without any consideration for performance optimization. The compiler analyzes the code, identifies its memory access patterns, and generates both the optimized kernel and the kernel invocation parameters. Our optimization process includes vectorization and memory coalescing for memory bandwidth enhancement, tiling and unrolling for data reuse and parallelism management, and thread block remapping or address-offset insertion for partition-camping elimination. The experiments on a set of scientific and media processing algorithms show that our optimized code achieves very high performance, either superior or very close to the highly fine-tuned library, NVIDIA CUBLAS 2.2, and up to 128 times speedups over the naive versions.
Another distinguishing feature of our compiler is the understandability of the optimized code, which is useful for performance analysis and algorithm refinement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler; gpgpu", } @Article{Eggers:2010:AL, author = "Susan Eggers", title = "{2010 Athena} lecture", journal = j-SIGPLAN, volume = "45", number = "6", pages = "98--98", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806608", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Susan Eggers, a Professor of Computer Science and Engineering at the University of Washington, joined her department in 1989. She received a B.A. in 1965 from Connecticut College and a Ph. D. in 1989 from the University of California, Berkeley. Her research interests are in computer architecture and back-end compiler optimization, with an emphasis on experimental performance analysis. With her colleague Hank Levy and their students, she developed the first commercially viable multithreaded architecture, Simultaneous Multithreading, adopted by Intel (as Hyperthreading), IBM, Sun and others. Her current research is in the areas of distributed dataflow machines, FPGAs and chip multiprocessors. In 1989 Professor Eggers was awarded an IBM Faculty Development Award, in 1990 an NSF Presidential Young Investigator Award, in 1994 the Microsoft Professorship in Computer Science and Engineering, and in 2009 the ACM-W Athena Lecturer. She is a Fellow of the ACM and IEEE, a Fellow of the AAAS, and a member of the National Academy of Engineering.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "invited talk", } @Article{Yang:2010:SLI, author = "Jean Yang and Chris Hawblitzel", title = "Safe to the last instruction: automated verification of a type-safe operating system", journal = j-SIGPLAN, volume = "45", number = "6", pages = "99--110", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806610", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Typed assembly language (TAL) and Hoare logic can verify the absence of many kinds of errors in low-level code. We use TAL and Hoare logic to achieve highly automated, static verification of the safety of a new operating system called Verve. Our techniques and tools mechanically verify the safety of every assembly language instruction in the operating system, run-time system, drivers, and applications (in fact, every part of the system software except the boot loader). Verve consists of a 'Nucleus' that provides primitive access to hardware and memory, a kernel that builds services on top of the Nucleus, and applications that run on top of the kernel. The Nucleus, written in verified assembly language, implements allocation, garbage collection, multiple stacks, interrupt handling, and device access. The kernel, written in C\# and compiled to TAL, builds higher-level services, such as preemptive threads, on top of the Nucleus. A TAL checker verifies the safety of the kernel and applications. 
A Hoare-style verifier with an automated theorem prover verifies both the safety and correctness of the Nucleus. Verve is, to the best of our knowledge, the first operating system mechanically verified to guarantee both type and memory safety. More generally, Verve's approach demonstrates a practical way to mix high-level typed code with low-level untyped code in a verifiably safe manner.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "operating system; run-time system; type safety; verification", } @Article{Tatlock:2010:BEV, author = "Zachary Tatlock and Sorin Lerner", title = "Bringing extensibility to verified compilers", journal = j-SIGPLAN, volume = "45", number = "6", pages = "111--121", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806611", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Verified compilers, such as Leroy's CompCert, are accompanied by a fully checked correctness proof. Both the compiler and proof are often constructed with an interactive proof assistant. This technique provides a strong, end-to-end correctness guarantee on top of a small trusted computing base. Unfortunately, these compilers are also challenging to extend since each additional transformation must be proven correct in full formal detail.\par At the other end of the spectrum, techniques for compiler correctness based on a domain-specific language for writing optimizations, such as Lerner's Rhodium and Cobalt, make the compiler easy to extend: the correctness of additional transformations can be checked completely automatically. Unfortunately, these systems provide a weaker guarantee since their end-to-end correctness has not been proven fully formally.\par We present an approach for compiler correctness that provides the best of both worlds by bridging the gap between compiler verification and compiler extensibility. In particular, we have extended Leroy's CompCert compiler with an execution engine for optimizations written in a domain-specific language, and proved that this execution engine preserves program semantics, using the Coq proof assistant. We present our CompCert extension, XCert, including the details of its execution engine and proof of correctness in Coq. Furthermore, we report on the important lessons learned for making the proof development manageable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compiler optimization; correctness; extensibility", } @Article{Chlipala:2010:UST, author = "Adam Chlipala", title = "{Ur}: statically-typed metaprogramming with type-level record computation", journal = j-SIGPLAN, volume = "45", number = "6", pages = "122--133", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806612", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "{\em Dependent types\/} provide a strong foundation for specifying and verifying rich properties of programs through type-checking.
The earliest implementations combined dependency, which allows types to mention program variables; with type-level computation, which facilitates expressive specifications that compute with recursive functions over types. While many recent applications of dependent types omit the latter facility, we argue in this paper that it deserves more attention, even when implemented without dependency.\par In particular, the ability to use functional programs as specifications enables {\em statically-typed metaprogramming\/}: programs write programs, and static type-checking guarantees that the generating process never produces invalid code. Since our focus is on generic validity properties rather than full correctness verification, it is possible to engineer type inference systems that are very effective in narrow domains. As a demonstration, we present Ur, a programming language designed to facilitate metaprogramming with first-class records and names. On top of Ur, we implement Ur/Web, a special standard library that enables the development of modern Web applications. Ad-hoc code generation is already in wide use in the popular Web application frameworks, and we show how that generation may be tamed using types, without forcing metaprogram authors to write proofs or forcing metaprogram users to write any fancy types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dependent types; metaprogramming", } @Article{Emmi:2010:PVT, author = "Michael Emmi and Rupak Majumdar and Roman Manevich", title = "Parameterized verification of transactional memories", journal = j-SIGPLAN, volume = "45", number = "6", pages = "134--145", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806613", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe an automatic verification method to check whether transactional memories ensure strict serializability, a key property assumed of the transactional interface. Our main contribution is a technique for effectively verifying parameterized systems. The technique merges ideas from parameterized hardware and protocol verification--verification by invisible invariants and symmetry reduction--with ideas from software verification--template-based invariant generation and satisfiability checking for quantified formul{\ae} (modulo theories). The combination enables us to precisely model and analyze unbounded systems while taming state explosion.\par Our technique enables automated proofs that two-phase locking (TPL), dynamic software transactional memory (DSTM), and transactional locking II (TL2) systems ensure strict serializability. The verification is challenging since the systems are unbounded in several dimensions: the number and length of concurrently executing transactions, and the size of the shared memory they access, have no finite limit.
In contrast, state-of-the-art software model checking tools such as BLAST and TVLA are unable to validate either system, due to inherent expressiveness limitations or state explosion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "parameterized verification; transactional memory", } @Article{Pizlo:2010:SFT, author = "Filip Pizlo and Lukasz Ziarek and Petr Maj and Antony L. Hosking and Ethan Blanton and Jan Vitek", title = "{SCHISM}: fragmentation-tolerant real-time garbage collection", journal = j-SIGPLAN, volume = "45", number = "6", pages = "146--159", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806615", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Managed languages such as Java and C\# are being considered for use in hard real-time systems. A hurdle to their widespread adoption is the lack of garbage collection algorithms that offer predictable space-and-time performance in the face of fragmentation. We introduce SCHISM/CMR, a new concurrent and real-time garbage collector that is fragmentation tolerant and guarantees time-and-space worst-case bounds while providing good throughput. SCHISM/CMR combines mark-region collection of fragmented objects and arrays (arraylets) with separate replication-copying collection of immutable arraylet spines, so as to cope with external fragmentation when running in small heaps. We present an implementation of SCHISM/CMR in the Fiji VM, a high-performance Java virtual machine for mission-critical systems, along with a thorough experimental evaluation on a wide variety of architectures, including server-class and embedded systems. The results show that SCHISM/CMR tolerates fragmentation better than previous schemes, with a much more acceptable throughput penalty.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "fragmentation; mark-region; mark-sweep; real-time; replication-copying", } @Article{Xu:2010:DIU, author = "Guoqing Xu and Atanas Rountev", title = "Detecting inefficiently-used containers to avoid bloat", journal = j-SIGPLAN, volume = "45", number = "6", pages = "160--173", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806616", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Runtime bloat degrades significantly the performance and scalability of software systems. An important source of bloat is the inefficient use of containers. It is expensive to create inefficiently-used containers and to invoke their associated methods, as this may ultimately execute large volumes of code, with call stacks dozens deep, and allocate many temporary objects.\par This paper presents practical static and dynamic tools that can find inappropriate use of containers in Java programs. At the core of these tools is a base static analysis that identifies, for each container, the objects that are added to this container and the key statements (i.e., heap loads and stores) that achieve the semantics of common container operations such as {\em ADD\/} and {\em GET}. 
The static tool finds problematic uses of containers by considering the nesting relationships among the loops where these {\em semantics-achieving statements\/} are located, while the dynamic tool can instrument these statements and find inefficiencies by profiling their execution frequencies.\par The high precision of the base analysis is achieved by taking advantage of a context-free language (CFL)-reachability formulation of points-to analysis and by accounting for container-specific properties. It is demand-driven and client-driven, facilitating refinement specific to each queried container object and increasing scalability. The tools built with the help of this analysis can be used both to avoid the creation of container-related performance problems early during development, and to help with diagnosis when problems are observed during tuning. Our experimental results show that the static tool has a low false positive rate and produces more relevant information than its dynamic counterpart. Further case studies suggest that significant optimization opportunities can be found by focusing on statically-identified containers for which high allocation frequency is observed at run time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cfl reachability; container bloat; points-to analysis", } @Article{Xu:2010:FLU, author = "Guoqing Xu and Nick Mitchell and Matthew Arnold and Atanas Rountev and Edith Schonberg and Gary Sevitsky", title = "Finding low-utility data structures", journal = j-SIGPLAN, volume = "45", number = "6", pages = "174--186", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806617", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many opportunities for easy, big-win, program optimizations are missed by compilers. This is especially true in highly layered Java applications. Often at the heart of these missed optimization opportunities lie computations that, with great expense, produce data values that have little impact on the program's final output. Constructing a new date formatter to format every date, or populating a large set full of expensively constructed structures only to check its size: these involve costs that are out of line with the benefits gained. This disparity between the formation costs and accrued benefits of data structures is at the heart of much runtime bloat.\par We introduce a run-time analysis to discover these {\em low-utility\/} data structures. The analysis employs dynamic thin slicing, which naturally associates costs with value flows rather than raw data flows. It constructs a model of the incremental, hop-to-hop, costs and benefits of each data structure. The analysis then identifies suspicious structures based on imbalances of its incremental costs and benefits. To decrease the memory requirements of slicing, we introduce {\em abstract dynamic thin slicing}, which performs thin slicing over bounded abstract domains. We have modified the IBM J9 commercial JVM to implement this approach.\par We demonstrate two client analyses: one that finds objects that are expensive to construct but are not necessary for the forward execution, and a second that pinpoints ultimately-dead values.
We have successfully applied them to large-scale and long-running Java applications. We show that these analyses are effective at detecting operations that have unbalanced costs and benefits.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "abstract dynamic thin slicing; cost benefit analysis; memory bloat", } @Article{Mytkowicz:2010:EAJ, author = "Todd Mytkowicz and Amer Diwan and Matthias Hauswirth and Peter F. Sweeney", title = "Evaluating the accuracy of {Java} profilers", journal = j-SIGPLAN, volume = "45", number = "6", pages = "187--197", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806618", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance analysts profile their programs to find methods that are worth optimizing: the 'hot' methods. This paper shows that four commonly-used Java profilers ({\em xprof, hprof, jprofile, and yourkit\/}) often disagree on the identity of the hot methods. If two profilers disagree, at least one must be incorrect. Thus, there is a good chance that a profiler will mislead a performance analyst into wasting time optimizing a cold method with little or no performance improvement.\par This paper uses causality analysis to evaluate profilers and to gain insight into the source of their incorrectness. It shows that these profilers all violate a fundamental requirement for sampling based profilers: to be correct, a sampling-based profiler must collect samples randomly.\par We show that a proof-of-concept profiler, which collects samples randomly, does not suffer from the above problems. Specifically, we show, using a number of case studies, that our profiler correctly identifies methods that are important to optimize; in some cases other profilers report that these methods are cold and thus not worth optimizing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "bias; observer effect; profiling", } @Article{Baek:2010:GFS, author = "Woongki Baek and Trishul M. Chilimbi", title = "{Green}: a framework for supporting energy-conscious programming using controlled approximation", journal = j-SIGPLAN, volume = "45", number = "6", pages = "198--209", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806620", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy-efficient computing is important in several systems ranging from embedded devices to large scale data centers. Several application domains offer the opportunity to tradeoff quality of service/solution (QoS) for improvements in performance and reduction in energy consumption. Programmers sometimes take advantage of such opportunities, albeit in an ad-hoc manner and often without providing any QoS guarantees.\par We propose a system called Green that provides a simple and flexible framework that allows programmers to take advantage of such approximation opportunities in a systematic manner while providing statistical QoS guarantees. 
Green enables programmers to approximate expensive functions and loops and operates in two phases. In the calibration phase, it builds a model of the QoS loss produced by the approximation. This model is used in the operational phase to make approximation decisions based on the QoS constraints specified by the programmer. The operational phase also includes an adaptation function that occasionally monitors the runtime behavior and changes the approximation decisions and QoS model to provide strong statistical QoS guarantees.\par To evaluate the effectiveness of Green, we implemented our system and language extensions using the Phoenix compiler framework. Our experiments using benchmarks from domains such as graphics, machine learning, signal processing, and finance, and an in-production, real-world web search engine, indicate that Green can produce significant improvements in performance and energy consumption with small and controlled QoS degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "controlled approximation; energy-conscious programming", } @Article{Rajan:2010:GPM, author = "Kaushik Rajan and Sriram Rajamani and Shashank Yaduvanshi", title = "{GUESSTIMATE}: a programming model for collaborative distributed systems", journal = j-SIGPLAN, volume = "45", number = "6", pages = "210--220", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806621", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new programming model GUESSTIMATE for developing collaborative distributed systems. The model allows atomic, isolated operations that transform a system from consistent state to consistent state, and provides a shared transactional store for a collection of such operations executed by various machines in a distributed system. In addition to 'committed state' which is identical in all machines in the distributed system, GUESSTIMATE allows each machine to have a replicated local copy of the state (called 'guesstimated state') so that operations on shared state can be executed locally without any blocking, while also guaranteeing that eventually all machines agree on the sequences of operations executed. Thus, each operation is executed multiple times, once at the time of issue when it updates the guesstimated state of the issuing machine, once when the operation is committed (atomically) to the committed state of all machines, and several times in between as the guesstimated state converges toward the committed state. While we expect the results of these executions of the operation to be identical most of the time in the class of applications we study, it is possible for an operation to succeed the first time when it is executed on the guesstimated state, and fail when it is committed. GUESSTIMATE provides facilities that allow the programmer to deal with this potential discrepancy. 
This paper presents our programming model, its operational semantics, its realization as an API in C\#, and our experience building collaborative distributed applications with this model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "collaborative applications; concurrency; distributed systems; language extensions", } @Article{Xi:2010:CFM, author = "Qian Xi and David Walker", title = "A context-free markup language for semi-structured text", journal = j-SIGPLAN, volume = "45", number = "6", pages = "221--232", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806622", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An {\em ad hoc data format\/} is any nonstandard, semi-structured data format for which robust data processing tools are not easily available. In this paper, we present ANNE, a new kind of markup language designed to help users generate documentation and data processing tools for ad hoc text data. More specifically, given a new ad hoc data source, an ANNE programmer edits the document to add a number of simple annotations, which serve to specify its syntactic structure. Annotations include elements that specify constants, optional data, alternatives, enumerations, sequences, tabular data, and recursive patterns. The ANNE system uses a combination of user annotations and the raw data itself to extract a context-free grammar from the document. This context-free grammar can then be used to parse the data and transform it into an XML parse tree, which may be viewed through a browser for analysis or debugging purposes. In addition, the ANNE system generates a PADS/ML description, which may be saved as lasting documentation of the data format or compiled into a host of useful data processing tools.\par In addition to designing and implementing ANNE, we have devised a semantic theory for the core elements of the language. This semantic theory describes the editing process, which translates a raw, unannotated text document into an annotated document, and the grammar extraction process, which generates a context-free grammar from an annotated document. We also present an alternative characterization of system behavior by drawing upon ideas from the field of relevance logic. This secondary characterization, which we call {\em relevance analysis}, specifies a direct relationship between unannotated documents and the context-free grammars that our system can generate from them. 
Relevance analysis allows us to prove important theorems concerning the expressiveness and utility of our system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "ad hoc data; ANNE; domain-specific languages; PADS; tool generation", } @Article{Loitsch:2010:PFP, author = "Florian Loitsch", title = "Printing floating-point numbers quickly and accurately with integers", journal = j-SIGPLAN, volume = "45", number = "6", pages = "233--243", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806623", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present algorithms for accurately converting floating-point numbers to decimal representation. They are fast (up to 4 times faster than commonly used algorithms that use high-precision integers) and correct: any printed number will evaluate to the same number, when read again.\par Our algorithms are fast, because they require only fixed-size integer arithmetic. The sole requirement for the integer type is that it has at least two more bits than the significand of the floating-point number. Hence, for IEEE 754 double-precision numbers (having a 53-bit significand) an integer type with 55 bits is sufficient. Moreover we show how to exploit additional bits to improve the generated output.\par We present three algorithms with different properties: the first algorithm is the most basic one, and does not take advantage of any extra bits. It simply shows how to perform the binary-to-decimal transformation with the minimal number of bits. Our second algorithm improves on the first one by using the additional bits to produce a shorter (often the shortest) result.\par Finally we propose a third version that can be used when the shortest output is a requirement. The last algorithm either produces optimal decimal representations (with respect to shortness and rounding) or rejects its input. For IEEE 754 double-precision numbers and 64-bit integers roughly 99.4\% of all numbers can be processed efficiently. The remaining 0.6\% are rejected and need to be printed by a slower complete algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dtoa; floating-point printing", } @Article{Flanagan:2010:AMD, author = "Cormac Flanagan and Stephen N. Freund", title = "Adversarial memory for detecting destructive races", journal = j-SIGPLAN, volume = "45", number = "6", pages = "244--254", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806625", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multithreaded programs are notoriously prone to race conditions, a problem exacerbated by the widespread adoption of multi-core processors with complex memory models and cache coherence protocols. Much prior work has focused on static and dynamic analyses for race detection, but these algorithms typically are unable to distinguish destructive races that cause erroneous behavior from benign races that do not. 
Performing this classification manually is difficult, time consuming, and error prone.\par This paper presents a new dynamic analysis technique that uses {\em adversarial memory\/} to classify race conditions as destructive or benign on systems with relaxed memory models. Unlike a typical language implementation, which may only infrequently exhibit non-sequentially consistent behavior, our adversarial memory implementation exploits the full freedom of the memory model to return older, unexpected, or stale values for memory reads whenever possible, in an attempt to crash the target program (that is, to force the program to behave erroneously). A crashing execution provides concrete evidence of a destructive bug, and this bug can be strongly correlated with a specific race condition in the target program.\par Experimental results with our Jumble prototype for Java demonstrate that adversarial memory is highly effective at identifying destructive race conditions, and in distinguishing them from race conditions that are real but benign. Adversarial memory can also reveal destructive races that would not be detected by traditional testing (even after thousands of runs) or by model checkers that assume sequential consistency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrency; dynamic analysis; race conditions; relaxed memory models", } @Article{Bond:2010:PPD, author = "Michael D. Bond and Katherine E. Coons and Kathryn S. McKinley", title = "{PACER}: proportional detection of data races", journal = j-SIGPLAN, volume = "45", number = "6", pages = "255--268", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806626", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data races indicate serious concurrency bugs such as order, atomicity, and sequential consistency violations. Races are difficult to find and fix, often manifesting only after deployment. The frequency and unpredictability of these bugs will only increase as software adds parallelism to exploit multicore hardware. Unfortunately, sound and precise race detectors slow programs by factors of eight or more and do not scale to large numbers of threads.\par This paper presents a precise, low-overhead {\em sampling-based\/} data race detector called Pacer. PACER makes a {\em proportionality\/} guarantee: it detects any race at a rate equal to the sampling rate, by finding races whose first access occurs during a global sampling period. During sampling, PACER tracks all accesses using the dynamically sound and precise FastTrack algorithm. In nonsampling periods, Pacer discards sampled access information that cannot be part of a reported race, {\em and\/} Pacer simplifies tracking of the happens-before relationship, yielding near-constant, instead of linear, overheads. Experimental results confirm our theoretical guarantees. PACER reports races in proportion to the sampling rate. Its time and space overheads scale with the sampling rate, and sampling rates of 1-3\% yield overheads low enough to consider in production software. 
The resulting system provides a 'get what you pay for' approach that is suitable for identifying real, hard-to-reproduce races in deployed systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "bugs; concurrency; data races; sampling", } @Article{Nakaike:2010:LER, author = "Takuya Nakaike and Maged M. Michael", title = "Lock elision for read-only critical sections in {Java}", journal = j-SIGPLAN, volume = "45", number = "6", pages = "269--278", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806627", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is not uncommon in parallel workloads to encounter shared data structures with read-mostly access patterns, where operations that update data are infrequent and most operations are read-only. Typically, data consistency is guaranteed using mutual exclusion or read-write locks. The cost of atomic update of lock variables results in high overheads and high cache coherence traffic under active sharing, thus slowing down single thread performance and limiting scalability.\par In this paper, we present {\em SOLERO (Software Optimistic Lock Elision for Read-Only critical sections)}, a new lock implementation for optimizing read-only critical sections in Java based on sequential locks. SOLERO is compatible with the conventional lock implementation of Java. However, unlike the conventional implementation, only critical sections that may write data or have side effects need to update lock variables, while read-only critical sections need only read lock variables without writing them. Each writing critical section changes the lock value to a new value. Hence, a read-only critical section is guaranteed to be consistent if the lock is free and its value does not change from the beginning to the end of the read-only critical section.\par Using Java workloads including SPECjbb2005 and the HashMap and TreeMap Java classes, we evaluate the performance impact of applying SOLERO to read-mostly locks. Our experimental results show performance improvements across the board, often substantial, in both single thread speed and scalability over the conventional lock implementation (mutual exclusion) and read-write locks. SOLERO improves the performance of SPECjbb2005 by 3-5\% on single and multiple threads.
The results using the HashMap and TreeMap benchmarks show that SOLERO outperforms the conventional lock implementation and read-write locks by substantial multiples on multi-threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "java; just-in-time compiler; lock; lock elision; monitor; optimization; synchronization", } @Article{Chaudhuri:2010:SI, author = "Swarat Chaudhuri and Armando Solar-Lezama", title = "Smooth interpretation", journal = j-SIGPLAN, volume = "45", number = "6", pages = "279--291", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806629", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present {\em smooth interpretation}, a method to systematically approximate numerical imperative programs by smooth mathematical functions. This approximation facilitates the use of numerical search techniques like gradient descent for program analysis and synthesis. The method extends to programs the notion of {\em Gaussian smoothing}, a popular signal-processing technique that filters out noise and discontinuities from a signal by taking its convolution with a Gaussian function.\par In our setting, Gaussian smoothing executes a program according to a probabilistic semantics; the execution of program {\em P\/} on an input {\em x\/} after Gaussian smoothing can be summarized as follows: (1) Apply a Gaussian perturbation to {\em x\/} -- the perturbed input is a random variable following a normal distribution with mean {\em x}. (2) Compute and return the {\em expected output\/} of {\em P\/} on this perturbed input. Computing the expectation explicitly would require the execution of {\em P\/} on all possible inputs, but smooth interpretation bypasses this requirement by using a form of symbolic execution to approximate the effect of Gaussian smoothing on {\em P}. The result is an efficient but approximate implementation of Gaussian smoothing of programs.\par Smooth interpretation has the effect of attenuating features of a program that impede numerical searches of its input space -- for example, discontinuities resulting from conditional branches are replaced by continuous transitions. We apply smooth interpretation to the problem of synthesizing values of numerical control parameters in embedded control applications. This problem is naturally formulated as one of numerical optimization: the goal is to find parameter values that minimize the error between the resulting program and a programmer-provided behavioral specification. Solving this problem by directly applying numerical optimization techniques is often impractical due to the discontinuities in the error function. By eliminating these discontinuities, smooth interpretation makes it possible to search the parameter space efficiently by means of simple gradient descent. 
Our experiments demonstrate the value of this strategy in synthesizing parameters for several challenging programs, including models of an automated gear shift and a PID controller.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "continuity; parameter synthesis; program smoothing", } @Article{Gulwani:2010:RBP, author = "Sumit Gulwani and Florian Zuleger", title = "The reachability-bound problem", journal = j-SIGPLAN, volume = "45", number = "6", pages = "292--304", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806630", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We define the {\em reachability-bound problem\/} to be the problem of finding a symbolic worst-case bound on the number of times a given control location inside a procedure is visited in terms of the inputs to that procedure. This has applications in bounding resources consumed by a program such as time, memory, network-traffic, power, as well as estimating quantitative properties (as opposed to boolean properties) of data in programs, such as information leakage or uncertainty propagation. Our approach to solving the reachability-bound problem brings together two different techniques for reasoning about loops in an effective manner. One of these techniques is an abstract-interpretation based iterative technique for computing precise disjunctive invariants (to summarize nested loops). The other technique is a non-iterative proof-rules based technique (for loop bound computation) that takes over the role of doing inductive reasoning, while deriving its power from the use of SMT solvers to reason about abstract loop-free fragments.\par Our solution to the reachability-bound problem allows us to compute precise symbolic complexity bounds for several loops in {.NET} base-class libraries for which earlier techniques fail. We also illustrate the precision of our algorithm for disjunctive invariant computation (which has a more general applicability beyond the reachability-bound problem) on a set of benchmark examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "disjunctive invariants; pattern matching; ranking functions; resource bound analysis; transitive closure", } @Article{Might:2010:REC, author = "Matthew Might and Yannis Smaragdakis and David {Van Horn}", title = "Resolving and exploiting the $k$-{CFA} paradox: illuminating functional vs. object-oriented program analysis", journal = j-SIGPLAN, volume = "45", number = "6", pages = "305--315", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806631", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Low-level program analysis is a fundamental problem, taking the shape of 'flow analysis' in functional languages and 'points-to' analysis in imperative and object-oriented languages. Despite the similarities, the vocabulary and results in the two communities remain largely distinct, with limited cross-understanding. 
One of the few links is Shivers's $k$-CFA work, which has advanced the concept of 'context-sensitive analysis' and is widely known in both communities.\par Recent results indicate that the relationship between the functional and object-oriented incarnations of $k$-CFA is not as well understood as thought. Van Horn and Mairson proved $k$-CFA for $k \geq 1$ to be EXPTIME-complete; hence, no polynomial-time algorithm can exist. Yet, there are several polynomial-time formulations of context-sensitive points-to analyses in object-oriented languages. Thus, it seems that functional $k$-CFA may actually be a profoundly different analysis from object-oriented $k$-CFA. We resolve this paradox by showing that the exact same specification of $k$-CFA is polynomial-time for object-oriented languages yet exponential-time for functional ones: objects and closures are subtly different, in a way that interacts crucially with context-sensitivity and complexity. This illumination leads to an immediate payoff: by projecting the object-oriented treatment of objects onto closures, we derive a polynomial-time hierarchy of context-sensitive CFAs for functional programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "control-flow analysis; functional; k-cfa; m-cfa; object-oriented; pointer analysis; static analysis", } @Article{Kuncak:2010:CFS, author = "Viktor Kuncak and Mika{\"e}l Mayer and Ruzica Piskac and Philippe Suter", title = "Complete functional synthesis", journal = j-SIGPLAN, volume = "45", number = "6", pages = "316--329", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806632", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Synthesis of program fragments from specifications can make programs easier to write and easier to reason about. To integrate synthesis into programming languages, synthesis algorithms should behave in a predictable way - they should succeed for a well-defined class of specifications. They should also support unbounded data types such as numbers and data structures. We propose to generalize decision procedures into predictable and complete synthesis procedures. Such procedures are guaranteed to find code that satisfies the specification if such code exists. Moreover, we identify conditions under which synthesis will statically decide whether the solution is guaranteed to exist, and whether it is unique. We demonstrate our approach by starting from decision procedures for linear arithmetic and data structures and transforming them into synthesis procedures. We establish results on the size and the efficiency of the synthesized code. We show that such procedures are useful as a language extension with implicit value definitions, and we show how to extend a compiler to support such definitions. 
Our constructs provide the benefits of synthesis to programmers, without requiring them to learn new concepts or give up a deterministic execution model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "bapa; decision procedure; Presburger arithmetic; synthesis procedure", } @Article{Burckhardt:2010:LCA, author = "Sebastian Burckhardt and Chris Dern and Madanlal Musuvathi and Roy Tan", title = "{Line-Up}: a complete and automatic linearizability checker", journal = j-SIGPLAN, volume = "45", number = "6", pages = "330--340", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806634", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modular development of concurrent applications requires thread-safe components that behave correctly when called concurrently by multiple client threads. This paper focuses on linearizability, a specific formalization of thread safety, where all operations of a concurrent component appear to take effect instantaneously at some point between their call and return. The key insight of this paper is that if a component is intended to be deterministic, then it is possible to build an automatic linearizability checker by systematically enumerating the sequential behaviors of the component and then checking if each of its concurrent behaviors is equivalent to some sequential behavior.\par We develop this insight into a tool called Line-Up, the first complete and automatic checker for {\em deterministic linearizability}. It is complete, because any reported violation proves that the implementation is not linearizable with respect to {\em any\/} sequential deterministic specification. It is automatic, requiring no manual abstraction, no manual specification of semantics or commit points, no manually written test suites, no access to source code.\par We evaluate Line-Up by analyzing 13 classes with a total of 90 methods in two versions of the {.NET} Framework 4.0. The violations of deterministic linearizability reported by Line-Up exposed seven errors in the implementation that were fixed by the development team.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "atomicity; linearizability; thread safety", } @Article{Torlak:2010:MCA, author = "Emina Torlak and Mandana Vaziri and Julian Dolby", title = "{MemSAT}: checking axiomatic specifications of memory models", journal = j-SIGPLAN, volume = "45", number = "6", pages = "341--350", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806635", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory models are hard to reason about due to their complexity, which stems from the need to strike a balance between ease-of-programming and allowing compiler and hardware optimizations. In this paper, we present an automated tool, MemSAT, that helps in debugging and reasoning about memory models.
Given an axiomatic specification of a memory model and a multi-threaded test program containing assertions, MemSAT outputs a trace of the program in which both the assertions and the memory model axioms are satisfied, if one can be found. The tool is fully automatic and is based on a SAT solver. If it cannot find a trace, it outputs a minimal subset of the memory model and program constraints that are unsatisfiable. We used MemSAT to check several existing memory models against their published test cases, including the current Java Memory Model by Manson et al. and a revised version of it by Sevcik and Aspinall. We found subtle discrepancies between what was expected and the actual results of test programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "axiomatic specifications; bounded model checking; memory models; sat", } @Article{Marino:2010:DSE, author = "Daniel Marino and Abhayendra Singh and Todd Millstein and Madanlal Musuvathi and Satish Narayanasamy", title = "{DRFX}: a simple and efficient memory model for concurrent programming languages", journal = j-SIGPLAN, volume = "45", number = "6", pages = "351--362", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806636", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The most intuitive memory model for shared-memory multithreaded programming is {\em sequential consistency\/} (SC), but it disallows the use of many compiler and hardware optimizations thereby impacting performance. Data-race-free (DRF) models, such as the proposed C++0x memory model, guarantee SC execution for datarace-free programs. But these models provide no guarantee at all for racy programs, compromising the safety and debuggability of such programs. To address the safety issue, the Java memory model, which is also based on the DRF model, provides a weak semantics for racy executions. However, this semantics is subtle and complex, making it difficult for programmers to reason about their programs and for compiler writers to ensure the correctness of compiler optimizations.\par We present the DRFx memory model, which is simple for programmers to understand and use while still supporting many common optimizations. We introduce a {\em memory model (MM) exception\/} which can be signaled to halt execution. If a program executes without throwing this exception, then DRFx guarantees that the execution is SC. If a program throws an MM exception during an execution, then DRFx guarantees that the program has a data race. We observe that SC violations can be detected in hardware through a lightweight form of conflict detection. Furthermore, our model safely allows aggressive compiler and hardware optimizations within compiler-designated program regions. 
We formalize our memory model, prove several properties about this model, describe a compiler and hardware design suitable for DRFx, and evaluate the performance overhead due to our compiler and hardware requirements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data races; memory model exception; memory models; sequential consistency; soft fences", } @Article{Chambers:2010:FEE, author = "Craig Chambers and Ashish Raniwala and Frances Perry and Stephen Adams and Robert R. Henry and Robert Bradshaw and Nathan Weizenbaum", title = "{FlumeJava}: easy, efficient data-parallel pipelines", journal = j-SIGPLAN, volume = "45", number = "6", pages = "363--375", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806638", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "MapReduce and similar systems significantly ease the task of writing data-parallel code. However, many real-world computations require a pipeline of MapReduces, and programming and managing such pipelines can be difficult. We present FlumeJava, a Java library that makes it easy to develop, test, and run efficient data-parallel pipelines. At the core of the FlumeJava library are a couple of classes that represent immutable parallel collections, each supporting a modest number of operations for processing them in parallel. Parallel collections and their operations present a simple, high-level, uniform abstraction over different data representations and execution strategies. To enable parallel operations to run efficiently, FlumeJava defers their evaluation, instead internally constructing an execution plan dataflow graph. When the final results of the parallel operations are eventually needed, FlumeJava first optimizes the execution plan, and then executes the optimized operations on appropriate underlying primitives (e.g., MapReduces). The combination of high-level abstractions for parallel data and computation, deferred evaluation and optimization, and efficient parallel primitives yields an easy-to-use system that approaches the efficiency of hand-optimized pipelines. FlumeJava is in active use by hundreds of pipeline developers within Google.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data-parallel programming; java; mapreduce", } @Article{Pan:2010:CPS, author = "Heidi Pan and Benjamin Hindman and Krste Asanovi{\'c}", title = "Composing parallel software efficiently with {Lithe}", journal = j-SIGPLAN, volume = "45", number = "6", pages = "376--387", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806639", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications composed of multiple parallel libraries perform poorly when those libraries interfere with one another by obliviously using the same physical cores, leading to destructive resource oversubscription. 
This paper presents the design and implementation of {\em Lithe}, a low-level substrate that provides the basic primitives and a standard interface for composing parallel codes efficiently. Lithe can be inserted underneath the runtimes of legacy parallel libraries to provide {\em bolt-on\/} composability without needing to change existing application code. Lithe can also serve as the foundation for building new parallel abstractions and libraries that automatically interoperate with one another.\par In this paper, we show versions of Threading Building Blocks (TBB) and OpenMP perform competitively with their original implementations when ported to Lithe. Furthermore, for two applications composed of multiple parallel libraries, we show that leveraging our substrate outperforms their original, even expertly tuned, implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "composability; cooperative scheduling; hierarchical scheduling; oversubscription; parallelism; resource management; user-level scheduling", } @Article{Zhou:2010:BDC, author = "Jin Zhou and Brian Demsky", title = "{Bamboo}: a data-centric, object-oriented approach to many-core software", journal = j-SIGPLAN, volume = "45", number = "6", pages = "388--399", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806640", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional data-oriented programming languages such as dataflow languages and stream languages provide a natural abstraction for parallel programming. In these languages, a developer focuses on the flow of data through the computation and these systems free the developer from the complexities of low-level, thread-oriented concurrency primitives. This simplification comes at a cost --- traditional data-oriented approaches restrict the mutation of state and, in practice, the types of data structures a program can effectively use. Bamboo borrows from work in typestate and software transactions to relax the traditional restrictions of data-oriented programming models to support mutation of arbitrary data structures.\par We have implemented a compiler for Bamboo which generates code for the TILEPro64 many-core processor. We have evaluated this implementation on six benchmarks: Tracking, a feature tracking algorithm from computer vision; KMeans, a K-means clustering algorithm; MonteCarlo, a Monte Carlo simulation; FilterBank, a multi-channel filter bank; Fractal, a Mandelbrot set computation; and Series, a Fourier series computation. 
We found that our compiler generated implementations that obtained speedups ranging from 26.2x to 61.6x when executed on 62 cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "data-centric languages; many-core programming", } @Article{Westbrook:2010:MJM, author = "Edwin Westbrook and Mathias Ricken and Jun Inoue and Yilong Yao and Tamer Abdelatif and Walid Taha", title = "{Mint}: {Java} multi-stage programming using weak separability", journal = j-SIGPLAN, volume = "45", number = "6", pages = "400--411", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806642", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multi-stage programming (MSP) provides a disciplined approach to run-time code generation. In the purely functional setting, it has been shown how MSP can be used to reduce the overhead of abstractions, allowing clean, maintainable code without paying performance penalties. Unfortunately, MSP is difficult to combine with imperative features, which are prevalent in mainstream languages. The central difficulty is scope extrusion, wherein free variables can inadvertently be moved outside the scopes of their binders. This paper proposes a new approach to combining MSP with imperative features that occupies a 'sweet spot' in the design space in terms of how well useful MSP applications can be expressed and how easy it is for programmers to understand. The key insight is that escapes (or 'anti-quotes') must be weakly separable from the rest of the code, i.e. the computational effects occurring inside an escape that are visible outside the escape are guaranteed to not contain code. To demonstrate the feasibility of this approach, we formalize a type system based on Lightweight Java which we prove sound, and we also provide an implementation, called Mint, to validate both the expressivity of the type system and the effect of staging on the performance of Java programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "java; multi-staged languages; multi-stage programming; type systems", } @Article{Chen:2010:TPC, author = "Juan Chen and Ravi Chugh and Nikhil Swamy", title = "Type-preserving compilation for end-to-end verification of security enforcement", journal = j-SIGPLAN, volume = "45", number = "6", pages = "412--423", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806643", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A number of programming languages use rich type systems to verify security properties of code. Some of these languages are meant for source programming, but programs written in these languages are compiled without explicit security proofs, limiting their utility in settings where proofs are necessary, e.g., proof-carrying authorization. Other languages do include explicit proofs, but these are generally lambda calculi not intended for source programming, which must be further compiled to an executable form.
A language suitable for source programming backed by a compiler that enables end-to-end verification is missing.\par In this paper, we present a type-preserving compiler that translates programs written in FINE, a source-level functional language with dependent refinements and affine types, to DCIL, a new extension of the {.NET} Common Intermediate Language. FINE is type checked using an external SMT solver to reduce the proof burden on source programmers. We extract explicit LCF-style proof terms from the solver and carry these proof terms in the compilation to DCIL, thereby removing the solver from the trusted computing base. Explicit proofs enable DCIL to be used in a number of important scenarios, including the verification of mobile code, proof-carrying authorization, and evidence-based auditing. We report on our experience using FINE to build reference monitors for several applications, ranging from a plugin-based email client to a conference management server.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "authorization; bytecode languages; compilers; dependent types; functional programming; information flow; mobile code security; security type systems", } @Article{Tate:2010:IOO, author = "Ross Tate and Juan Chen and Chris Hawblitzel", title = "Inferable object-oriented typed assembly language", journal = j-SIGPLAN, volume = "45", number = "6", pages = "424--435", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806644", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A certifying compiler preserves type information through compilation to assembly language programs, producing typed assembly language (TAL) programs that can be verified for safety independently so that the compiler does not need to be trusted. There are two challenges for adopting certifying compilation in practice. First, requiring every compiler transformation and optimization to preserve types is a large burden on compilers, especially when adopting certifying compilation into existing optimizing non-certifying compilers. Second, type annotations significantly increase the size of assembly language programs.\par This paper proposes an alternative to traditional certifying compilers. It presents iTalX, the first inferable TAL type system that supports existential types, arrays, interfaces, and stacks. We have proved our inference algorithm is complete, meaning if an assembly language program is typeable with iTalX then our algorithm will infer an iTalX typing for that program. Furthermore, our algorithm is guaranteed to terminate even if the assembly language program is untypeable. We demonstrate that it is practical to infer such an expressive TAL by showing a prototype implementation of type inference for code compiled by Bartok, an optimizing C\# compiler. Our prototype implementation infers complete type annotations for 98\% of functions in a suite of realistic C\# benchmarks. The type-inference time is about 8\% of the compilation time. We needed to change only 2.5\% of the compiler code, mostly adding new code for defining types and for writing types to object files. Most transformations are untouched. 
Type-annotation size is only 17\% of the size of pure code and data, reducing type annotations in our previous certifying compiler [4] by 60\%. The compiler needs to preserve only essential type information such as method signatures, object-layout information, and types for static data and external labels. Even non-certifying compilers have most of this information available.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "certifying compiler; existential quantification; object-oriented compiler; Typed Assembly Language (TAL); type inference", } @Article{Khoo:2010:MTC, author = "Yit Phang Khoo and Bor-Yuh Evan Chang and Jeffrey S. Foster", title = "Mixing type checking and symbolic execution", journal = j-SIGPLAN, volume = "45", number = "6", pages = "436--447", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1809028.1806645", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static analysis designers must carefully balance precision and efficiency. In our experience, many static analysis tools are built around an elegant, core algorithm, but that algorithm is then extensively tweaked to add just enough precision for the coding idioms seen in practice, without sacrificing too much efficiency. There are several downsides to adding precision in this way: the tool's implementation becomes much more complicated; it can be hard for an end-user to interpret the tool's results; and as software systems vary tremendously in their coding styles, it may require significant algorithmic engineering to enhance a tool to perform well in a particular software domain.\par In this paper, we present Mix, a novel system that mixes type checking and symbolic execution. The key aspect of our approach is that these analyses are applied independently on disjoint parts of the program, in an off-the-shelf manner. At the boundaries between nested type checked and symbolically executed code regions, we use special mix rules to communicate information between the off-the-shelf systems. The resulting mixture is a provably sound analysis that is more precise than type checking alone and more efficient than exclusive symbolic execution. In addition, we also describe a prototype implementation, Mixy, for C. 
Mixy checks for potential null dereferences by mixing a null/non-null type qualifier inference system with a symbolic executor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "false alarms; mix; mixed off-the-shelf analysis; mix rules; precision; symbolic execution; type checking", } @Article{Chen:2010:EIO, author = "Yang Chen and Yuanjie Huang and Lieven Eeckhout and Grigori Fursin and Liang Peng and Olivier Temam and Chengyong Wu", title = "Evaluating iterative optimization across 1000 datasets", journal = j-SIGPLAN, volume = "45", number = "6", pages = "448--459", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806647", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While iterative optimization has become a popular compiler optimization approach, it is based on a premise which has never been truly evaluated: that it is possible to learn the best compiler optimizations across data sets. Up to now, most iterative optimization studies find the best optimizations through repeated runs on the same data set. Only a handful of studies have attempted to exercise iterative optimization on a few tens of data sets.\par In this paper, we truly put iterative compilation to the test for the first time by evaluating its effectiveness across a large number of data sets. We therefore compose KDataSets, a data set suite with 1000 data sets for 32 programs, which we release to the public. We characterize the diversity of KDataSets, and subsequently use it to evaluate iterative optimization. We demonstrate that it is possible to derive a robust iterative optimization strategy across data sets: for all 32 programs, we find that there exists at least one combination of compiler optimizations that achieves 86\% or more of the best possible speedup across {\em all\/} data sets using Intel's ICC (83\% for GNU's GCC). This optimal combination is program-specific and yields speedups up to 1.71 on ICC and 2.23 on GCC over the highest optimization level (-fast and -O3, respectively). This finding makes the task of optimizing programs across data sets much easier than previously anticipated, and it paves the way for the practical and reliable usage of iterative optimization. Finally, we derive pre-shipping and post-shipping optimization strategies for software vendors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "benchmarking; compiler optimization; iterative optimization", } @Article{Kamruzzaman:2010:SDS, author = "Md Kamruzzaman and Steven Swanson and Dean M. Tullsen", title = "Software data spreading: leveraging distributed caches to improve single thread performance", journal = j-SIGPLAN, volume = "45", number = "6", pages = "460--470", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806648", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Single thread performance remains an important consideration even for multicore, multiprocessor systems. 
As a result, techniques for improving single thread performance using multiple cores have received considerable attention. This work describes a technique, {\em software data spreading}, that leverages the cache capacity of extra cores and extra sockets rather than their computational resources. Software data spreading is a software-only technique that uses compiler-directed thread migration to aggregate cache capacity across cores and chips and improve performance. This paper describes an automated scheme that applies data spreading to various types of loops. Experiments with a set of SPEC2000, SPEC2006, NAS, and microbenchmark workloads show that data spreading can provide speedup of over 2, averaging 17\% for the SPEC and NAS applications on two systems. In addition, despite using more cores for the same computation, data spreading actually saves power since it reduces access to DRAM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "chip multiprocessors; compilers; single-thread performance", } @Article{Sartor:2010:ZRD, author = "Jennifer B. Sartor and Stephen M. Blackburn and Daniel Frampton and Martin Hirzel and Kathryn S. McKinley", title = "{Z}-rays: divide arrays and conquer speed and flexibility", journal = j-SIGPLAN, volume = "45", number = "6", pages = "471--482", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806649", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Arrays are the ubiquitous organization for indexed data. Throughout programming language evolution, implementations have laid out arrays contiguously in memory. This layout is problematic in space and time. It causes heap fragmentation, garbage collection pauses in proportion to array size, and wasted memory for sparse and over-provisioned arrays. Because of array virtualization in managed languages, an array layout that consists of indirection pointers to fixed-size discontiguous memory blocks can mitigate these problems transparently. This design however incurs significant overhead, but is justified when real-time deadlines and space constraints trump performance.\par This paper proposes {\em z-rays}, a discontiguous array design with flexibility and efficiency. A z-ray has a spine with indirection pointers to fixed-size memory blocks called {\em arraylets}, and uses five optimizations: (1) inlining the first N array bytes into the spine, (2) lazy allocation, (3) zero compression, (4) fast array copy, and (5) arraylet copy-on-write. Whereas discontiguous arrays in prior work improve responsiveness and space efficiency, z-rays combine time efficiency and flexibility. On average, the best z-ray configuration performs within 12.7\% of an unmodified Java Virtual Machine on 19 benchmarks, whereas previous designs have {\em two to three times\/} higher overheads. Furthermore, language implementers can configure z-ray optimizations for various design goals. This combination of performance and flexibility creates a better building block for past and future array optimization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "arraylets; arrays; compression; heap; z-rays", } @Article{Acar:2010:TDT, author = "Umut A. 
Acar and Guy Blelloch and Ruy Ley-Wild and Kanat Tangwongsan and Duru Turkoglu", title = "Traceable data types for self-adjusting computation", journal = j-SIGPLAN, volume = "45", number = "6", pages = "483--496", month = jun, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806596.1806650", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:53:18 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Self-adjusting computation provides an evaluation model where computations can respond automatically to modifications to their data by using a mechanism for propagating modifications through the computation. Current approaches to self-adjusting computation guarantee correctness by recording dependencies in a trace at the granularity of individual memory operations. Tracing at the granularity of memory operations, however, has some limitations: it can be asymptotically inefficient (e.g., compared to optimal solutions) because it cannot take advantage of problem-specific structure, it requires keeping a large computation trace (often proportional to the runtime of the program on the current input), and it introduces moderately large constant factors in practice.\par In this paper, we extend dependence-tracing to work at the granularity of the query and update operations of arbitrary (abstract) data types, instead of just reads and writes on memory cells. This can significantly reduce the number of dependencies that need to be kept in the trace and followed during an update. We define an interface for supporting a traceable version of a data type, which reports the earliest query that depends on (is changed by) revising operations back in time, and implement several such structures, including priority queues, queues, dictionaries, and counters. We develop a semantics for tracing, extend an existing self-adjusting language, $\Delta$ML, and its implementation to support traceable data types, and present an experimental evaluation by considering a number of benchmarks. Our experiments show dramatic improvements on space and time, sometimes by as much as two orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "self-adjusting computation; traceable data types", } @Article{Chen:2010:TTT, author = "Peter M. Chen", title = "Transistors to toys: teaching systems to freshmen", journal = j-SIGPLAN, volume = "45", number = "7", pages = "1--2", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1735998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "How should we introduce students to the art of system building, and when are students ready to start designing and building interesting systems? In this talk, I describe an experimental course at the University of Michigan that teaches systems to freshmen by having them conceive of, design, and build the hardware and software of a microprocessor-based educational toy. Students in this course build their own microprocessor on an FPGA using a hardware description language.
They then write the complete software stack for their toy in assembly language, including device drivers for numerous I/O devices, a simple file system, a graphical user interface, digital audio processing, and application software. By building a substantial system involving hardware, system software, and application software, students gain an appreciation for the complexity and beauty of building computing systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "education", } @Article{Pohle:2010:CWM, author = "Aaron Pohle and Bj{\"o}rn D{\"o}bel and Michael Roitzsch and Hermann H{\"a}rtig", title = "Capability wrangling made easy: debugging on a microkernel with {{\tt valgrind}}", journal = j-SIGPLAN, volume = "45", number = "7", pages = "3--12", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Not all operating systems are created equal. Contrasting traditional monolithic kernels, there is a class of systems called microkernels more prevalent in embedded systems like cellphones, chip cards or real-time controllers. These kernels offer an abstraction very different from the classical POSIX interface. The resulting unfamiliarity for programmers complicates development and debugging. Valgrind is a well-known debugging tool that virtualizes execution to perform dynamic binary analysis. However, it assumes it is running on a POSIX-like kernel and closely interacts with the system to control execution. In this paper we analyze how to adapt Valgrind to a non-POSIX environment and describe our port to the Fiasco.OC microkernel. Additionally, we analyze bug classes that are indigenous to capability systems and show how Valgrind's flexibility can be leveraged to create custom debugging tools detecting these errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "capability; l4; microkernel; valgrind", } @Article{Chow:2010:MSR, author = "Jim Chow and Dominic Lucchetti and Tal Garfinkel and Geoffrey Lefebvre and Ryan Gardner and Joshua Mason and Sam Small and Peter M. Chen", title = "Multi-stage replay with {Crosscut}", journal = j-SIGPLAN, volume = "45", number = "7", pages = "13--24", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deterministic record-replay has many useful applications, ranging from fault tolerance and forensics to reproducing and diagnosing bugs. When choosing a record-replay solution, the system administrator must choose a priori how comprehensively to record the execution and at what abstraction level to record it. Unfortunately, these choices may not match well with how the recording is eventually used. A recording may contain too little information to support the end use of replay, or it may contain more sensitive information than is allowed to be shown to the end user of replay.
Similarly, fixing the abstraction level at the time of recording often leads to a semantic mismatch with the end use of replay.\par This paper describes how to remedy these problems by adding customizable replay stages to create special-purpose logs for the end users of replay. Our system, called Crosscut, allows replay logs to be 'sliced' along time and abstraction boundaries. Using this approach, users can create slices that include only the processes, applications, or components of interest, excluding parts that handle sensitive data. Users can also retarget the abstraction level of the replay log to higher-level platforms, such as Perl or Valgrind. Execution can then be augmented with additional analysis code at replay time, without disturbing the replayed components in the slice. Crosscut thus uses replay itself to transform logs into a more efficient, secure, and usable form for replay-based applications.\par Our current Crosscut prototype builds on VMware Workstation's record-replay capabilities, and supports a variety of different replay environments. We show how Crosscut can create slices of only the parts of the computation of interest and thereby avoid leaking sensitive information, and we show how to retarget the abstraction level of the log to enable more convenient use during replay debugging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "design; experimentation; performance; replay; security; virtual machines", } @Article{Huang:2010:OCD, author = "Yijian Huang and Haibo Chen and Binyu Zang", title = "Optimizing crash dump in virtualized environments", journal = j-SIGPLAN, volume = "45", number = "7", pages = "25--36", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736003", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Crash dump, or core dump is the typical way to save memory image on system crash for future offline debugging and analysis. However, for typical server machines with likely abundant memory, the time of core dump can significantly increase the mean time to repair (MTTR) by delaying the reboot-based recovery, while not dumping the failure context for analysis would risk recurring crashes on the same problems.\par In this paper, we propose several optimization techniques for core dump in virtualized environments, in order to shorten the MTTR of consolidated virtual machines during crashes. First, we parallelize the process of crash dump and the process of rebooting the crashed VM, by dynamically reclaiming and allocating memory between the crashed VM and the newly spawned VM. Second, we use the virtual machine management layer to introspect the critical data structures of the crashed VM to filter out the dump of unused memory. Finally, we implement disk I/O rate control between core dump and the newly spawned VM according to user-tuned rate control policy to balance the time of crash dump and quality of services in the recovery VM.\par We have implemented a working prototype, Vicover, that optimizes core dump on system crash of a virtual machine in Xen, to minimize the MTTR of core dump and recovery as a whole. 
In our experiment on a virtualized TPC-W server, Vicover shortens the downtime caused by crash dump by around 5X.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "core dump; parallel core dump; virtual machines", } @Article{Hunt:2010:LBS, author = "Galen C. Hunt", title = "Looking beyond a singularity", journal = j-SIGPLAN, volume = "45", number = "7", pages = "37--38", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1735999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "How does one build a truly dependable software system? Seven years ago, Microsoft Research started the Singularity project to answer this question. The premise was to start with the best known software development tools and to build a new kind of operating system from the ground up. The operating system was to be both an output artifact and a laboratory for the research. Portions of the code and ideas have been incorporated into three separate Microsoft operating systems so far. I will give a brief overview of Singularity planned and built, then describe what we learned, both positive and negative. I will speculate on OS futures including current research to build an operating system in which every last assembly instruction has been verified for type safety, a system for truly mobile computation, and new tools for automatically restructuring large software systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "sing\#; singularity; software-isolated processes (sips)", } @Article{Titzer:2010:ICR, author = "Ben L. Titzer and Thomas W{\"u}rthinger and Doug Simon and Marcelo Cintra", title = "Improving compiler-runtime separation with {XIR}", journal = j-SIGPLAN, volume = "45", number = "7", pages = "39--50", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736005", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Intense research on virtual machines has highlighted the need for flexible software architectures that allow quick evaluation of new design and implementation techniques. The interface between the compiler and runtime system is a principal factor in the flexibility of both components and is critical to enabling rapid pursuit of new optimizations and features. Although many virtual machines have demonstrated modularity for many components, significant dependencies often remain between the compiler and the runtime system components such as the object model and memory management system. This paper addresses this challenge with a carefully designed strict compiler-runtime interface and the XIR language. Instead of the compiler backend lowering object operations to machine operations using hard-wired runtime-specific logic, XIR allows the runtime system to implement this logic, simultaneously simplifying and separating the backend from runtime-system details. 
In this paper we describe the design and implementation of this compiler-runtime interface and the XIR language in the C1X dynamic compiler, a port of the HotSpot{\TM} Client compiler. Our results show a significant reduction in backend complexity with XIR and an overall reduction in the compiler-runtime interface complexity while still generating comparable quality code with only minor impact on compilation time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compilers; intermediate representations; java; JIT; lowering; object model; register allocation; runtime interface; software architecture; virtual machines", } @Article{Geoffray:2010:VSM, author = "Nicolas Geoffray and Ga{\"e}l Thomas and Julia Lawall and Gilles Muller and Bertil Folliot", title = "{VMKit}: a substrate for managed runtime environments", journal = j-SIGPLAN, volume = "45", number = "7", pages = "51--62", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736006", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Managed Runtime Environments (MREs), such as the JVM and the CLI, form an attractive environment for program execution, by providing portability and safety, via the use of a bytecode language and automatic memory management, as well as good performance, via just-in-time (JIT) compilation. Nevertheless, developing a fully featured MRE, including e.g. a garbage collector and JIT compiler, is a herculean task. As a result, new languages cannot easily take advantage of the benefits of MREs, and it is difficult to experiment with extensions of existing MRE based languages.\par This paper describes and evaluates VMKit, a first attempt to build a common substrate that eases the development of high-level MREs. We have successfully used VMKit to build two MREs: a Java Virtual Machine and a Common Language Runtime. We provide an extensive study of the lessons learned in developing this infrastructure, and assess the ease of implementing new MREs or MRE extensions and the resulting performance. In particular, it took one of the authors only one month to develop a Common Language Runtime using VMKit. VMKit furthermore has performance comparable to the well-established open-source MREs Cacao, Apache Harmony and Mono, and is 1.2 to 3 times slower than JikesRVM on most of the DaCapo benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "just in time compiler; virtual machine; VMKit", } @Article{Zhang:2010:NSS, author = "Qing Zhang and John McCullough and Justin Ma and Nabil Schear and Michael Vrable and Amin Vahdat and Alex C. Snoeren and Geoffrey M. Voelker and Stefan Savage", title = "{Neon}: system support for derived data management", journal = j-SIGPLAN, volume = "45", number = "7", pages = "63--74", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736008", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern organizations face increasingly complex information management requirements.
A combination of commercial needs, legal liability and regulatory imperatives has created a patchwork of mandated policies. Among these, personally identifying customer records must be carefully access-controlled, sensitive files must be encrypted on mobile computers to guard against physical theft, and intellectual property must be protected from both exposure and 'poisoning.' However, enforcing such policies can be quite difficult in practice since users routinely share data over networks and derive new files from these inputs--incidentally laundering any policy restrictions. In this paper, we describe a virtual machine monitor system called Neon that transparently labels derived data using byte-level 'tints' and tracks these labels end to end across commodity applications, operating systems and networks. Our goal with Neon is to explore the viability and utility of transparent information flow tracking within conventional networked systems when used in the manner in which they were intended. We demonstrate that this mechanism allows the enforcement of a variety of data management policies, including data-dependent confinement, mandatory I/O encryption, and intellectual property management.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "difc; memory tainting; qemu; virtualization; xen", } @Article{Ye:2010:EES, author = "Lei Ye and Gen Lu and Sushanth Kumar and Chris Gniady and John H. Hartman", title = "Energy-efficient storage in virtual machine environments", journal = j-SIGPLAN, volume = "45", number = "7", pages = "75--84", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736009", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current trends in increasing storage capacity and virtualization of resources combined with the need for energy efficiency put a challenging task in front of system designers. Previous studies have suggested many approaches to reduce hard disk energy dissipation in native OS environments; however, those mechanisms do not perform well in virtual machine environments because a virtual machine (VM) and the virtual machine monitor (VMM) that runs it have different semantic contexts. This paper explores the disk I/O activities between VMM and VMs using trace driven simulation to understand the I/O behavior of the VM system. Subsequently, this paper proposes three mechanisms to address the isolation between VMM and VMs, and increase the burstiness of hard disk accesses to increase energy efficiency of a hard disk. Compared to standard shutdown mechanisms, with eight VMs the proposed mechanisms reduce disk spin-ups, increase the disk sleep time, and reduce energy consumption by 14.8\% with only 0.5\% increase in execution time. 
We implemented the proposed mechanisms in Xen and validated our simulation results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "energy management; storage system; virtual machine", } @Article{Kazempour:2010:AAA, author = "Vahid Kazempour and Ali Kamali and Alexandra Fedorova", title = "{AASH}: an asymmetry-aware scheduler for hypervisors", journal = j-SIGPLAN, volume = "45", number = "7", pages = "85--96", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736011", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Asymmetric multicore processors (AMP) consist of cores exposing the same instruction-set architecture (ISA) but varying in size, frequency, power consumption and performance. AMPs were shown to be more power efficient than conventional symmetric multicore processors, and it is therefore likely that future multicore systems will include cores of different types. AMPs derive their efficiency from core specialization: instruction streams can be assigned to run on the cores best suited to their demands for architectural resources. System efficiency is improved as a result. To perform effective matching of threads to cores, the thread scheduler must be asymmetry-aware; and while asymmetry-aware schedulers for operating systems are a well studied topic, asymmetry-awareness in hypervisors has not been addressed. A hypervisor must be asymmetry-aware to enable proper functioning of asymmetry-aware guest operating systems; otherwise they will be ineffective in virtual environments. Furthermore, a hypervisor must ensure that asymmetric cores are shared among multiple guests in a fair fashion or in accordance with their priorities.\par This work for the first time implements simple changes to the hypervisor scheduler, required to make it asymmetry-aware, and evaluates the benefits and overheads of these asymmetry-aware mechanisms. Our evaluation was performed using an open source hypervisor Xen on a real multicore system where asymmetry was emulated via CPU frequency scaling. We compared the asymmetry-aware hypervisor to default Xen. Our results indicate that asymmetry support can be implemented with low overheads, and resulting performance improvements can be significant, reaching up to 36\% in our experiments. Most performance improvements are derived from the fact that an asymmetry-aware hypervisor ensures that the fast cores do not go idle before slow cores and from the fact that it maps virtual cores to physical cores for asymmetry-aware guests according to the guest's expectations. Other benefits from asymmetry awareness are fairer sharing of computing resources among VMs and more stable execution times.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "asymmetric; heterogeneous; hypervisor; multicore processors; scheduling algorithms; virtual machine monitor", } @Article{Lee:2010:SSR, author = "Min Lee and A. S. Krishnakumar and P. 
Krishnan and Navjot Singh and Shalini Yajnik", title = "Supporting soft real-time tasks in the {Xen} hypervisor", journal = j-SIGPLAN, volume = "45", number = "7", pages = "97--108", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736012", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Virtualization technology enables server consolidation and has given an impetus to low-cost green data centers. However, current hypervisors do not provide adequate support for real-time applications, and this has limited the adoption of virtualization in some domains. Soft real-time applications, such as media-based ones, are impeded by components of virtualization including low-performance virtualization I/O, increased scheduling latency, and shared-cache contention. The virtual machine scheduler is central to all these issues. The goal in this paper is to adapt the virtual machine scheduler to be more soft-real-time friendly.\par We improve two aspects of the VMM scheduler -- managing scheduling latency as a first-class resource and managing shared caches. We use enterprise IP telephony as an illustrative soft real-time workload and design a scheduler S that incorporates the knowledge of soft real-time applications in {\em all\/} aspects of the scheduler to support responsiveness. For this we first define a {\em laxity\/} value that can be interpreted as the target scheduling latency that the workload desires. The load balancer is also designed to minimize the latency for real-time tasks. For cache management, we take cache-affinity into account for real time tasks and load-balance accordingly to prevent cache thrashing. We measured cache misses and demonstrated that cache management is essential for soft real time tasks. Although our scheduler S employs a different design philosophy, interestingly enough it can be implemented with simple modifications to the Xen hypervisor's credit scheduler. Our experiments demonstrate that the Xen scheduler with our modifications can support soft real-time guests well, without penalizing non-real-time domains.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "enterprise telephony workloads; laxity; server consolidation; virtualization; xen", } @Article{Odaira:2010:ERT, author = "Rei Odaira and Kazunori Ogata and Kiyokuni Kawachiya and Tamiya Onodera and Toshio Nakatani", title = "Efficient runtime tracking of allocation sites in {Java}", journal = j-SIGPLAN, volume = "45", number = "7", pages = "109--120", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736014", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tracking the allocation site of every object at runtime is useful for reliable, optimized Java. To be used in production environments, the tracking must be accurate with minimal speed loss. Previous approaches suffer from performance degradation due to the additional field added to each object or track the allocation sites only probabilistically. 
We propose two novel approaches to track the allocation sites of every object in Java with only a 1.0\% slow-down on average. Our first approach, the {\em Allocation-Site-as-a-Hash-code (ASH) Tracker}, encodes the allocation site ID of an object into the hash code field of its header by regarding the ID as part of the hash code. ASH Tracker avoids an excessive increase in hash code collisions by dynamically shrinking the bit-length of the ID as more and more objects are allocated at that site. For those Java VMs without the hash code field, our second approach, the {\em Allocation-Site-via-a-Class-pointer (ASC) Tracker}, makes the class pointer field in an object header refer to the allocation site structure of the object, which in turn points to the actual class structure. ASC Tracker mitigates the indirection overhead by constant-class-field duplication and allocation-site equality checks. While a previous approach of adding a 4-byte field caused up to 14.4\% and an average 5\% slowdown, both ASH and ASC Trackers incur at most a 2.0\% and an average 1.0\% loss. We demonstrate the usefulness of our low-overhead trackers by an allocation-site-aware memory leak detector and allocation-site-based pretenuring in generational GC. Our pretenuring achieved on average 1.8\% and up to 11.8\% speedups in SPECjvm2008.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "allocation site; hash code; memory allocation", } @Article{Tatsubori:2010:EJT, author = "Michiaki Tatsubori and Akihiko Tozawa and Toyotaro Suzumura and Scott Trent and Tamiya Onodera", title = "Evaluation of a just-in-time compiler retrofitted for {PHP}", journal = j-SIGPLAN, volume = "45", number = "7", pages = "121--132", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736015", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers who develop Web applications often use dynamic scripting languages such as Perl, PHP, Python, and Ruby. For general purpose scripting language usage, interpreter-based implementations are efficient and popular but the server-side usage for Web application development implies an opportunity to significantly enhance Web server throughput. This paper summarizes a study of the optimization of PHP script processing. We developed a PHP processor, P9, by adapting an existing production-quality just-in-time (JIT) compiler for a Java virtual machine, for which optimization technologies have been well-established, especially for server-side application. This paper describes and contrasts microbenchmarks and SPECweb2005 benchmark results for a well-tuned configuration of a traditional PHP interpreter and our JIT compiler-based implementation, P9. Experimental results with the microbenchmarks show 2.5-9.5x advantage with P9, and the SPECweb2005 measurements show about 20-30\% improvements. These results show that the acceleration of dynamic scripting language processing does matter in a realistic Web application server environment. 
CPU usage profiling shows our simple JIT compiler introduction reduces the PHP core runtime overhead from 45\% to 13\% for a SPECweb2005 scenario, implying that further improvements of dynamic compilers would provide little additional return unless other major overheads such as heavy memory copy between the language runtime and Web server frontend are reduced.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic scripting languages; just-in-time compiler; php", } @Article{Namjoshi:2010:NOP, author = "Manjiri A. Namjoshi and Prasad A. Kulkarni", title = "Novel online profiling for virtual machines", journal = j-SIGPLAN, volume = "45", number = "7", pages = "133--144", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736016", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Application {\em profiling\/} is a popular technique to improve program performance based on its behavior. {\em Offline\/} profiling, although beneficial for several applications, fails in cases where prior program runs may not be feasible, or if changes in input cause the profile to not match the behavior of the actual program run. Managed languages, like Java and C\#, provide a unique opportunity to overcome the drawbacks of offline profiling by generating the profile information online during the current program run. Indeed, online profiling is extensively used in current VMs, especially during selective compilation to improve program {\em startup\/} performance, as well as during other feedback-directed optimizations.\par In this paper we illustrate the drawbacks of the current {\em reactive\/} mechanism of online profiling during selective compilation. Current VM profiling mechanisms are slow -- thereby delaying associated transformations, and estimate future behavior based on the program's immediate past -- leading to potential misspeculation that limits the benefits of compilation. We show that these drawbacks produce an average performance loss of over 14.5\% on our set of benchmark programs, over an {\em ideal offline\/} approach that accurately compiles the hot methods early. We then propose and evaluate the potential of a novel strategy to achieve similar performance benefits with an online profiling approach. Our new online profiling strategy uses early determination of loop iteration bounds to predict future method hotness.
We explore and present promising results on the potential, feasibility, and other issues involved in the successful implementation of this approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "java; online profiling; virtual machines", } @Article{Guha:2010:DPS, author = "Apala Guha and Kim Hazelwood and Mary Lou Soffa", title = "{DBT} path selection for holistic memory efficiency and performance", journal = j-SIGPLAN, volume = "45", number = "7", pages = "145--156", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837854.1736018", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic binary translators (DBTs) provide powerful platforms for building dynamic program monitoring and adaptation tools. DBTs, however, have high memory demands because they cache translated code and auxiliary code to a software code cache and must also maintain data structures to support the code cache. The high memory demands make it difficult for memory-constrained embedded systems to take advantage of DBT-based tools. Previous research on DBT memory management focused on the translated code and auxiliary code only. However, we found that data structures are comparable to the code cache in size. We show that the translated code size, auxiliary code size and the data structure size interact in a complex manner, depending on the path selection (trace selection and link formation) strategy. Therefore, holistic memory efficiency (comprising translated code, auxiliary code and data structures) cannot be improved by focusing on the code cache only. In this paper, we use path selection for improving holistic memory efficiency which in turn impacts performance in memory-constrained environments. Although there has been previous research on path selection, such research only considered performance in memory-unconstrained environments.\par The challenge for holistic memory efficiency is that the path selection strategy results in complex interactions between the memory demand components. Also, individual aspects of path selection and the holistic memory efficiency may impact performance in complex ways. We explore these interactions to motivate path selection targeting holistic memory demand. We enumerate all the aspects involved in a path selection design and evaluate a comprehensive set of approaches for each aspect.
Finally, we propose a path selection strategy that reduces memory demands by 20\% and at the same time improves performance by 5-20\% compared to an industrial-strength DBT.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic binary translation; embedded systems; memory management; path selection; virtual machines", } @Article{Kondoh:2010:DBT, author = "Goh Kondoh and Hideaki Komatsu", title = "Dynamic binary translation specialized for embedded systems", journal = j-SIGPLAN, volume = "45", number = "7", pages = "157--166", month = jul, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735997.1736019", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:01 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes the design and implementation of a novel dynamic binary translation technique specialized for embedded systems. Virtual platforms have been widely used to develop embedded software and dynamic binary translation is essential to boost their speed in simulations. However, unlike application simulation, the code generated for systems simulation is still slow because the simulator must replicate all of the functions of the target hardware. Embedded systems, which focus on providing one or a few functions, utilize only a small portion of the processor's features most of the time. For example, they may use a Memory Management Unit (MMU) in a processor to map physical memory to effective addresses, but they may not need paged memory support as in an OS. We can exploit this to specialize the dynamically translated code for more performance.\par We built a specialization framework on top of a functional simulator with a dynamic binary translator. Using the framework, we implemented three specializers for an MMU, bi-endianness, and register banks. Experiments with the EEMBC1.1 benchmark showed that the speed of the specialized code was up to 39\% faster than the unspecialized code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic binary translation; embedded systems; partial evaluation; specialization", } @Article{Barabash:2010:TGC, author = "Katherine Barabash and Erez Petrank", title = "Tracing garbage collection on highly parallel platforms", journal = j-SIGPLAN, volume = "45", number = "8", pages = "1--10", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806653", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The pervasiveness of multiprocessor and multicore hardware and the rising level of available parallelism are radically changing the computing landscape. Can software deal with tomorrow's potential higher parallelism? In this paper we study this issue from the garbage collection perspective. In particular, we investigate the scalability of parallel heap tracing, which stands at the core of the garbage collection activity. Heap shapes can be sequential in nature, and prevent the collector from scaling the trace. 
We start by proposing the idealized trace utilization as a measure for evaluating the scalability of a given heap shape. We then examine standard Java benchmarks and evaluate the existence of non-scalable object-graph shapes in their execution. Next, we propose and implement a prototype of garbage collection techniques that attempt to ameliorate the object-graph shape problem. Finally, we measure and report their efficacy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "garbage collection; memory management; parallel garbage collection; runtime systems", } @Article{Siebert:2010:CPR, author = "Fridtjof Siebert", title = "Concurrent, parallel, real-time garbage-collection", journal = j-SIGPLAN, volume = "45", number = "8", pages = "11--20", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806654", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the current developments in CPU implementations, it becomes obvious that ever more parallel multicore systems will be used even in embedded controllers that require real-time guarantees. When garbage collection is used in these systems, parallel and concurrent garbage collection brings important performance advantages in the average case. In a real-time system, however, guarantees on the GC's performance in the worst case are required.\par This paper explains how the single-CPU real-time GC of the Java implementation JamaicaVM was changed to make it a hard real-time garbage collector that is parallel and concurrent. Parallel means that an arbitrary number of CPUs may perform GC work in parallel, while concurrent means that the GC work can be performed concurrently to the application code without pre-empting the application. In addition, the single units of work that this garbage collector has to perform are very small and uniform, and the total amount of GC work is bounded by a function of the heap size, so that for any application with a bounded amount of reachable memory the GC work can be scheduled such that sufficient GC progress is ensured and the application never runs out of heap space.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "concurrent; garbage collection; java; multicore; parallel; real-time", } @Article{Anderson:2010:OPN, author = "Todd A. Anderson", title = "Optimizations in a private nursery-based garbage collector", journal = j-SIGPLAN, volume = "45", number = "8", pages = "21--30", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806655", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a garbage collector designed around the use of permanent, private, thread-local nurseries and is principally oriented towards functional languages. We try to maximize the cache hit rate by having threads continually reuse their individual private nurseries.
These private nurseries operate in such a way that they can be garbage collected independently of other threads, which creates low collection pause times. Objects which survive thread-local collections are moved to a mature generation that can be collected either concurrently or in a stop-the-world fashion. We describe several optimizations (including two dynamic control parameter adaptation schemes) related to garbage collecting the private nurseries and to our concurrent collector, some of which are made possible when the language provides mutability information. We tested our collector against six benchmarks and saw single-threaded performance improvements in the range of 5-74\%. We also saw a 10x increase (for 24 processors) in scalability for one parallel benchmark that had previously been memory-bound.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "functional languages; garbage collection", } @Article{Nagarakatte:2010:CCE, author = "Santosh Nagarakatte and Jianzhou Zhao and Milo M. K. Martin and Steve Zdancewic", title = "{CETS}: compiler enforced temporal safety for {C}", journal = j-SIGPLAN, volume = "45", number = "8", pages = "31--40", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806657", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Temporal memory safety errors, such as dangling pointer dereferences and double frees, are a prevalent source of software bugs in unmanaged languages such as C. Existing schemes that attempt to retrofit temporal safety for such languages have high runtime overheads and/or are incomplete, thereby limiting their effectiveness as debugging aids. This paper presents CETS, a compile-time transformation for detecting all violations of temporal safety in C programs. Inspired by existing approaches, CETS maintains a unique identifier with each object, associates this metadata with the pointers in a disjoint metadata space to retain memory layout compatibility, and checks that the object is still allocated on pointer dereferences. A formal proof shows that this is sufficient to provide temporal safety even in the presence of arbitrary casts if the program contains no spatial safety violations. Our CETS prototype employs both temporal check removal optimizations and traditional compiler optimizations to achieve a runtime overhead of just 48\% on average. 
When combined with a spatial-checking system, the average overall overhead is 116\% for complete memory safety.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "c; dangling pointers; memory safety; temporal errors", } @Article{Vechev:2010:PPC, author = "Martin Vechev and Eran Yahav and Greta Yorsh", title = "{PHALANX}: parallel checking of expressive heap assertions", journal = j-SIGPLAN, volume = "45", number = "8", pages = "41--50", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806658", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unrestricted use of heap pointers makes software systems difficult to understand and to debug. To address this challenge, we developed PHALANX -- a practical framework for dynamically checking expressive heap properties such as ownership, sharing and reachability. PHALANX uses novel parallel algorithms to efficiently check a wide range of heap properties utilizing the available cores.\par The PHALANX runtime is implemented on top of IBM's production Java virtual machine. This has enabled us to apply our new techniques to real-world software. We checked expressive heap properties in various scenarios and found the runtime support to be valuable for debugging and program understanding. Further, our experimental results on DaCapo and other benchmarks indicate that evaluating heap queries using parallel algorithms can lead to significant performance improvements, often resulting in linear speedups as the number of cores increases.\par To encourage adoption by programmers, we extended an existing JML compiler to translate expressive JML assertions about the heap into their efficient implementation provided by PHALANX. To debug her program, a programmer can annotate it with expressive heap assertions in JML, which are efficiently checked by PHALANX.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "ownership; parallel garbage collector; virtual machine", } @Article{Sewell:2010:MEA, author = "Peter Sewell", title = "Memory, an elusive abstraction", journal = j-SIGPLAN, volume = "45", number = "8", pages = "51--52", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806660", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multiprocessors are now ubiquitous. They provide an abstraction of shared memory, accessible by concurrently executing threads, which supports a wide range of software. However, exactly what this key abstraction is -- what the hardware designers implement, and what programmers can depend on -- is surprisingly elusive. In 1979, when articulating the notion of sequential consistency (SC), Lamport wrote 'For some applications, achieving sequential consistency may not be worth the price of slowing down the processors.' [7], and indeed most major multiprocessor families, including Alpha, ARM, Itanium, Power, Sparc, and x86, do not provide the abstraction of SC memory.
Internally, they incorporate a range of sophisticated optimisations which have various programmer-visible effects. For some (such as Sparc) these effects are captured in a well-defined relaxed memory model, making it possible (if challenging) to reason with confidence about the behaviour of concurrent programs. For others, however, it has been very unclear what a reasonable model is, despite extensive research over the last three decades. In this talk, I will reflect on the experience of my colleagues and me in trying to establish usable models for x86 multiprocessors, where it appears that our x86-TSO model suffices for common-case code [1-4], and for Power and ARM multiprocessors, where we have models that capture some but not all aspects of their behaviour [5,6]. The underlying causes of these difficulties are complex, including:\par The programmer-observable relaxed-memory behaviour of a multiprocessor is a whole-system property that arises from the interaction between many complex aspects of the processor implementation: speculative execution, store buffering, cache protocol, and so forth. Programs are executed (and tested) on specific multiprocessor implementations, but processor vendors attempt to document loose specifications to cover a range of possible (past and future) implementations. Multiprocessor implementation details are typically confidential and may change radically from one implementation to another. Vendor specifications suffer from the tension between the need for loose specification, to preserve freedom for such changes, and the need for tight specification, to give strong properties to programmers. All too often, loose specification has been achieved by vague specification, using informal prose. When it comes to subtle concurrent properties this is almost inevitably ambiguous; it also makes it impossible (even in principle) to test conformance between a processor implementation and such a specification, let alone to verify such a correspondence or to reason about concurrent programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "relaxed memory models; semantics", } @Article{Petricek:2010:CHG, author = "Tomas Petricek and Don Syme", title = "Collecting {Hollywood}'s garbage: avoiding space-leaks in composite events", journal = j-SIGPLAN, volume = "45", number = "8", pages = "53--62", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837855.1806662", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The reactive programming model is largely different to what we're used to, as we don't have full control over the application's control flow. If we mix the declarative and imperative programming style, which is usual in the ML family of languages, the situation is even more complex. It becomes easy to introduce patterns where the usual garbage collector for objects cannot automatically dispose of all components that we intuitively consider garbage.\par In this paper we discuss a duality between the definitions of garbage for {\em objects\/} and {\em events}.
We combine them into a single definition to specify the notion of garbage for the reactive programming model in a mixed functional/imperative language, and we present a formal algorithm for collecting garbage in this environment.\par Building on top of the theoretical model, we implement a library for reactive programming that does not cause leaks when used in the mixed declarative/imperative model. The library allows us to safely combine both of the reactive programming patterns. As a result, we can take advantage of the clarity and simplicity of the declarative approach as well as the expressivity of the imperative model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "combinator libraries; duality; event-driven; garbage collection; inversion of control; reactive programming", } @Article{Tian:2010:SPU, author = "Chen Tian and Min Feng and Rajiv Gupta", title = "Speculative parallelization using state separation and multiple value prediction", journal = j-SIGPLAN, volume = "45", number = "8", pages = "63--72", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806663", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the availability of chip multiprocessor (CMP) and simultaneous multithreading (SMT) machines, extracting thread-level parallelism from a sequential program has become crucial for improving performance. However, many sequential programs cannot be easily parallelized due to the presence of dependences. To solve this problem, different solutions have been proposed. Some of them make the optimistic assumption that such dependences rarely manifest themselves at runtime. However, when this assumption is violated, the recovery causes very large overhead. Other approaches incur large synchronization or computation overhead when resolving the dependences. Consequently, for a loop with frequently arising cross-iteration dependences, previous techniques are not able to speed up the execution. In this paper we propose a compiler technique which uses state separation and multiple value prediction to speculatively parallelize loops in sequential programs that contain frequently arising cross-iteration dependences. The key idea is to generate multiple versions of a loop iteration based on multiple predictions of values of variables involved in cross-iteration dependences (i.e., live-in variables). These speculative versions and the preceding loop iteration are executed in separate memory states simultaneously. After the execution, if one of these versions is correct (i.e., its predicted values are found to be correct), then we merge its state and the state of the preceding iteration because the dependence between the two iterations is correctly resolved. The memory states of other incorrect versions are completely discarded. Based on this idea, we further propose a runtime adaptive scheme that not only gives good performance but also achieves better CPU utilization. We conducted experiments on 10 benchmark programs on a real machine.
The results show that our technique can achieve a 1.7x speedup on average across all benchmarks used.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "multicore processors; speculative parallelization", } @Article{Ugawa:2010:IRB, author = "Tomoharu Ugawa and Hideya Iwasaki and Taiichi Yuasa", title = "Improved replication-based incremental garbage collection for embedded systems", journal = j-SIGPLAN, volume = "45", number = "8", pages = "73--82", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806664", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We have developed an incremental compacting garbage collector for embedded Java systems. The collector divides the heap into equal-sized pages and uses segregated free lists for fast allocation. Collectors that have such a heap layout have a problem of fragmentation in allocating objects larger than the page size. We solve this problem by using replication-based incremental compaction. The compactor evacuates all objects in one area, the evacuation area, of the heap, thereby creating a large chunk of free space. We developed an algorithm for choosing the evacuation area that effectively cures fragmentation. The compactor does not use any read barriers. Instead, it uses a technique similar to replication-based incremental copying collection. This needs forwarding pointers for all evacuated objects. Rather than introducing an extra field for each object, we use a hash table to store forwarding pointers.\par Evaluation of this garbage collector implemented in Sun's J2ME Java Virtual Machine showed that all the benchmarks used were able to run without memory starvation using heap sizes of only 151\%-286\% of the maximum amount of live data plus 8 KB for the hash table. Experiments on a desktop computer, though it is not a platform for embedded systems, showed that the maximum pause time was shorter than 200 $\mu$s, which was comparable to that of our implementation of the snapshot-at-the-beginning collector without compaction. On an ARM processor, the runtime overhead was 1\%-16\%, with 8.0\% on average, compared to the mark-sweep collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "compaction; embedded systems; fragmentation; garbage collection; real-time garbage collection", } @Article{Hellyer:2010:LCW, author = "Laurence Hellyer and Richard Jones and Antony L. Hosking", title = "The locality of concurrent write barriers", journal = j-SIGPLAN, volume = "45", number = "8", pages = "83--92", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806666", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent and incremental collectors require barriers to ensure correct synchronisation between mutator and collector. The overheads imposed by particular barriers on particular systems have been widely studied.
Somewhat fewer studies have also compared barriers in terms of their termination properties or the volume of floating garbage they generate. Until now, the consequences for locality of different barrier choices have not been studied, although locality will be of increasing importance for emerging architectures. This paper provides a study of the locality of concurrent write barriers, independent of the processor architecture, virtual machine, compiler or garbage collection algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "garbage collection; java; language implementation; memory management", } @Article{Zhao:2010:EMS, author = "Qin Zhao and Derek Bruening and Saman Amarasinghe", title = "Efficient memory shadowing for 64-bit architectures", journal = j-SIGPLAN, volume = "45", number = "8", pages = "93--102", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806667", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Shadow memory is used by dynamic program analysis tools to store metadata for tracking properties of application memory. The efficiency of mapping between application memory and shadow memory has a substantial impact on the overall performance of such analysis tools. However, traditional memory mapping schemes that work well on 32-bit architectures cannot be easily ported to 64-bit architectures due to the much larger 64-bit address space.\par This paper presents EMS64, an efficient memory shadowing scheme for 64-bit architectures. By taking advantage of application reference locality and unused regions in the 64-bit address space, EMS64 provides a fast and flexible memory mapping scheme without relying on any underlying platform features or requiring any specific shadow memory size. Our experiments show that EMS64 is able to reduce the runtime shadow memory translation overhead to 81\% on average, which almost halves the overhead of the fastest 64-bit shadow memory system we are aware of.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "dynamic optimization; shadow memory", } @Article{Singer:2010:EGC, author = "Jeremy Singer and Richard E. Jones and Gavin Brown and Mikel Luj{\'a}n", title = "The economics of garbage collection", journal = j-SIGPLAN, volume = "45", number = "8", pages = "103--112", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806669", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper argues that economic theory can improve our understanding of memory management. We introduce the {\em allocation curve} as an analogue of the demand curve from microeconomics. An allocation curve for a program characterises how the amount of garbage collection activity required during its execution varies in relation to the heap size associated with that program. The standard treatment of microeconomic demand curves (shifts and elasticity) can be applied directly and intuitively to our new allocation curves.
As an application of this new theory, we show how {\em allocation elasticity\/} can be used to control the heap growth rate for variable-sized heaps in Jikes RVM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "allocation curve; elasticity; garbage collection; java; memory management; microeconomics", } @Article{Beg:2010:GTA, author = "Mirza Beg and Peter van Beek", title = "A graph theoretic approach to cache-conscious placement of data for direct mapped caches", journal = j-SIGPLAN, volume = "45", number = "8", pages = "113--120", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806670", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Caches were designed to amortize the cost of memory accesses by moving copies of frequently accessed data closer to the processor. Over the years, the increasing gap between processor speed and memory access latency has made the cache a bottleneck for program performance. Enhancing cache performance has been instrumental in speeding up programs. For this reason several hardware and software techniques have been proposed by researchers to optimize the cache for minimizing the number of misses. Among these are compile-time data placement techniques in memory which improve cache performance. For the purpose of this work, we concern ourselves with the problem of laying out data in memory given the sequence of accesses on a finite set of data objects such that cache misses are minimized. The problem has been shown to be hard to solve optimally even if the sequence of data accesses is known at compile time. In this paper we show that given a direct-mapped cache, its size, and the data access sequence, it is possible to identify the instances where there are no conflict misses. We describe an algorithm that can assign the data to cache for a minimal number of misses if there exists a way in which conflict misses can be avoided altogether. We also describe the implementation of a heuristic for assigning data to cache for instances where the size of the cache forces conflict misses. Experiments show that our technique results in a 30\% reduction in the number of cache misses compared to the original assignment.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "cache consciousness; cache optimization; data placement in cache; memory management; offline algorithms", } @Article{Albert:2010:PIM, author = "Elvira Albert and Samir Genaim and Miguel G{\'o}mez-Zamalloa", title = "Parametric inference of memory requirements for garbage collected languages", journal = j-SIGPLAN, volume = "45", number = "8", pages = "121--130", month = aug, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1806651.1806671", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 8 17:55:48 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The accurate prediction of a program's memory requirements is a critical component in software development.
Existing heap space analyses either do not take deallocation into account or adopt specific models of garbage collectors which do not necessarily correspond to the actual memory usage. We present a novel approach to inferring upper bounds on memory requirements of Java-like programs which is {\em parametric\/} on the notion of {\em object lifetime}, i.e., on when objects become collectible. If object lifetimes are inferred by a reachability analysis, then our analysis infers accurate upper bounds on the memory consumption for a {\em reachability\/}-based garbage collector. Interestingly, if object lifetimes are inferred by a {\em heap liveness\/} analysis, then we approximate the program's minimal memory requirement, i.e., the peak memory usage when using an optimal garbage collector which frees objects as soon as they become dead. The key idea is to integrate information on object lifetimes into the process of generating the {\em recurrence equations\/} which capture the memory usage at the different program states. If the heap size limit is set to the memory requirement inferred by our analysis, it is ensured that execution will not exceed the memory limit, with the only assumption that garbage collection works when the limit is reached. Experiments on Java bytecode programs provide evidence of the feasibility and accuracy of our analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "garbage collection; java bytecode; live heap space analysis; low-level languages; peak memory consumption", } @Article{Gordon:2010:MMO, author = "Michael J. C. Gordon", title = "{ML}: metalanguage or object language?", journal = j-SIGPLAN, volume = "45", number = "9", pages = "1--2", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chapman:2010:GAL, author = "James Chapman and Pierre-{\'E}variste Dagand and Conor McBride and Peter Morris", title = "The gentle art of levitation", journal = j-SIGPLAN, volume = "45", number = "9", pages = "3--14", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Vytiniotis:2010:FPE, author = "Dimitrios Vytiniotis and Andrew J.
Kennedy", title = "Functional pearl: every bit counts", journal = j-SIGPLAN, volume = "45", number = "9", pages = "15--26", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Buisson:2010:RES, author = "J{\'e}r{\'e}my Buisson and Fabien Dagnat", title = "{ReCaml}: execution state as the cornerstone of reconfigurations", journal = j-SIGPLAN, volume = "45", number = "9", pages = "27--38", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863550", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mazurak:2010:LCC, author = "Karl Mazurak and Steve Zdancewic", title = "{Lolliproc}: to concurrency from classical linear logic via {Curry--Howard} and control", journal = j-SIGPLAN, volume = "45", number = "9", pages = "39--50", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{VanHorn:2010:AAM, author = "David {Van Horn} and Matthew Might", title = "Abstracting abstract machines", journal = j-SIGPLAN, volume = "45", number = "9", pages = "51--62", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Holdermans:2010:PFA, author = "Stefan Holdermans and Jurriaan Hage", title = "Polyvariant flow analysis with higher-ranked polymorphic types and higher-order effect operators", journal = j-SIGPLAN, volume = "45", number = "9", pages = "63--74", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863554", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Naylor:2010:RR, author = "Matthew Naylor and Colin Runciman", title = "The {Reduceron} reconfigured", journal = j-SIGPLAN, volume = "45", number = "9", pages = "75--86", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863556", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L =
"0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The leading implementations of graph reduction all target conventional processors designed for low-level imperative execution. In this paper, we present a processor specially designed to perform graph reduction. Our processor, the Reduceron, is implemented using off-the-shelf reconfigurable hardware. We highlight the low-level parallelism present in sequential graph reduction, and show how parallel memories and dynamic analyses are used in the Reduceron to achieve an average reduction rate of 0.55 function applications per clock cycle.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Scott:2010:UFP, author = "David Scott and Richard Sharp and Thomas Gazagnaire and Anil Madhavapeddy", title = "Using functional programming within an industrial product group: perspectives and perceptions", journal = j-SIGPLAN, volume = "45", number = "9", pages = "87--92", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bergstrom:2010:LTS, author = "Lars Bergstrom and Mike Rainey and John Reppy and Adam Shaw and Matthew Fluet", title = "Lazy tree splitting", journal = j-SIGPLAN, volume = "45", number = "9", pages = "93--104", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863558", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bierman:2010:SSS, author = "Gavin M. Bierman and Andrew D.
Gordon and Catalin Hritcu and David Langworthy", title = "Semantic subtyping with an {SMT} solver", journal = j-SIGPLAN, volume = "45", number = "9", pages = "105--116", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863560", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tobin-Hochstadt:2010:LTU, author = "Sam Tobin-Hochstadt and Matthias Felleisen", title = "Logical types for untyped languages", journal = j-SIGPLAN, volume = "45", number = "9", pages = "117--128", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863561", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Felleisen:2010:TC, author = "Matthias Felleisen", title = "{TeachScheme!}: a checkpoint", journal = j-SIGPLAN, volume = "45", number = "9", pages = "129--130", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Crary:2010:HOR, author = "Karl Crary", title = "Higher-order representation of substructural logics", journal = j-SIGPLAN, volume = "45", number = "9", pages = "131--142", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863565", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dreyer:2010:IHO, author = "Derek Dreyer and Georg Neis and Lars Birkedal", title = "The impact of higher-order state and control effects on local relational reasoning", journal = j-SIGPLAN, volume = "45", number = "9", pages = "143--156", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Reed:2010:DMT, author = "Jason Reed and Benjamin C. 
Pierce", title = "Distance makes the types grow stronger: a calculus for differential privacy", journal = j-SIGPLAN, volume = "45", number = "9", pages = "157--168", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morgenstern:2010:STP, author = "Jamie Morgenstern and Daniel R. Licata", title = "Security-typed programming within dependently typed programming", journal = j-SIGPLAN, volume = "45", number = "9", pages = "169--180", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Voigtlander:2010:CSS, author = "Janis Voigtl{\"a}nder and Zhenjiang Hu and Kazutaka Matsuda and Meng Wang", title = "Combining syntactic and semantic bidirectionalization", journal = j-SIGPLAN, volume = "45", number = "9", pages = "181--192", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863571", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Barbosa:2010:MLA, author = "Davi M. J. Barbosa and Julien Cretin and Nate Foster and Michael Greenberg and Benjamin C. 
Pierce", title = "Matching lenses: alignment and view update", journal = j-SIGPLAN, volume = "45", number = "9", pages = "193--204", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hidaka:2010:BGT, author = "Soichiro Hidaka and Zhenjiang Hu and Kazuhiro Inaba and Hiroyuki Kato and Kazutaka Matsuda and Keisuke Nakano", title = "Bidirectionalizing graph transformations", journal = j-SIGPLAN, volume = "45", number = "9", pages = "205--216", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863573", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pouillard:2010:FLP, author = "Nicolas Pouillard and Fran{\c{c}}ois Pottier", title = "A fresh look at programming with names and binders", journal = j-SIGPLAN, volume = "45", number = "9", pages = "217--228", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863575", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Crestani:2010:ERG, author = "Marcus Crestani and Michael Sperber", title = "Experience report: growing programming languages for beginning students", journal = j-SIGPLAN, volume = "45", number = "9", pages = "229--234", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863576", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Culpepper:2010:FM, author = "Ryan Culpepper and Matthias Felleisen", title = "Fortifying macros", journal = j-SIGPLAN, volume = "45", number = "9", pages = "235--246", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863577", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Blelloch:2010:FPA, author = "Guy E. 
Blelloch", title = "Functional parallel algorithms", journal = j-SIGPLAN, volume = "45", number = "9", pages = "247--248", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863579", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Arnold:2010:SVS, author = "Gilad Arnold and Johannes H{\"o}lzl and Ali Sinan K{\"o}ksal and Rastislav Bod{\'\i}k and Mooly Sagiv", title = "Specifying and verifying sparse matrix codes", journal = j-SIGPLAN, volume = "45", number = "9", pages = "249--260", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863581", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Keller:2010:RSP, author = "Gabriele Keller and Manuel M. T. Chakravarty and Roman Leshchinskiy and Simon Peyton Jones and Ben Lippmeier", title = "Regular, shape-polymorphic, parallel arrays in {Haskell}", journal = j-SIGPLAN, volume = "45", number = "9", pages = "261--272", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863582", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{McCreight:2010:CFC, author = "Andrew McCreight and Tim Chevalier and Andrew Tolmach", title = "A certified framework for compiling and executing garbage-collected languages", journal = j-SIGPLAN, volume = "45", number = "9", pages = "273--284", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863584", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Danielsson:2010:TPC, author = "Nils Anders Danielsson", title = "Total parser combinators", journal = j-SIGPLAN, volume = "45", number = "9", pages = "285--296", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863585", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Brady:2010:SYI, author = "Edwin C. 
Brady and Kevin Hammond", title = "Scrapping your inefficient engine: using partial evaluation to improve domain-specific language implementation", journal = j-SIGPLAN, volume = "45", number = "9", pages = "297--308", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863587", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mitchell:2010:RS, author = "Neil Mitchell", title = "Rethinking supercompilation", journal = j-SIGPLAN, volume = "45", number = "9", pages = "309--320", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863588", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chargueraud:2010:PVT, author = "Arthur Chargu{\'e}raud", title = "Program verification through characteristic formulae", journal = j-SIGPLAN, volume = "45", number = "9", pages = "321--332", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863590", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Stampoulis:2010:VTC, author = "Antonis Stampoulis and Zhong Shao", title = "{VeriML}: typed computation of logical terms inside a language with effects", journal = j-SIGPLAN, volume = "45", number = "9", pages = "333--344", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863591", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bernardy:2010:PDT, author = "Jean-Philippe Bernardy and Patrik Jansson and Ross Paterson", title = "Parametricity and dependent types", journal = j-SIGPLAN, volume = "45", number = "9", pages = "345--356", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863592", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fischer:2010:PRE, author = "Sebastian Fischer and Frank Huch and Thomas Wilke", title = "A play on regular expressions: functional pearl", journal = j-SIGPLAN, volume = "45", number = "9", pages = "357--368", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863594", ISSN = "0362-1340 (print), 1523-2867 
(print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pop:2010:ERH, author = "Iustin Pop", title = "Experience report: {Haskell} as a reagent: results and observations on the use of {Haskell} in a {Python} project", journal = j-SIGPLAN, volume = "45", number = "9", pages = "369--374", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863595", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morris:2010:ICT, author = "J. Garrett Morris and Mark P. Jones", title = "Instance chains: type class programming without overlapping instances", journal = j-SIGPLAN, volume = "45", number = "9", pages = "375--386", month = sep, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932681.1863596", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:43 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Forrest:2010:CES, author = "Stephanie Forrest", title = "The case for evolvable software", journal = j-SIGPLAN, volume = "45", number = "10", pages = "1--1", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pierce:2010:ASF, author = "Benjamin C. Pierce", title = "Art, science, and fear", journal = j-SIGPLAN, volume = "45", number = "10", pages = "2--2", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869540", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Syme:2010:FTS, author = "Don Syme", title = "{F\#}: Taking Succinct, Efficient, Typed Functional Programming into the Mainstream", journal = j-SIGPLAN, volume = "45", number = "10", pages = "3--3", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1921682", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Stanley:2010:AOH, author = "Kenneth O. 
Stanley", title = "To achieve our highest goals, we must be willing to abandon them", journal = j-SIGPLAN, volume = "45", number = "10", pages = "3--3", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Roberson:2010:EMG, author = "Michael Roberson and Chandrasekhar Boyapati", title = "Efficient modular glass box software model checking", journal = j-SIGPLAN, volume = "45", number = "10", pages = "4--21", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869461", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hanenberg:2010:EAS, author = "Stefan Hanenberg", title = "An experiment about static and dynamic type systems: doubts about the positive impact of static type systems on development time", journal = j-SIGPLAN, volume = "45", number = "10", pages = "22--35", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869462", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Itzhaky:2010:SIS, author = "Shachar Itzhaky and Sumit Gulwani and Neil Immerman and Mooly Sagiv", title = "A simple inductive synthesis methodology and its applications", journal = j-SIGPLAN, volume = "45", number = "10", pages = "36--46", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869463", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mercadal:2010:DSA, author = "Julien Mercadal and Quentin Enard and Charles Consel and Nicolas Loriant", title = "A domain-specific approach to architecturing error handling in pervasive computing", journal = j-SIGPLAN, volume = "45", number = "10", pages = "47--61", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869465", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2010:GFR, author = "Wei Li and Charles Zhang and Songlin Hu", title = "{G-Finder}: routing programming questions closer to the experts", journal = j-SIGPLAN, volume = "45", number = "10", pages = "62--73", 
month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869466", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hoda:2010:AC, author = "Rashina Hoda and Philippe Kruchten and James Noble and Stuart Marshall", title = "Agility in context", journal = j-SIGPLAN, volume = "45", number = "10", pages = "74--88", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869467", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Auerbach:2010:LJC, author = "Joshua Auerbach and David F. Bacon and Perry Cheng and Rodric Rabbah", title = "{Lime}: a {Java}-compatible and synthesizable language for heterogeneous architectures", journal = j-SIGPLAN, volume = "45", number = "10", pages = "89--108", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869469", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kou:2010:OFF, author = "Stephen Kou and Jens Palsberg", title = "From {OO} to {FPGA}: fitting round objects into square hardware?", journal = j-SIGPLAN, volume = "45", number = "10", pages = "109--124", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869470", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tian:2010:ICP, author = "Kai Tian and Yunlian Jiang and Eddy Z. Zhang and Xipeng Shen", title = "An input-centric paradigm for program dynamic optimizations", journal = j-SIGPLAN, volume = "45", number = "10", pages = "125--139", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869471", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wood:2010:CSS, author = "Benjamin P. 
Wood and Adrian Sampson and Luis Ceze and Dan Grossman", title = "Composable specifications for structured shared-memory communication", journal = j-SIGPLAN, volume = "45", number = "10", pages = "140--159", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869473", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shi:2010:DUW, author = "Yao Shi and Soyeon Park and Zuoning Yin and Shan Lu and Yuanyuan Zhou and Wenguang Chen and Weimin Zheng", title = "Do {I} use the wrong definition?: {DeFuse}: definition-use invariants for detecting concurrency and sequential bugs", journal = j-SIGPLAN, volume = "45", number = "10", pages = "160--174", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869474", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gabel:2010:SSD, author = "Mark Gabel and Junfeng Yang and Yuan Yu and Moises Goldszmidt and Zhendong Su", title = "Scalable and systematic detection of buggy inconsistencies in source code", journal = j-SIGPLAN, volume = "45", number = "10", pages = "175--190", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869475", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ogata:2010:SJN, author = "Kazunori Ogata and Dai Mikurube and Kiyokuni Kawachiya and Scott Trent and Tamiya Onodera", title = "A study of {Java}'s non-{Java} memory", journal = j-SIGPLAN, volume = "45", number = "10", pages = "191--204", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869477", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{McIlroy:2010:HJR, author = "Ross McIlroy and Joe Sventek", title = "{Hera-JVM}: a runtime system for heterogeneous multi-core architectures", journal = j-SIGPLAN, volume = "45", number = "10", pages = "205--222", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869478", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wegiel:2010:CLT, author = "Michal Wegiel and Chandra Krintz", title = 
"Cross-language, type-safe, and transparent object sharing for co-located managed runtimes", journal = j-SIGPLAN, volume = "45", number = "10", pages = "223--240", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869479", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jin:2010:ISS, author = "Guoliang Jin and Aditya Thakur and Ben Liblit and Shan Lu", title = "Instrumentation and sampling strategies for cooperative concurrency bug isolation", journal = j-SIGPLAN, volume = "45", number = "10", pages = "241--255", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869481", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Reichenbach:2010:WCG, author = "Christoph Reichenbach and Neil Immerman and Yannis Smaragdakis and Edward E. Aftandilian and Samuel Z. Guyer", title = "What can the {GC} compute efficiently?: a language for heap assertions at {GC} time", journal = j-SIGPLAN, volume = "45", number = "10", pages = "256--269", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869482", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Purandare:2010:MOS, author = "Rahul Purandare and Matthew B. Dwyer and Sebastian Elbaum", title = "Monitor optimization via stutter-equivalent loop transformation", journal = j-SIGPLAN, volume = "45", number = "10", pages = "270--285", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869483", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Schaefer:2010:SIR, author = "Max Schaefer and Oege de Moor", title = "Specifying and implementing refactorings", journal = j-SIGPLAN, volume = "45", number = "10", pages = "286--301", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869485", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nguyen:2010:GBA, author = "Hoan Anh Nguyen and Tung Thanh Nguyen and Gary {Wilson, Jr.} and Anh Tuan Nguyen and Miryung Kim and Tien N. 
Nguyen", title = "A graph-based approach to {API} usage adaptation", journal = j-SIGPLAN, volume = "45", number = "10", pages = "302--321", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869486", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kell:2010:CAA, author = "Stephen Kell", title = "Component adaptation and assembly using interface relations", journal = j-SIGPLAN, volume = "45", number = "10", pages = "322--340", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869487", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Oliveira:2010:TCO, author = "Bruno C. d. S. Oliveira and Adriaan Moors and Martin Odersky", title = "Type classes as objects and implicits", journal = j-SIGPLAN, volume = "45", number = "10", pages = "341--360", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869489", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lerner:2010:SDT, author = "Benjamin S. 
Lerner and Herman Venter and Dan Grossman", title = "Supporting dynamic, third-party code customizations in {JavaScript} using aspects", journal = j-SIGPLAN, volume = "45", number = "10", pages = "361--376", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869490", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Herzeel:2010:DPR, author = "Charlotte Herzeel and Pascal Costanza", title = "Dynamic parallelization of recursive code: part 1: managing control flow interactions with the continuator", journal = j-SIGPLAN, volume = "45", number = "10", pages = "377--396", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869491", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dillig:2010:SHA, author = "Isil Dillig and Thomas Dillig and Alex Aiken", title = "Symbolic heap abstraction with demand-driven axiomatization of memory invariants", journal = j-SIGPLAN, volume = "45", number = "10", pages = "397--410", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869493", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liang:2010:DEP, author = "Percy Liang and Omer Tripp and Mayur Naik and Mooly Sagiv", title = "A dynamic evaluation of the precision of static heap abstractions", journal = j-SIGPLAN, volume = "45", number = "10", pages = "411--427", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869494", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mendez-Lojo:2010:PIB, author = "Mario M{\'e}ndez-Lojo and Augustine Mathew and Keshav Pingali", title = "Parallel inclusion-based points-to analysis", journal = j-SIGPLAN, volume = "45", number = "10", pages = "428--443", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869495", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kats:2010:SLW, author = "Lennart C. L. 
Kats and Eelco Visser", title = "The {Spoofax} language workbench: rules for declarative specification of languages and {IDEs}", journal = j-SIGPLAN, volume = "45", number = "10", pages = "444--463", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869497", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Servetto:2010:MMC, author = "Marco Servetto and Elena Zucca", title = "{MetaFJig}: a meta-circular composition language for {Java}-like classes", journal = j-SIGPLAN, volume = "45", number = "10", pages = "464--483", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869498", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Klose:2010:MLM, author = "Karl Klose and Klaus Ostermann", title = "Modular logic metaprogramming", journal = j-SIGPLAN, volume = "45", number = "10", pages = "484--503", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869499", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{vanStaden:2010:RAM, author = "Stephan van Staden and Cristiano Calcagno", title = "Reasoning about multiple related abstractions with {MultiStar}", journal = j-SIGPLAN, volume = "45", number = "10", pages = "504--519", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869501", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Qi:2010:HFS, author = "Xin Qi and Andrew C. 
Myers", title = "Homogeneous family sharing", journal = j-SIGPLAN, volume = "45", number = "10", pages = "520--538", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chiba:2010:MMC, author = "Shigeru Chiba and Atsushi Igarashi and Salikh Zakirov", title = "Mostly modular compilation of crosscutting concerns by contextual predicate dispatch", journal = j-SIGPLAN, volume = "45", number = "10", pages = "539--554", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869503", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Klein:2010:RTH, author = "Casey Klein and Matthew Flatt and Robert Bruce Findler", title = "Random testing for higher-order, stateful programs", journal = j-SIGPLAN, volume = "45", number = "10", pages = "555--566", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869505", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{McCarthy:2010:TSS, author = "Jay A. McCarthy", title = "The two-state solution: native and serializable continuations accord", journal = j-SIGPLAN, volume = "45", number = "10", pages = "567--582", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869506", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Swaine:2010:BFI, author = "James Swaine and Kevin Tew and Peter Dinda and Robert Bruce Findler and Matthew Flatt", title = "Back to the futures: incremental parallelization of existing sequential runtime systems", journal = j-SIGPLAN, volume = "45", number = "10", pages = "583--597", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869507", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zibin:2010:OIG, author = "Yoav Zibin and Alex Potanin and Paley Li and Mahmood Ali and Michael D. 
Ernst", title = "Ownership and immutability in generic {Java}", journal = j-SIGPLAN, volume = "45", number = "10", pages = "598--617", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cameron:2010:TO, author = "Nicholas Cameron and James Noble and Tobias Wrigstad", title = "Tribal ownership", journal = j-SIGPLAN, volume = "45", number = "10", pages = "618--633", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Matsakis:2010:TAT, author = "Nicholas D. Matsakis and Thomas R. Gross", title = "A time-aware type system for data-race protection and guaranteed initialization", journal = j-SIGPLAN, volume = "45", number = "10", pages = "634--651", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Upadhyaya:2010:AAR, author = "Gautam Upadhyaya and Samuel P. Midkiff and Vijay S. Pai", title = "Automatic atomic region identification in shared memory {SPMD} programs", journal = j-SIGPLAN, volume = "45", number = "10", pages = "652--670", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kulkarni:2010:TTP, author = "Aditya Kulkarni and Yu David Liu and Scott F. 
Smith", title = "Task types for pervasive atomicity", journal = j-SIGPLAN, volume = "45", number = "10", pages = "671--690", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Burckhardt:2010:CPR, author = "Sebastian Burckhardt and Alexandro Baldassin and Daan Leijen", title = "Concurrent programming with revisions and isolation types", journal = j-SIGPLAN, volume = "45", number = "10", pages = "691--707", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bebenita:2010:STB, author = "Michael Bebenita and Florian Brandner and Manuel Fahndrich and Francesco Logozzo and Wolfram Schulte and Nikolai Tillmann and Herman Venter", title = "{SPUR}: a trace-based {JIT} compiler for {CIL}", journal = j-SIGPLAN, volume = "45", number = "10", pages = "708--725", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kapur:2010:RRL, author = "Puneet Kapur and Brad Cossette and Robert J. 
Walker", title = "Refactoring references for library migration", journal = j-SIGPLAN, volume = "45", number = "10", pages = "726--738", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Altman:2010:PAI, author = "Erik Altman and Matthew Arnold and Stephen Fink and Nick Mitchell", title = "Performance analysis of idle programs", journal = j-SIGPLAN, volume = "45", number = "10", pages = "739--753", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Davis:2010:RBL, author = "Samuel Davis and Gregor Kiczales", title = "Registration-based language abstractions", journal = j-SIGPLAN, volume = "45", number = "10", pages = "754--773", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Verwaest:2010:PBR, author = "Toon Verwaest and Camillo Bruni and David Gurtner and Adrian Lienhard and Oscar Niestrasz", title = "{Pinocchio}: bringing reflection to life with first-class interpreters", journal = j-SIGPLAN, volume = "45", number = "10", pages = "774--789", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rajan:2010:CMD, author = "Hridesh Rajan and Steven M. 
Kautz and Wayne Rowcliffe", title = "Concurrency by modularity: design patterns, a case in point", journal = j-SIGPLAN, volume = "45", number = "10", pages = "790--805", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rinard:2010:PSA, author = "Martin Rinard and Henry Hoffmann and Sasa Misailovic and Stelios Sidiroglou", title = "Patterns and statistical analysis for understanding reduced resource computing", journal = j-SIGPLAN, volume = "45", number = "10", pages = "806--821", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sorensen:2010:PTC, author = "Andrew Sorensen and Henry Gardner", title = "Programming with time: cyber-physical programming with {impromptu}", journal = j-SIGPLAN, volume = "45", number = "10", pages = "822--834", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chafi:2010:LVH, author = "Hassan Chafi and Zach DeVito and Adriaan Moors and Tiark Rompf and Arvind K. 
Sujeeth and Pat Hanrahan and Martin Odersky and Kunle Olukotun", title = "Language virtualization for heterogeneous parallel computing", journal = j-SIGPLAN, volume = "45", number = "10", pages = "835--847", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869527", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ossher:2010:FMT, author = "Harold Ossher and Rachel Bellamy and Ian Simmonds and David Amid and Ateret Anaby-Tavor and Matthew Callery and Michael Desmond and Jacqueline de Vries and Amit Fisher and Sophia Krasikov", title = "Flexible modeling tools for pre-requirements analysis: conceptual architecture and research challenges", journal = j-SIGPLAN, volume = "45", number = "10", pages = "848--864", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dumitras:2010:UUI, author = "Tudor Dumitras and Priya Narasimhan and Eli Tilevich", title = "To upgrade or not to upgrade: impact of online upgrades across multiple administrative domains", journal = j-SIGPLAN, volume = "45", number = "10", pages = "865--876", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Arnold:2010:MAP, author = "Kenneth C. Arnold and Henry Lieberman", title = "Managing ambiguity in programming by finding unambiguous examples", journal = j-SIGPLAN, volume = "45", number = "10", pages = "877--884", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gabriel:2010:BST, author = "Richard P. Gabriel and Kevin J. 
Sullivan", title = "Better science through art", journal = j-SIGPLAN, volume = "45", number = "10", pages = "885--900", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Quillien:2010:RDN, author = "Jenny Quillien and Dave West", title = "Rubber ducks, nightmares, and unsaturated predicates: proto-scientific schemata are good for agile", journal = j-SIGPLAN, volume = "45", number = "10", pages = "901--917", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kats:2010:PDS, author = "Lennart C. L. Kats and Eelco Visser and Guido Wachsmuth", title = "Pure and declarative syntax definition: paradise lost and regained", journal = j-SIGPLAN, volume = "45", number = "10", pages = "918--932", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hanenberg:2010:FHL, author = "Stefan Hanenberg", title = "Faith, hope, and love: an essay on software science's neglect of human factors", journal = j-SIGPLAN, volume = "45", number = "10", pages = "933--946", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Adamczyk:2010:TBD, author = "Paul Adamczyk and Munawar Hafiz", title = "The {Tower of Babel} did not fail", journal = j-SIGPLAN, volume = "45", number = "10", pages = "947--957", month = oct, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1932682.1869537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:13:46 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rendel:2010:ISD, author = "Tillmann Rendel and Klaus Ostermann", title = "Invertible syntax descriptions: unifying parsing and pretty printing", journal = j-SIGPLAN, volume = "45", number = "11", pages = "1--12", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863525", ISSN = "0362-1340 (print), 1523-2867 
(print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Straka:2010:PHC, author = "Milan Straka", title = "The performance of the {Haskell} containers package", journal = j-SIGPLAN, volume = "45", number = "11", pages = "13--24", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Pirog:2010:SDS, author = "Maciej Pirog and Dariusz Biernacki", title = "A systematic derivation of the {STG} machine verified in {Coq}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "25--36", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Magalhaes:2010:GDM, author = "Jos{\'e} Pedro Magalh{\~a}es and Atze Dijkstra and Johan Jeuring and Andres L{\"o}h", title = "A generic deriving mechanism for {Haskell}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "37--48", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{vanGroningen:2010:ESB, author = "John van Groningen and Thomas van Noort and Peter Achten and Pieter Koopman and Rinus Plasmeijer", title = "Exchanging sources between {Clean} and {Haskell}: a double-edged front end for the {Clean} compiler", journal = j-SIGPLAN, volume = "45", number = "11", pages = "49--60", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The functional programming languages Clean and Haskell have been around for over two decades. Over time, both languages have developed a large body of useful libraries and come with interesting language features. It is our primary goal to benefit from each other's evolutionary results by facilitating the exchange of sources between Clean and Haskell and study the forthcoming interactions between their distinct languages features. 
This is achieved by using the existing Clean compiler as starting point, and implementing a double-edged front end for this compiler: it supports both standard Clean 2.1 and (currently a large part of) standard Haskell 98. Moreover, it allows both languages to seamlessly use many of each other's language features that were alien to each other before. For instance, Haskell can now use uniqueness typing anywhere, and Clean can use newtypes efficiently. This has given birth to two new dialects of Clean and Haskell, dubbed Clean* and Haskell*. Additionally, measurements of the performance of the new compiler indicate that it is on par with the flagship Haskell compiler GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Morris:2010:ERU, author = "J. Garrett Morris", title = "Experience report: using hackage to inform language design", journal = j-SIGPLAN, volume = "45", number = "11", pages = "61--66", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Mainland:2010:NEC, author = "Geoffrey Mainland and Greg Morrisett", title = "{Nikola}: embedding compiled {GPU} functions in {Haskell}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "67--78", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Launchbury:2010:COH, author = "John Launchbury and Trevor Elliott", title = "Concurrent orchestration in {Haskell}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "79--90", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Marlow:2010:SNM, author = "Simon Marlow and Patrick Maier and Hans-Wolfgang Loidl and Mustafa K. 
Aswad and Phil Trinder", title = "Seq no more: better strategies for parallel {Haskell}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "91--102", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{OSullivan:2010:SEH, author = "Bryan O'Sullivan and Johan Tibell", title = "Scalable {I/O} event handling for {GHC}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "103--108", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Terei:2010:LBG, author = "David A. Terei and Manuel M. T. Chakravarty", title = "An {{\tt llvm}} backend for {GHC}", journal = j-SIGPLAN, volume = "45", number = "11", pages = "109--120", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863538", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Ramsey:2010:HMR, author = "Norman Ramsey and Jo{\~a}o Dias and Simon Peyton Jones", title = "{Hoopl}: a modular, reusable library for dataflow analysis and transformation", journal = j-SIGPLAN, volume = "45", number = "11", pages = "121--134", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Bolingbroke:2010:SE, author = "Maximilian Bolingbroke and Simon Peyton Jones", title = "Supercompilation by evaluation", journal = j-SIGPLAN, volume = "45", number = "11", pages = "135--146", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863540", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Yorgey:2010:SFT, author = "Brent A. 
Yorgey", title = "Species and functors and types, oh my!", journal = j-SIGPLAN, volume = "45", number = "11", pages = "147--158", month = nov, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2088456.1863542", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:45 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "HASKELL '10 conference proceedings.", } @Article{Brunthaler:2010:EIU, author = "Stefan Brunthaler", title = "Efficient interpretation using quickening", journal = j-SIGPLAN, volume = "45", number = "12", pages = "1--14", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869633", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Just-in-time compilers offer the biggest achievable payoff performance-wise, but their implementation is a non-trivial, time-consuming task affecting the interpreter's maintenance for years to come, too. Recent research addresses this issue by providing ways of leveraging existing just-in-time compilation infrastructures. Though there has been considerable research on improving the efficiency of just-in-time compilers, the area of optimizing interpreters has gotten less attention as if the implementation of a dynamic translation system was the ``ultima ratio'' for efficiently interpreting programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zakirov:2010:ODD, author = "Salikh S. Zakirov and Shigeru Chiba and Etsuya Shibayama", title = "Optimizing dynamic dispatch with fine-grained state tracking", journal = j-SIGPLAN, volume = "45", number = "12", pages = "15--26", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869634", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic mixin is a construct available in Ruby and other dynamic languages. It can be used as a base to implement a range of programming paradigms, such as dynamic aspect-oriented programming and context-oriented programming. However, the performance characteristics of current implementation of dynamic mixin in Ruby leaves much to be desired under condition of frequent dynamic mixin operations, global method cache and inline cache misses incur significant overhead. In this work we implemented fine-grained state tracking for CRuby 1. and were able to improve performance by more than six times on the microbenchmark exercising extreme case flowing 4 times to global method cache clearing, 28\% to fine-grained state tracking and further 12\% to inline cache miss elimination by caching alternating states.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gorbovitski:2010:AAO, author = "Michael Gorbovitski and Yanhong A. Liu and Scott D. Stoller and Tom Rothamel and Tuncay K. 
Tekle", title = "Alias analysis for optimization of dynamic languages", journal = j-SIGPLAN, volume = "45", number = "12", pages = "27--42", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869635", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic languages such as Python allow programs to be written more easily using high-level constructs such as comprehensions for queries and using generic code. Efficient execution of programs then requires powerful optimizations - incrementalization of expensive queries and specialization of generic code. Effective incrementalization and specialization of dynamic languages require precise and scalable alias analysis. This paper describes the development and experimental evaluation of a may-alias analysis for a full dynamic object-oriented language, for program optimization by incrementalization and specialization. The analysis is flow-sensitive; we show that this is necessary for effective optimization of dynamic languages. It uses precise type analysis and a powerful form of context sensitivity, called trace sensitivity, to further improve analysis precision.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pestov:2010:FDS, author = "Sviatoslav Pestov and Daniel Ehrenberg and Joe Groff", title = "{Factor}: a dynamic stack-based programming language", journal = j-SIGPLAN, volume = "45", number = "12", pages = "43--58", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869637", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Factor is a new dynamic object-oriented programming language. It began as an embedded scripting language and evolved to a mature application development language. The language has a simple execution model and is based on the manipulation of data on a stack. An advanced metaprogramming system provides means for easily extending the language. Thus, Factor allows programmers to use the right features for their problem domain. The Factor implementation is self-hosting, featuring an interactive development environment and an optimizing compiler. In this paper, the language and its implementation are presented.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{VanCutsem:2010:PDP, author = "Tom {Van Cutsem} and Mark S. Miller", title = "Proxies: design principles for robust object-oriented intercession {APIs}", journal = j-SIGPLAN, volume = "45", number = "12", pages = "59--72", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869638", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Proxies are a powerful approach to implement meta-objects in object-oriented languages without having to resort to metacircular interpretation. We introduce such a meta-level API based on proxies for Javascript. 
We simultaneously introduce a set of design principles that characterize such APIs in general, and compare similar APIs of other languages in terms of these principles. We highlight how principled proxy-based APIs improve code robustness by avoiding interference between base and meta-level code that occur in more common reflective intercession mechanisms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tratt:2010:EIL, author = "Laurence Tratt", title = "Experiences with an {Icon}-like expression evaluation system", journal = j-SIGPLAN, volume = "45", number = "12", pages = "73--80", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869640", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The design of the Icon programming language's expression evaluation system, which can perform limited backtracking, was unique amongst imperative programming languages when created. In this paper I explain and critique the original Icon design and show how a similar system can be integrated into a modern dynamically typed language. Finally I detail my experiences of this system and offer suggestions for the lessons to be learned from it.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Axelsen:2010:CDM, author = "Eyvind W. Axelsen and Stein Krogdahl and Birger M{\o}ller-Pedersen", title = "Controlling dynamic module composition through an extensible meta-level {API}", journal = j-SIGPLAN, volume = "45", number = "12", pages = "81--96", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869641", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In addition to traditional object-oriented (OO) concepts such as inheritance and polymorphism, several modularization and composition mechanisms like e.g. traits, mixins and virtual classes have emerged. The Package Template mechanism is another attempt at providing a flexible mechanism for modularization, composition and adaption. Dynamic languages have traditionally employed strong support for meta-programming, with hooks to control OO concepts such as method invocation and object construction, by utilizing meta-classes and meta-object protocols. In this work, we attempt to bring a corresponding degree of meta-level control to composition primitives, with a concrete starting point in the package template mechanism as developed for the dynamic language Groovy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Strickland:2010:CFC, author = "T. 
Stephen Strickland and Matthias Felleisen", title = "Contracts for first-class classes", journal = j-SIGPLAN, volume = "45", number = "12", pages = "97--112", month = dec, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1899661.1869642", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Dec 15 10:25:15 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "First-class classes add expressive power to class-based object-oriented languages. Most importantly, programmers can abstract over common scenarios with first-class classes. When it comes to behavioral software contracts, however, first-class classes pose significant challenges. In this paper, we present the first contract system for a programming language with first-class classes. The design has been implemented for Racket, which supports first-class classes and which implements mixins and traits as syntactic sugar. We expect that our experience also applies to languages with native mixins and/or traits.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leroy:2011:VSD, author = "Xavier Leroy", title = "Verified squared: does critical software deserve verified tools?", journal = j-SIGPLAN, volume = "46", number = "1", pages = "1--2", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lhotak:2011:PAE, author = "Ondrej Lhot{\'a}k and Kwok-Chiang Andrew Chung", title = "Points-to analysis with efficient strong updates", journal = j-SIGPLAN, volume = "46", number = "1", pages = "3--16", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Smaragdakis:2011:PYC, author = "Yannis Smaragdakis and Martin Bravenboer and Ondrej Lhot{\'a}k", title = "Pick your contexts well: understanding object-sensitivity", journal = j-SIGPLAN, volume = "46", number = "1", pages = "17--30", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liang:2011:LMA, author = "Percy Liang and Omer Tripp and Mayur Naik", title = "Learning minimal abstractions", journal = j-SIGPLAN, volume = "46", number = "1", pages = "31--42", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", 
bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sevcik:2011:RMC, author = "Jaroslav {\v{S}}ev{\c{c}}ik and Viktor Vafeiadis and Francesco Zappa Nardelli and Suresh Jagannathan and Peter Sewell", title = "Relaxed-memory concurrency and verified compilation", journal = j-SIGPLAN, volume = "46", number = "1", pages = "43--54", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926393", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Batty:2011:MCC, author = "Mark Batty and Scott Owens and Susmit Sarkar and Peter Sewell and Tjark Weber", title = "Mathematizing {C++} concurrency", journal = j-SIGPLAN, volume = "46", number = "1", pages = "55--66", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ramananandro:2011:FVO, author = "Tahina Ramananandro and Gabriel {Dos Reis} and Xavier Leroy", title = "Formal verification of object layout for {C++} multiple inheritance", journal = j-SIGPLAN, volume = "46", number = "1", pages = "67--80", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926395", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Choi:2011:SAM, author = "Wontae Choi and Baris Aktemur and Kwangkeun Yi and Makoto Tatsuta", title = "Static analysis of multi-staged programs via unstaging translation", journal = j-SIGPLAN, volume = "46", number = "1", pages = "81--92", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926397", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Schwarz:2011:SAI, author = "Martin D. 
Schwarz and Helmut Seidl and Vesal Vojdani and Peter Lammich and Markus M{\"u}ller-Olm", title = "Static analysis of interrupt-driven programs synchronized via the priority ceiling protocol", journal = j-SIGPLAN, volume = "46", number = "1", pages = "93--104", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926398", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cousot:2011:PSF, author = "Patrick Cousot and Radhia Cousot and Francesco Logozzo", title = "A parametric segmentation functor for fully automatic and scalable array content analysis", journal = j-SIGPLAN, volume = "46", number = "1", pages = "105--118", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926399", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Birkedal:2011:SIK, author = "Lars Birkedal and Bernhard Reus and Jan Schwinghammer and Kristian St{\o}vring and Jacob Thamsborg and Hongseok Yang", title = "Step-indexed {Kripke} models over recursive worlds", journal = j-SIGPLAN, volume = "46", number = "1", pages = "119--132", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926401", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hur:2011:KLR, author = "Chung-Kil Hur and Derek Dreyer", title = "A {Kripke} logical relation between {ML} and assembly", journal = j-SIGPLAN, volume = "46", number = "1", pages = "133--146", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926402", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pottier:2011:TSP, author = "Fran{\c{c}}ois Pottier", title = "A typed store-passing translation for general references", journal = j-SIGPLAN, volume = "46", number = "1", pages = "147--158", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926403", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prountzos:2011:SAO, author = "Dimitrios Prountzos and Roman Manevich and Keshav Pingali and Kathryn S. 
McKinley", title = "A shape analysis for optimizing parallel graph programs", journal = j-SIGPLAN, volume = "46", number = "1", pages = "159--172", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rival:2011:CCA, author = "Xavier Rival and Bor-Yuh Evan Chang", title = "Calling context abstraction with shapes", journal = j-SIGPLAN, volume = "46", number = "1", pages = "173--186", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926406", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dillig:2011:PRP, author = "Isil Dillig and Thomas Dillig and Alex Aiken", title = "Precise reasoning for programs using containers", journal = j-SIGPLAN, volume = "46", number = "1", pages = "187--200", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926407", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ahmed:2011:BA, author = "Amal Ahmed and Robert Bruce Findler and Jeremy G. 
Siek and Philip Wadler", title = "Blame for all", journal = j-SIGPLAN, volume = "46", number = "1", pages = "201--214", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dimoulas:2011:CBC, author = "Christos Dimoulas and Robert Bruce Findler and Cormac Flanagan and Matthias Felleisen", title = "Correct blame for contracts: no more scapegoating", journal = j-SIGPLAN, volume = "46", number = "1", pages = "215--226", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926410", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Weirich:2011:GTA, author = "Stephanie Weirich and Dimitrios Vytiniotis and Simon Peyton Jones and Steve Zdancewic", title = "Generative type abstraction and type-level computation", journal = j-SIGPLAN, volume = "46", number = "1", pages = "227--240", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{MacLaurin:2011:DKT, author = "Matthew B. MacLaurin", title = "The design of {Kodu}: a tiny visual programming language for children on the {Xbox 360}", journal = j-SIGPLAN, volume = "46", number = "1", pages = "241--246", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926413", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Turon:2011:SLR, author = "Aaron Joseph Turon and Mitchell Wand", title = "A separation logic for refining concurrent objects", journal = j-SIGPLAN, volume = "46", number = "1", pages = "247--258", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dodds:2011:MRD, author = "Mike Dodds and Suresh Jagannathan and Matthew J. 
Parkinson", title = "Modular reasoning for deterministic parallelism", journal = j-SIGPLAN, volume = "46", number = "1", pages = "259--270", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926416", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jacobs:2011:EMF, author = "Bart Jacobs and Frank Piessens", title = "Expressive modular fine-grained concurrency specification", journal = j-SIGPLAN, volume = "46", number = "1", pages = "271--282", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926417", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Madhusudan:2011:TWA, author = "P. Madhusudan and Gennaro Parlato", title = "The tree width of auxiliary storage", journal = j-SIGPLAN, volume = "46", number = "1", pages = "283--294", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926419", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tzevelekos:2011:FRA, author = "Nikos Tzevelekos", title = "Fresh-register automata", journal = j-SIGPLAN, volume = "46", number = "1", pages = "295--306", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926420", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leroux:2011:VAS, author = "J{\'e}r{\^o}me Leroux", title = "Vector addition system reachability problem: a short self-contained proof", journal = j-SIGPLAN, volume = "46", number = "1", pages = "307--316", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926421", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gulwani:2011:ASP, author = "Sumit Gulwani", title = "Automating string processing in spreadsheets using input-output examples", journal = j-SIGPLAN, volume = "46", number = "1", pages = "317--330", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926423", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = 
"http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gupta:2011:PAR, author = "Ashutosh Gupta and Corneliu Popeea and Andrey Rybalchenko", title = "Predicate abstraction and refinement for verifying multi-threaded programs", journal = j-SIGPLAN, volume = "46", number = "1", pages = "331--344", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926424", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ghica:2011:GSIa, author = "Dan R. Ghica and Alex Smith", title = "Geometry of synthesis {III}: resource management through type inference", journal = j-SIGPLAN, volume = "46", number = "1", pages = "345--356", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926425", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hoffmann:2011:MAR, author = "Jan Hoffmann and Klaus Aehlig and Martin Hofmann", title = "Multivariate amortized resource analysis", journal = j-SIGPLAN, volume = "46", number = "1", pages = "357--370", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926427", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hofmann:2011:SL, author = "Martin Hofmann and Benjamin Pierce and Daniel Wagner", title = "Symmetric lenses", journal = j-SIGPLAN, volume = "46", number = "1", pages = "371--384", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926428", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Henglein:2011:REC, author = "Fritz Henglein and Lasse Nielsen", title = "Regular expression containment: coinductive axiomatization and computational interpretation", journal = j-SIGPLAN, volume = "46", number = "1", pages = "385--398", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926429", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cook:2011:MPD, author = "Byron Cook and Eric Koskinen", title 
= "Making prophecies with decision predicates", journal = j-SIGPLAN, volume = "46", number = "1", pages = "399--410", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926431", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Emmi:2011:DBS, author = "Michael Emmi and Shaz Qadeer and Zvonimir Rakamari{\'c}", title = "Delay-bounded scheduling", journal = j-SIGPLAN, volume = "46", number = "1", pages = "411--422", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926432", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sinha:2011:IA, author = "Nishant Sinha and Chao Wang", title = "On interference abstractions", journal = j-SIGPLAN, volume = "46", number = "1", pages = "423--434", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926433", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Denielou:2011:DMS, author = "Pierre-Malo Deni{\'e}lou and Nobuko Yoshida", title = "Dynamic multirole session types", journal = j-SIGPLAN, volume = "46", number = "1", pages = "435--446", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926435", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tov:2011:PAT, author = "Jesse A. Tov and Riccardo Pucella", title = "Practical affine types", journal = j-SIGPLAN, volume = "46", number = "1", pages = "447--458", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926436", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{An:2011:DIS, author = "Jong-hoon (David) An and Avik Chaudhuri and Jeffrey S. 
Foster and Michael Hicks", title = "Dynamic inference of static types for {\tt ruby}", journal = j-SIGPLAN, volume = "46", number = "1", pages = "459--472", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926437", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gordon:2011:RMV, author = "Andrew D. Gordon and Robert Harper and John Harrison and Alan Jeffrey and Peter Sewell", title = "{Robin Milner 1934--2010}: verification, languages, and concurrency", journal = j-SIGPLAN, volume = "46", number = "1", pages = "473--474", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926439", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bendersky:2011:SOB, author = "Anna Bendersky and Erez Petrank", title = "Space overhead bounds for dynamic memory management with partial compaction", journal = j-SIGPLAN, volume = "46", number = "1", pages = "475--486", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926441", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Attiya:2011:LOE, author = "Hagit Attiya and Rachid Guerraoui and Danny Hendler and Petr Kuznetsov and Maged M. 
Michael and Martin Vechev", title = "Laws of order: expensive synchronization in concurrent algorithms cannot be eliminated", journal = j-SIGPLAN, volume = "46", number = "1", pages = "487--498", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926442", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Esparza:2011:CPB, author = "Javier Esparza and Pierre Ganty", title = "Complexity of pattern-based verification for multithreaded programs", journal = j-SIGPLAN, volume = "46", number = "1", pages = "499--510", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926443", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prabhu:2011:EAF, author = "Tarun Prabhu and Shreyas Ramalingam and Matthew Might and Mary Hall", title = "{EigenCFA}: accelerating flow analysis with {GPUs}", journal = j-SIGPLAN, volume = "46", number = "1", pages = "511--522", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926445", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feng:2011:BQP, author = "Yuan Feng and Runyao Duan and Mingsheng Ying", title = "Bisimulation for quantum processes", journal = j-SIGPLAN, volume = "46", number = "1", pages = "523--534", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926446", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bocchino:2011:SND, author = "Robert L. {Bocchino, Jr.} and Stephen Heumann and Nima Honarmand and Sarita V. Adve and Vikram S. Adve and Adam Welc and Tatiana Shpeisman", title = "Safe nondeterminism in a deterministic-by-default parallel language", journal = j-SIGPLAN, volume = "46", number = "1", pages = "535--548", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926447", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pouchet:2011:LTC, author = "Louis-No{\"e}l Pouchet and Uday Bondhugula and C{\'e}dric Bastoul and Albert Cohen and J. Ramanujam and P. 
Sadayappan and Nicolas Vasilache", title = "Loop transformations: convexity, pruning and optimization", journal = j-SIGPLAN, volume = "46", number = "1", pages = "549--562", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926449", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Guo:2011:ECT, author = "Shu-yu Guo and Jens Palsberg", title = "The essence of compiling with traces", journal = j-SIGPLAN, volume = "46", number = "1", pages = "563--574", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926450", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ramsey:2011:RRM, author = "Norman Ramsey and Jo{\~a}o Dias", title = "Resourceable, retargetable, modular instruction selection using a machine-independent, type-based tiling of low-level intermediate code", journal = j-SIGPLAN, volume = "46", number = "1", pages = "575--586", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926451", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ong:2011:VHO, author = "C.-H. Luke Ong and Steven James Ramsay", title = "Verifying higher-order functional programs with pattern-matching algebraic data types", journal = j-SIGPLAN, volume = "46", number = "1", pages = "587--598", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926453", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Alur:2011:STA, author = "Rajeev Alur and Pavol Cern{\'y}", title = "Streaming transducers for algorithmic verification of single-pass list-processing programs", journal = j-SIGPLAN, volume = "46", number = "1", pages = "599--610", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926454", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Madhusudan:2011:DLC, author = "P. 
Madhusudan and Gennaro Parlato and Xiaokang Qiu", title = "Decidable logics combining heap structures and data", journal = j-SIGPLAN, volume = "46", number = "1", pages = "611--622", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926455", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Joisha:2011:TEA, author = "Pramod G. Joisha and Robert S. Schreiber and Prithviraj Banerjee and Hans J. Boehm and Dhruva R. Chakrabarti", title = "A technique for the effective and automatic reuse of classical compiler optimizations on multithreaded code", journal = j-SIGPLAN, volume = "46", number = "1", pages = "623--636", month = jan, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1925844.1926457", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jan 26 15:06:39 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lammel:2011:HGS, author = "Ralf L{\"a}mmel", title = "The hitchhiker's guide to software languages", journal = j-SIGPLAN, volume = "46", number = "2", pages = "1--2", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868295", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There is only that much space in the CS curriculum, and there are always new subjects that should be accommodated by the curriculum. For instance, in our community, we would want all graduates to leave university with a modest background in technical spaces, software languages, and meta-programming; also, with conceptually informed and reasonably timeless skills to efficiently master related programming techniques and technologies. In reality, the curricula of few CS departments meet this expectation. In this talk, I will discuss such curricula-related expectations of our community and the suboptimal situation at CS departments---as I perceive them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Erwig:2011:LSV, author = "Martin Erwig", title = "A language for software variation research", journal = j-SIGPLAN, volume = "46", number = "2", pages = "3--12", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868296", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Managing variation is an important problem in software engineering that takes different forms, ranging from version control and configuration management to software product lines.
In this paper, I present our recent work on the choice calculus, a fundamental representation for software variation that can serve as a common language of discourse for variation research, filling a role similar to lambda calculus in programming language research. After motivating the design of the choice calculus and sketching its semantics, I will discuss several potential application areas.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Clarke:2011:ADM, author = "Dave Clarke and Michiel Helvensteijn and Ina Schaefer", title = "Abstract delta modeling", journal = j-SIGPLAN, volume = "46", number = "2", pages = "13--22", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868298", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Delta modeling is an approach to facilitate automated product derivation for software product lines. It is based on a set of deltas specifying modifications that are incrementally applied to a core product. The applicability of deltas depends on feature-dependent conditions. This paper presents abstract delta modeling, which explores delta modeling from an abstract, algebraic perspective. Compared to previous work, we take a more flexible approach with respect to conflicts between modifications and introduce the notion of conflict-resolving deltas. We present conditions on the structure of deltas to ensure unambiguous product generation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ryssel:2011:AVP, author = "Uwe Ryssel and Joern Ploennigs and Klaus Kabitzsch", title = "Automatic variation-point identification in function-block-based models", journal = j-SIGPLAN, volume = "46", number = "2", pages = "23--32", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868299", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Function-block-based modeling is often used to develop embedded systems, particularly as system variants can be developed rapidly from existing modules. Generative approaches can simplify the handling and development of the resulting high variety of function-block-based models. But they often require the development of new generic models that do not utilize existing ones. Reusing existing models will significantly decrease the effort to apply generative programming. 
This work introduces an automatic approach to recognize variants in a set of models and identify the variation points and their dependencies within variants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sincero:2011:EEA, author = "Julio Sincero and Reinhard Tartler and Daniel Lohmann and Wolfgang Schr{\"o}der-Preikschat", title = "Efficient extraction and analysis of preprocessor-based variability", journal = j-SIGPLAN, volume = "46", number = "2", pages = "33--42", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868300", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C Preprocessor (CPP) is the tool of choice for the implementation of variability in many large-scale configurable software projects. Linux, probably the most-configurable piece of software ever, employs more than 10,000 preprocessor variables for this purpose. However, this de-facto variability tends to be ``hidden in the code''; which on the long term leads to variability defects, such as dead code or inconsistencies with respect to the intended (modeled) variability of the software. This calls for tool support for the efficient extraction of (and reasoning over) CPP-based variability. We suggest a novel approach to extract CPP-based variability. Our tool transforms CPP-based variability in O(n) complexity into a propositional formula that ``mimics'' all valid effects of conditional compilation and can be analyzed with standard SAT or BDD packages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Middelkoop:2011:ITI, author = "Arie Middelkoop and Atze Dijkstra and S. Doaitse Swierstra", title = "Iterative type inference with attribute grammars", journal = j-SIGPLAN, volume = "46", number = "2", pages = "43--52", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868302", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type inference is the process of constructing a typing derivation while gradually discovering type information. During this process, inference algorithms typically make subtle decisions based on the derivation constructed so far. Because a typing derivation is a decorated tree we aim to use attribute grammars as the main implementation tool. Unfortunately, we can neither express iteration, nor express decisions based on intermediate derivations in such grammars. We present the language ruler-front, a conservative extension to ordered attribute grammars, that deals with the aforementioned problems. We show why this extension is suitable for the description of constraint-based inference algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Krieger:2011:AES, author = "Matthias P. 
Krieger and Alexander Knapp and Burkhart Wolff", title = "Automatic and efficient simulation of operation contracts", journal = j-SIGPLAN, volume = "46", number = "2", pages = "53--62", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868303", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Operation contracts consisting of pre- and postconditions are a well-known means of specifying operations. In this paper we deal with the problem of operation contract simulation, i.e., determining operation results satisfying the postconditions based on input data supplied by the user; simulating operation contracts is an important technique for requirements validation and prototyping. Current approaches to operation contract simulation exhibit poor performance for large sets of input data or require additional guidance from the user. We show how these problems can be alleviated and describe an efficient as well as fully automatic approach. It is implemented in our tool OCLexec that generates from UML/OCL operation contracts corresponding Java implementations which call a constraint solver at runtime.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Long:2011:IIM, author = "Yuheng Long and Sean L. Mooney and Tyler Sondag and Hridesh Rajan", title = "Implicit invocation meets safe, implicit concurrency", journal = j-SIGPLAN, volume = "46", number = "2", pages = "63--72", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868304", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing correct and efficient concurrent programs still remains a challenge. Explicit concurrency is difficult, error prone, and creates code which is hard to maintain and debug. This type of concurrency also treats modular program design and concurrency as separate goals, where modularity often suffers. To solve these problems, we are designing a new language that we call Panini. In this paper, we focus on Panini's asynchronous, typed events which reconcile the modularity goal promoted by the implicit invocation design style with the concurrency goal of exposing potential concurrency between the execution of subjects and observers. Since modularity is improved and concurrency is implicit in Panini, programs are easier to reason about and maintain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Navas:2011:CBR, author = "Juan F. 
Navas and Jean-Philippe Babau and Jacques Pulou", title = "A component-based run-time evolution infrastructure for resource-constrained embedded systems", journal = j-SIGPLAN, volume = "46", number = "2", pages = "73--82", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868306", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper deals with embedded systems software and the modification of its architecture and behavior at execution-time. Incautious implementation of these features demands both heavy memory and performance overrun. To accomplish such software evolution activities in resource-constrained embedded systems, we propose a component-based run-time evolution infrastructure that reconciles richness of evolution alternatives and performance requirements. Our proposal is based on off-site components reifications, which are representations of components that allow us to treat evolution concerns remotely. Hence, the workload to be processed by the embedded device is alleviated.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hofer:2011:MDS, author = "Christian Hofer and Klaus Ostermann", title = "Modular domain-specific language components in {Scala}", journal = j-SIGPLAN, volume = "46", number = "2", pages = "83--92", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868307", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programs in domain-specific embedded languages (DSELs) can be represented in the host language in different ways, for instance implicitly as libraries, or explicitly in the form of abstract syntax trees. Each of these representations has its own strengths and weaknesses. The implicit approach has good composability properties, whereas the explicit approach allows more freedom in making syntactic program transformations. Traditional designs for DSELs fix the form of representation, which means that it is not possible to choose the best representation for a particular interpretation or transformation. We propose a new design for implementing DSELs in Scala which makes it easy to use different program representations at the same time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wolfinger:2011:AGP, author = "Reinhard Wolfinger and Markus L{\"o}berbauer and Markus Jahn and Hanspeter M{\"o}ssenb{\"o}ck", title = "Adding genericity to a plug-in framework", journal = j-SIGPLAN, volume = "46", number = "2", pages = "93--102", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868308", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Plug-in components are a means for making feature-rich applications customizable. Combined with plug-and-play composition, end users can assemble customized applications without programming. 
If plug-and-play composition is also dynamic, applications can be reconfigured on the fly to load only components the user needs for his current work. We have created Plux.NET, a plug-in framework that supports dynamic plug-and-play composition. The basis for plug-and-play in Plux is the composer which replaces programmatic composition by automatic composition. Components just specify their requirements and provisions using metadata. The composer then assembles the components based on that metadata by matching requirements and provisions. When the composer needs to reuse general-purpose components in different parts of an application, the component model requires genericity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Schulze:2011:CCF, author = "Sandro Schulze and Sven Apel and Christian K{\"a}stner", title = "Code clones in feature-oriented software product lines", journal = j-SIGPLAN, volume = "46", number = "2", pages = "103--112", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868310", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Some limitations of object-oriented mechanisms are known to cause code clones (e.g., extension using inheritance). Novel programming paradigms such as feature-oriented programming (FOP) aim at alleviating these limitations. However, it is an open issue whether FOP is really able to avoid code clones or whether it even facilitates (FOP-related) clones. To address this issue, we conduct an empirical analysis on ten feature-oriented software product lines with respect to code cloning. We found that there is a considerable number of clones in feature-oriented software product lines and that a large fraction of these clones is FOP-related (i.e., caused by limitations of feature-oriented mechanisms).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tanter:2011:CDA, author = "{\'E}ric Tanter and Philippe Moret and Walter Binder and Danilo Ansaloni", title = "Composition of dynamic analysis aspects", journal = j-SIGPLAN, volume = "46", number = "2", pages = "113--122", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868311", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aspect-oriented programming provides a convenient high-level model to define several kinds of dynamic analyses, in particular thanks to recent advances in exhaustive weaving in core libraries. Casting dynamic analyses as aspects allows the use of a single weaving infrastructure to apply different analyses to the same base program, simultaneously. However, even if dynamic analysis aspects are mutually independent, their mere presence perturbs the observations of others: this is due to the fact that aspectual computation is potentially visible to all aspects. 
Because current aspect composition approaches do not address this kind of computational interference, combining different analysis aspects yields at best unpredictable results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wurthinger:2011:AED, author = "Thomas W{\"u}rthinger and Walter Binder and Danilo Ansaloni and Philippe Moret and Hanspeter M{\"o}ssenb{\"o}ck", title = "Applications of enhanced dynamic code evolution for {Java} in {GUI} development and dynamic aspect-oriented programming", journal = j-SIGPLAN, volume = "46", number = "2", pages = "123--126", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868312", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While dynamic code evolution in object-oriented systems is an important feature supported by dynamic languages, there is currently only limited support for dynamic code evolution in high-performance, state-of-the-art runtime systems for statically typed languages, such as the Java Virtual Machine. In this tool demonstration, we present the Dynamic Code Evolution VM, which is based on a recent version of Oracle's state-of-the-art Java HotSpot(TM) VM and allows unlimited changes to loaded classes at runtime. Based on the Dynamic Code Evolution VM, we developed an enhanced version of the Mantisse GUI builder (which is part of the NetBeans IDE) that allows adding GUI components without restarting the application under development.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rompf:2011:LMS, author = "Tiark Rompf and Martin Odersky", title = "Lightweight modular staging: a pragmatic approach to runtime code generation and compiled {DSLs}", journal = j-SIGPLAN, volume = "46", number = "2", pages = "127--136", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868314", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software engineering demands generality and abstraction, performance demands specialization and concretization. Generative programming can provide both, but the effort required to develop high-quality program generators likely offsets their benefits, even if a multi-stage programming language is used. We present lightweight modular staging, a library-based multi-stage programming approach that breaks with the tradition of syntactic quasi-quotation and instead uses only types to distinguish between binding times. Through extensive use of component technology, lightweight modular staging makes an optimizing compiler framework available at the library level, allowing programmers to tightly integrate domain-specific abstractions and optimizations into the generation process. 
We argue that lightweight modular staging enables a form of language virtualization, i.e.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Porkolab:2011:DSL, author = "Zolt{\'a}n Porkolab and {\'A}bel Sinkovics", title = "Domain-specific language integration with compile-time parser generator library", journal = j-SIGPLAN, volume = "46", number = "2", pages = "137--146", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868315", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Smooth integration of domain-specific languages into a general purpose host language requires absorbing of domain code written in arbitrary syntax. The integration should cause minimal syntactical and semantic overhead and introduce minimal dependency on external tools. In this paper we discuss a DSL integration technique for the C++ programming language. The solution is based on compile-time parsing of the DSL code. The parser generator is a C++ template metaprogram reimplementation of a runtime Haskell parser generator library. The full parsing phase is executed when the host program is compiled. The library uses only standard C++ language features, thus our solution is highly portable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Atkinson:2011:ACT, author = "Kevin Atkinson and Matthew Flatt and Gary Lindstrom", title = "{ABI} compatibility through a customizable language", journal = j-SIGPLAN, volume = "46", number = "2", pages = "147--156", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868316", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "ZL is a C++-compatible language in which high-level constructs, such as classes, are defined using macros over a C-like core language. This approach makes many parts of the language easily customizable. For example, since the class construct can be defined using macros, a programmer can have complete control over the memory layout of objects. 
Using this capability, a programmer can mitigate certain problems in software evolution such as fragile ABIs (Application Binary Interfaces) due to software changes and incompatible ABIs due to compiler changes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bordignon:2011:MBK, author = "Mirko Bordignon and Ulrik Pagh Schultz and Kasper Stoy", title = "Model-based kinematics generation for modular mechatronic toolkits", journal = j-SIGPLAN, volume = "46", number = "2", pages = "157--166", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868318", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modular robots are mechatronic devices that enable the construction of highly versatile and flexible robotic systems whose mechanical structure can be dynamically modified. The key feature that enables this dynamic modification is the capability of the individual modules to connect to each other in multiple ways and thus generate a number of different mechanical systems, in contrast with the monolithic fixed structure of conventional robots. The mechatronic flexibility, however, complicates the development of models and programming abstractions for modular robots, since manually describing and enumerating the full set of possible interconnections is tedious and error-prone for real-world robots. In order to allow for a general formulation of spatial abstractions for modular robots and to ensure correct and streamlined generation of code dependent on mechanical properties, we have developed the Modular Mechatronics Modelling Language (M3L).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Miao:2011:ITC, author = "Weiyu Miao and Jeremy G. Siek", title = "Incremental type-checking for type-reflective metaprograms", journal = j-SIGPLAN, volume = "46", number = "2", pages = "167--176", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868319", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Garcia introduces a calculus for type-reflective metaprogramming that provides much of the power and flexibility of C++ templates and solves many of its problems. However, one of the problems that remains is that the residual program is not type checked until after meta computation is complete. Ideally, one would like the type system of the metaprogram to also guarantee that the residual program will type check, as is the case in MetaML.
However, in a language with type-reflective metaprogramming, type expressions in the residual program may be the result of meta computation, making the MetaML guarantee next to impossible to achieve.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Grech:2011:JGE, author = "Neville Grech and Julian Rathke and Bernd Fischer", title = "{JEqualityGen}: generating equality and hashing methods", journal = j-SIGPLAN, volume = "46", number = "2", pages = "177--186", month = feb, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1942788.1868320", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Feb 14 16:37:34 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Manually implementing equals (for object comparisons) and hashCode (for object hashing) methods in large software projects is tedious and error-prone. This is due to many special cases, such as field shadowing, comparison between different types, or cyclic object graphs. Here, we present JEqualityGen, a source code generator that automatically derives implementations of these methods. JEqualityGen proceeds in two stages: it first uses source code reflection in MetaAspectJ to generate aspects that contain the method implementations, before it uses weaving on the bytecode level to insert these into the target application. JEqualityGen generates not only correct, but efficient source code that on a typical large-scale Java application exhibits a performance improvement of more than two orders of magnitude in the equality operations generated, compared to an existing system based on runtime reflection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Larus:2011:CWC, author = "James R. Larus", title = "The cloud will change everything", journal = j-SIGPLAN, volume = "46", number = "3", pages = "1--2", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Yuan:2011:ISD, author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan Zhou and Stefan Savage", title = "Improving software diagnosability via log enhancement", journal = j-SIGPLAN, volume = "46", number = "3", pages = "3--14", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Veeraraghavan:2011:DPS, author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin Wester and Jessica Ouyang and Peter M.
Chen and Jason Flinn and Satish Narayanasamy", title = "{DoublePlay}: parallelizing sequential logging and replay", journal = j-SIGPLAN, volume = "46", number = "3", pages = "15--26", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Casper:2011:HAT, author = "Jared Casper and Tayo Oguntebi and Sungpack Hong and Nathan G. Bronson and Christos Kozyrakis and Kunle Olukotun", title = "Hardware acceleration of transactional memory on commodity systems", journal = j-SIGPLAN, volume = "46", number = "3", pages = "27--38", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Dalessandro:2011:HNC, author = "Luke Dalessandro and Fran{\c{c}}ois Carouge and Sean White and Yossi Lev and Mark Moir and Michael L. Scott and Michael F. Spear", title = "{Hybrid NOrec}: a case study in the effectiveness of best effort hardware transactional memory", journal = j-SIGPLAN, volume = "46", number = "3", pages = "39--52", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Singh:2011:EPS, author = "Abhayendra Singh and Daniel Marino and Satish Narayanasamy and Todd Millstein and Madan Musuvathi", title = "Efficient processor support for {DRFx}, a memory model with exceptions", journal = j-SIGPLAN, volume = "46", number = "3", pages = "53--66", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Devietti:2011:RRC, author = "Joseph Devietti and Jacob Nelson and Tom Bergan and Luis Ceze and Dan Grossman", title = "{RCDC}: a relaxed consistency deterministic computer", journal = j-SIGPLAN, volume = "46", number = "3", pages = "67--78", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Burnim:2011:SCS, author = "Jacob Burnim and George Necula and Koushik Sen", title = "Specifying and checking semantic atomicity for multithreaded programs", journal = j-SIGPLAN, volume = "46", number = "3", pages = "79--90", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Volos:2011:MLP, author = "Haris Volos and Andres Jaan Tack and Michael M. Swift", title = "{Mnemosyne}: lightweight persistent memory", journal = j-SIGPLAN, volume = "46", number = "3", pages = "91--104", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Coburn:2011:NHM, author = "Joel Coburn and Adrian M. Caulfield and Ameen Akel and Laura M. Grupp and Rajesh K. Gupta and Ranjit Jhala and Steven Swanson", title = "{NV-Heaps}: making persistent objects fast and safe with next-generation, non-volatile memories", journal = j-SIGPLAN, volume = "46", number = "3", pages = "105--118", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Schupbach:2011:DLA, author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy Roscoe and Simon Peter", title = "A declarative language approach to device configuration", journal = j-SIGPLAN, volume = "46", number = "3", pages = "119--132", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Ryzhyk:2011:IDD, author = "Leonid Ryzhyk and John Keys and Balachandra Mirla and Arun Raghunath and Mona Vij and Gernot Heiser", title = "Improved device driver reliability through hardware verification reuse", journal = j-SIGPLAN, volume = "46", number = "3", pages = "133--144", month = mar, year = "2011", CODEN = "SINODQ", DOI = 
"https://doi.org/10.1145/1961296.1950383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hashmi:2011:CNI, author = "Atif Hashmi and Andrew Nere and James Jamal Thomas and Mikko Lipasti", title = "A case for neuromorphic {ISAs}", journal = j-SIGPLAN, volume = "46", number = "3", pages = "145--158", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Ransford:2011:MSS, author = "Benjamin Ransford and Jacob Sorber and Kevin Fu", title = "{Mementos}: system support for long-running computation on {RFID}-scale devices", journal = j-SIGPLAN, volume = "46", number = "3", pages = "159--170", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Koukoumidis:2011:PC, author = "Emmanouil Koukoumidis and Dimitrios Lymberopoulos and Karin Strauss and Jie Liu and Doug Burger", title = "Pocket cloudlets", journal = j-SIGPLAN, volume = "46", number = "3", pages = "171--184", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Sharma:2011:BMS, author = "Navin Sharma and Sean Barker and David Irwin and Prashant Shenoy", title = "{Blink}: managing server clusters on intermittent power", journal = j-SIGPLAN, volume = "46", number = "3", pages = "185--198", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hoffmann:2011:DKR, author = "Henry Hoffmann and Stelios Sidiroglou and Michael Carbin and Sasa Misailovic and Anant Agarwal and Martin Rinard", title = "Dynamic knobs for responsive power-aware computing", journal = j-SIGPLAN, volume = "46", number 
= "3", pages = "199--212", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Liu:2011:FSD, author = "Song Liu and Karthik Pattabiraman and Thomas Moscibroda and Benjamin G. Zorn", title = "{Flikker}: saving {DRAM} refresh-power through critical data partitioning", journal = j-SIGPLAN, volume = "46", number = "3", pages = "213--224", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Deng:2011:MAL, author = "Qingyuan Deng and David Meisner and Luiz Ramos and Thomas F. Wenisch and Ricardo Bianchini", title = "{MemScale}: active low-power modes for main memory", journal = j-SIGPLAN, volume = "46", number = "3", pages = "225--238", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950392", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Gao:2011:TMH, author = "Qi Gao and Wenbin Zhang and Zhezhe Chen and Mai Zheng and Feng Qin", title = "{2ndStrike}: toward manifesting hidden concurrency typestate bugs", journal = j-SIGPLAN, volume = "46", number = "3", pages = "239--250", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Zhang:2011:CDC, author = "Wei Zhang and Junghee Lim and Ramya Olichandran and Joel Scherpelz and Guoliang Jin and Shan Lu and Thomas Reps", title = "{ConSeq}: detecting concurrency bugs through sequential errors", journal = j-SIGPLAN, volume = "46", number = "3", pages = "251--264", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950395", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Chipounov:2011:SPV, author = "Vitaly 
Chipounov and Volodymyr Kuznetsov and George Candea", title = "{S2E}: a platform for in-vivo multi-path analysis of software systems", journal = j-SIGPLAN, volume = "46", number = "3", pages = "265--278", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950396", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hofmann:2011:EOS, author = "Owen S. Hofmann and Alan M. Dunn and Sangman Kim and Indrajit Roy and Emmett Witchel", title = "Ensuring operating system kernel integrity with {OSck}", journal = j-SIGPLAN, volume = "46", number = "3", pages = "279--290", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950398", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Porter:2011:RLT, author = "Donald E. Porter and Silas Boyd-Wickizer and Jon Howell and Reuben Olinsky and Galen C. Hunt", title = "Rethinking the library {OS} from the top down", journal = j-SIGPLAN, volume = "46", number = "3", pages = "291--304", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950399", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Palix:2011:FLT, author = "Nicolas Palix and Ga{\"e}l Thomas and Suman Saha and Christophe Calv{\`e}s and Julia Lawall and Gilles Muller", title = "Faults in {Linux}: ten years later", journal = j-SIGPLAN, volume = "46", number = "3", pages = "305--318", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950401", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Esmaeilzadeh:2011:LBL, author = "Hadi Esmaeilzadeh and Ting Cao and Yang Xi and Stephen M. Blackburn and Kathryn S. 
McKinley", title = "Looking back on the language and hardware revolutions: measured power, performance, and scaling", journal = j-SIGPLAN, volume = "46", number = "3", pages = "319--332", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950402", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Nguyen:2011:SCS, author = "Donald Nguyen and Keshav Pingali", title = "Synthesizing concurrent schedulers for irregular algorithms", journal = j-SIGPLAN, volume = "46", number = "3", pages = "333--344", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hoang:2011:ECT, author = "Giang Hoang and Robby Bruce Findler and Russ Joseph", title = "Exploring circuit timing-aware language and compilation", journal = j-SIGPLAN, volume = "46", number = "3", pages = "345--356", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Farhad:2011:OAM, author = "Sardar M. Farhad and Yousun Ko and Bernd Burgstaller and Bernhard Scholz", title = "Orchestration by approximation: mapping stream programs onto multicore architectures", journal = j-SIGPLAN, volume = "46", number = "3", pages = "357--368", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950406", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Zhang:2011:FED, author = "Eddy Z. 
Zhang and Yunlian Jiang and Ziyu Guo and Kai Tian and Xipeng Shen", title = "On-the-fly elimination of dynamic irregularities for {GPU} computing", journal = j-SIGPLAN, volume = "46", number = "3", pages = "369--380", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950408", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hormati:2011:SPS, author = "Amir H. Hormati and Mehrzad Samadi and Mark Woh and Trevor Mudge and Scott Mahlke", title = "{Sponge}: portable stream programming on graphics engines", journal = j-SIGPLAN, volume = "46", number = "3", pages = "381--392", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Kamruzzaman:2011:ICP, author = "Md Kamruzzaman and Steven Swanson and Dean M. Tullsen", title = "Inter-core prefetching for multicore processors using migrating helper threads", journal = j-SIGPLAN, volume = "46", number = "3", pages = "393--404", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Hayashizaki:2011:IPT, author = "Hiroshige Hayashizaki and Peng Wu and Hiroshi Inoue and Mauricio J. 
Serrano and Toshio Nakatani", title = "Improving the performance of trace-based systems by false loop filtering", journal = j-SIGPLAN, volume = "46", number = "3", pages = "405--418", month = mar, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1961296.1950412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:08 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '11 conference proceedings", } @Article{Bala:2011:DTD, author = "Vasanth Bala and Evelyn Duesterwald and Sanjeev Banerjia", title = "{Dynamo}: a transparent dynamic optimization system", journal = j-SIGPLAN, volume = "46", number = "4", pages = "41--52", month = apr, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1988042.1988044", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:07 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe the design and implementation of Dynamo, a software dynamic optimization system that is capable of transparently improving the performance of a native instruction stream as it executes on the processor. The input native instruction stream to Dynamo can be dynamically generated (by a JIT for example), or it can come from the execution of a statically compiled native binary. This paper evaluates the Dynamo system in the latter, more challenging situation, in order to emphasize the limits, rather than the potential, of the system. Our experiments demonstrate that even statically optimized native binaries can be accelerated by Dynamo, and often by a significant degree.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Claessen:2011:QLT, author = "Koen Claessen and John Hughes", title = "{QuickCheck}: a lightweight tool for random testing of {Haskell} programs", journal = j-SIGPLAN, volume = "46", number = "4", pages = "53--64", month = apr, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1988042.1988046", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:07 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "QuickCheck is a tool which aids the Haskell programmer in formulating and testing properties of programs. Properties are described as Haskell functions, and can be automatically tested on random input, but it is also possible to define custom test data generators. We present a number of case studies, in which the tool was successfully used, and also point out some pitfalls to avoid. Random testing is especially suitable for functional programs because properties can be stated at a fine grain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Arnold:2011:AOJ, author = "Matthew Arnold and Stephen Fink and David Grove and Michael Hind and Peter F.
Sweeney", title = "Adaptive optimization in the {Jalapeno JVM}", journal = j-SIGPLAN, volume = "46", number = "4", pages = "65--83", month = apr, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1988042.1988048", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:07 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Future high-performance virtual machines will improve performance through sophisticated online feedback-directed optimizations. This paper presents the architecture of the Jalapeno Adaptive Optimization System, a system to support leading-edge virtual machine technology and enable ongoing research on online feedback-directed optimizations. We describe the extensible system architecture, based on a federation of threads with asynchronous communication. We present an implementation of the general architecture that supports adaptive multi-level optimization based purely on statistical sampling. We empirically demonstrate that this profiling technique has low overhead and can improve startup and steady-state performance, even without the presence of online feedback-directed optimizations. The paper also describes and evaluates an online feedback-directed inlining optimization based on statistical edge sampling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ishtiaq:2011:BAL, author = "Samin Ishtiaq and Peter W. O'Hearn", title = "{BI} as an assertion language for mutable data structures", journal = j-SIGPLAN, volume = "46", number = "4", pages = "84--96", month = apr, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1988042.1988050", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 24 10:55:07 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reynolds has developed a logic for reasoning about mutable data structures in which the pre- and postconditions are written in an intuitionistic logic enriched with a spatial form of conjunction. We investigate the approach from the point of view of the logic BI of bunched implications of O'Hearn and Pym. We begin by giving a model in which the law of the excluded middle holds, thus showing that the approach is compatible with classical logic. The relationship between the intuitionistic and classical versions of the system is established by a translation, analogous to a translation from intuitionistic logic into the modal logic S4.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Virlet:2011:SSB, author = "Bruno Virlet and Xing Zhou and Jean Pierre Giacalone and Bob Kuhn and Maria J. 
Garzaran and David Padua", title = "Scheduling of stream-based real-time applications for heterogeneous systems", journal = j-SIGPLAN, volume = "46", number = "5", pages = "1--10", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967679", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Chattopadhyay:2011:SBS, author = "Sudipta Chattopadhyay and Abhik Roychoudhury", title = "Static bus schedule aware scratchpad allocation in multiprocessors", journal = j-SIGPLAN, volume = "46", number = "5", pages = "11--20", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967680", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Albert:2011:TLA, author = "Elvira Albert and Puri Arenas and Samir Genaim and Damiano Zanardini", title = "Task-level analysis for a language with async\slash finish parallelism", journal = j-SIGPLAN, volume = "46", number = "5", pages = "21--30", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967681", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Chang:2011:LCW, author = "Li-Pin Chang and Li-Chun Huang", title = "A low-cost wear-leveling algorithm for block-mapping solid-state disks", journal = j-SIGPLAN, volume = "46", number = "5", pages = "31--40", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967683", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multilevel flash memory cells double or even triple storage density, producing affordable solid-state disks for end users. However, flash lifetime is becoming a critical issue in the popularity of solid-state disks. Wear-leveling methods can prevent flash-storage devices from prematurely retiring any portions of flash memory. The two practical challenges of wear-leveling design are implementation cost and tuning complexity. This study proposes a new wear-leveling design that features both simplicity and adaptiveness. This design requires no new data structures, but utilizes the intelligence available in sector-translating algorithms. Using an on-line tuning method, this design adaptively tunes itself to reach good balance between wear evenness and overhead. 
A series of trace-driven simulations show that the proposed design outperforms a competitive existing design in terms of wear evenness and overhead reduction. This study also presents a prototype that proves the feasibility of this wear-leveling design in real solid-state disks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Saha:2011:AIS, author = "Suman Saha and Julia Lawall and Gilles Muller", title = "An approach to improving the structure of error-handling code in the {Linux} kernel", journal = j-SIGPLAN, volume = "46", number = "5", pages = "41--50", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967684", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Gray:2011:TCE, author = "Ian Gray and Neil C. Audsley", title = "Targeting complex embedded architectures by combining the multicore communications {API} ({{\tt mcapi}}) with compile-time virtualisation", journal = j-SIGPLAN, volume = "46", number = "5", pages = "51--60", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967685", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Benveniste:2011:DRT, author = "Albert Benveniste and Timothy Bourke and Beno{\^\i}t Caillaud and Marc Pouzet", title = "Divide and recycle: types and compilation for a hybrid synchronous language", journal = j-SIGPLAN, volume = "46", number = "5", pages = "61--70", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967687", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Gamatie:2011:SAS, author = "Abdoulaye Gamatie and Laure Gonnord", title = "Static analysis of synchronous programs in signal for efficient design of multi-clocked embedded systems", journal = j-SIGPLAN, volume = "46", number = "5", pages = "71--80", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967688", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Berthier:2011:SPD, author = "Nicolas Berthier and Florence Maraninchi and Laurent Mounier", title = "Synchronous programming 
of device drivers for global resource control in embedded operating systems", journal = j-SIGPLAN, volume = "46", number = "5", pages = "81--90", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967689", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Wang:2011:DBM, author = "Man Wang and Zhiyuan Li and Feng Li and Xiaobing Feng and Saurabh Bagchi and Yung-Hsiang Lu", title = "Dependence-based multi-level tracing and replay for wireless sensor networks debugging", journal = j-SIGPLAN, volume = "46", number = "5", pages = "91--100", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967691", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Thomas:2011:LOS, author = "Johnson J. Thomas and Sebastian Fischmeister and Deepak Kumar", title = "Lowering overhead in sampling-based execution monitoring and tracing", journal = j-SIGPLAN, volume = "46", number = "5", pages = "101--110", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967692", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Navabpour:2011:SDT, author = "Samaneh Navabpour and Borzoo Bonakdarpour and Sebastian Fischmeister", title = "Software debugging and testing using the abstract diagnosis theory", journal = j-SIGPLAN, volume = "46", number = "5", pages = "111--120", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967693", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Cullmann:2011:CPA, author = "Christoph Cullmann", title = "Cache persistence analysis: a novel approach---theory and practice", journal = j-SIGPLAN, volume = "46", number = "5", pages = "121--130", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967695", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", }
@Article{Sarkar:2011:PTM, author = "Abhik Sarkar and Frank Mueller and Harini Ramaprasad", title = "Predictable task migration for locked caches in multi-core systems", journal = j-SIGPLAN, volume = "46", number = "5", pages = "131--140", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967696", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Althaus:2011:PEP, author = "Ernst Althaus and Sebastian Altmeyer and Rouven Naujoks", title = "Precise and efficient parametric path analysis", journal = j-SIGPLAN, volume = "46", number = "5", pages = "141--150", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967697", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Jang:2011:ISA, author = "Choonki Jang and Jungwon Kim and Jaejin Lee and Hee-Seok Kim and Dong-Hoon Yoo and Sukjin Kim and Hong-Seok Kim and Soojung Ryu", title = "An instruction-scheduling-aware data partitioning technique for coarse-grained reconfigurable architectures", journal = j-SIGPLAN, volume = "46", number = "5", pages = "151--160", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967699", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Bhagat:2011:GPP, author = "Indu Bhagat and Enric Gibert and Jes{\'u}s S{\'a}nchez and Antonio Gonz{\'a}lez", title = "Global productiveness propagation: a code optimization technique to speculatively prune useless narrow computations", journal = j-SIGPLAN, volume = "46", number = "5", pages = "161--170", month = may, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2016603.1967700", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Aug 18 13:30:54 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '10 conference proceedings", } @Article{Prabhu:2011:CSL, author = "Prakash Prabhu and Soumyadeep Ghosh and Yun Zhang and Nick P. Johnson and David I. 
August", title = "Commutative set: a language extension for implicit parallel programming", journal = j-SIGPLAN, volume = "46", number = "6", pages = "1--11", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993500", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pingali:2011:TPA, author = "Keshav Pingali and Donald Nguyen and Milind Kulkarni and Martin Burtscher and M. Amber Hassaan and Rashid Kaleem and Tsung-Hsien Lee and Andrew Lenharth and Roman Manevich and Mario M{\'e}ndez-Lojo and Dimitrios Prountzos and Xin Sui", title = "The tao of parallelism in algorithms", journal = j-SIGPLAN, volume = "46", number = "6", pages = "12--25", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993501", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raman:2011:POU, author = "Arun Raman and Hanjun Kim and Taewook Oh and Jae W. Lee and David I. August", title = "Parallelism orchestration using {DoPE}: the degree of parallelism executive", journal = j-SIGPLAN, volume = "46", number = "6", pages = "26--37", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hawkins:2011:DRS, author = "Peter Hawkins and Alex Aiken and Kathleen Fisher and Martin Rinard and Mooly Sagiv", title = "Data representation synthesis", journal = j-SIGPLAN, volume = "46", number = "6", pages = "38--49", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993504", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gulwani:2011:SGC, author = "Sumit Gulwani and Vijay Anand Korthikanti and Ashish Tiwari", title = "Synthesizing geometry constructions", journal = j-SIGPLAN, volume = "46", number = "6", pages = "50--61", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993505", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gulwani:2011:SLF, author = "Sumit Gulwani and Susmit Jha and Ashish Tiwari and Ramarathnam Venkatesan", title = 
"Synthesis of loop-free programs", journal = j-SIGPLAN, volume = "46", number = "6", pages = "62--73", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993506", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bohm:2011:GJT, author = "Igor B{\"o}hm and Tobias J. K. Edler von Koch and Stephen C. Kyle and Bj{\"o}rn Franke and Nigel Topham", title = "Generalized just-in-time trace compilation using a parallel task farm in a dynamic binary translator", journal = j-SIGPLAN, volume = "46", number = "6", pages = "74--85", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jung:2011:BES, author = "Changhee Jung and Silvius Rus and Brian P. Railing and Nathan Clark and Santosh Pande", title = "{Brainy}: effective selection of data structures", journal = j-SIGPLAN, volume = "46", number = "6", pages = "86--97", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhou:2011:SBA, author = "Hucheng Zhou and Wenguang Chen and Fred Chow", title = "An {SSA}-based algorithm for optimal speculative code motion under an execution profile", journal = j-SIGPLAN, volume = "46", number = "6", pages = "98--108", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2011:CHD, author = "Xun Li and Mohit Tiwari and Jason K. Oberg and Vineeth Kashyap and Frederic T. 
Chong and Timothy Sherwood and Ben Hardekopf", title = "{Caisson}: a hardware description language for secure information flow", journal = j-SIGPLAN, volume = "46", number = "6", pages = "109--120", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Murray:2011:SAO, author = "Derek Gordon Murray and Michael Isard and Yuan Yu", title = "{Steno}: automatic optimization of declarative queries", journal = j-SIGPLAN, volume = "46", number = "6", pages = "121--131", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tobin-Hochstadt:2011:LL, author = "Sam Tobin-Hochstadt and Vincent St-Amour and Ryan Culpepper and Matthew Flatt and Matthias Felleisen", title = "Languages as libraries", journal = j-SIGPLAN, volume = "46", number = "6", pages = "132--141", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jablin:2011:ACG, author = "Thomas B. Jablin and Prakash Prabhu and James A. Jablin and Nick P. Johnson and Stephen R. Beard and David I. August", title = "Automatic {CPU--GPU} communication management and optimization", journal = j-SIGPLAN, volume = "46", number = "6", pages = "142--151", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prasad:2011:ACM, author = "Ashwin Prasad and Jayvant Anantpur and R. 
Govindarajan", title = "Automatic compilation of {MATLAB} programs for synergistic execution on heterogeneous processors", journal = j-SIGPLAN, volume = "46", number = "6", pages = "152--163", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sampson:2011:EAD, author = "Adrian Sampson and Werner Dietl and Emily Fortuna and Danushen Gnanapragasam and Luis Ceze and Dan Grossman", title = "{EnerJ}: approximate data types for safe and general low-power computation", journal = j-SIGPLAN, volume = "46", number = "6", pages = "164--174", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sarkar:2011:UPM, author = "Susmit Sarkar and Peter Sewell and Jade Alglave and Luc Maranget and Derek Williams", title = "Understanding {POWER} multiprocessors", journal = j-SIGPLAN, volume = "46", number = "6", pages = "175--186", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kuperstein:2011:PCA, author = "Michael Kuperstein and Martin Vechev and Eran Yahav", title = "Partial-coherence abstractions for relaxed memory models", journal = j-SIGPLAN, volume = "46", number = "6", pages = "187--198", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Marino:2011:CSP, author = "Daniel Marino and Abhayendra Singh and Todd Millstein and Madanlal Musuvathi and Satish Narayanasamy", title = "A case for an {SC}-preserving compiler", journal = j-SIGPLAN, volume = "46", number = "6", pages = "199--210", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The most intuitive memory consistency model for shared-memory multi-threaded programming is sequential consistency (SC). 
However, current concurrent programming languages support a relaxed model, as such relaxations are deemed necessary for enabling important optimizations. This paper demonstrates that an SC-preserving compiler, one that ensures that every SC behavior of a compiler-generated binary is an SC behavior of the source program, retains most of the performance benefits of an optimizing compiler. The key observation is that a large class of optimizations crucial for performance are either already SC-preserving or can be modified to preserve SC while retaining much of their effectiveness. An SC-preserving compiler, obtained by restricting the optimization phases in LLVM, a state-of-the-art C/C++ compiler, incurs an average slowdown of 3.8\% and a maximum slowdown of 34\% on a set of 30 programs from the SPLASH-2, PARSEC, and SPEC CINT2006 benchmark suites.\par While the performance overhead of preserving SC in the compiler is much less than previously assumed, it might still be unacceptable for certain applications. We believe there are several avenues for improving performance without giving up SC-preservation. In this vein, we observe that the overhead of our SC-preserving compiler arises mainly from its inability to aggressively perform a class of optimizations we identify as eager-load optimizations. This class includes common-subexpression elimination, constant propagation, global value numbering, and common cases of loop-invariant code motion. We propose a notion of interference checks in order to enable eager-load optimizations while preserving SC. Interference checks expose to the compiler a commonly used hardware speculation mechanism that can efficiently detect whether a particular variable has changed its value since last read.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "LLVM compiler suite; sequential consistency (SC)", } @Article{Beckman:2011:PMS, author = "Nels E. Beckman and Aditya V. 
Nori", title = "Probabilistic, modular and scalable inference of typestate specifications", journal = j-SIGPLAN, volume = "46", number = "6", pages = "211--221", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kobayashi:2011:PAC, author = "Naoki Kobayashi and Ryosuke Sato and Hiroshi Unno", title = "Predicate abstraction and {CEGAR} for higher-order model checking", journal = j-SIGPLAN, volume = "46", number = "6", pages = "222--233", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chlipala:2011:MAV, author = "Adam Chlipala", title = "Mostly-automated verification of low-level programs in computational separation logic", journal = j-SIGPLAN, volume = "46", number = "6", pages = "234--245", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lee:2011:TGR, author = "Kyu Hyung Lee and Yunhui Zheng and Nick Sumner and Xiangyu Zhang", title = "Toward generating reducible replay logs", journal = j-SIGPLAN, volume = "46", number = "6", pages = "246--257", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Godefroid:2011:HOT, author = "Patrice Godefroid", title = "Higher-order test generation", journal = j-SIGPLAN, volume = "46", number = "6", pages = "258--269", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Xu:2011:LHP, author = "Guoqing Xu and Michael D. 
Bond and Feng Qin and Atanas Rountev", title = "{LeakChaser}: helping programmers narrow down causes of memory leaks", journal = j-SIGPLAN, volume = "46", number = "6", pages = "270--282", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yang:2011:FUB, author = "Xuejun Yang and Yang Chen and Eric Eide and John Regehr", title = "Finding and understanding bugs in {C} compilers", journal = j-SIGPLAN, volume = "46", number = "6", pages = "283--294", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993532", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compilers should be correct. To improve the quality of C compilers, we created Csmith, a randomized test-case generation tool, and spent three years using it to find compiler bugs. During this period we reported more than 325 previously unknown bugs to compiler developers. Every compiler we tested was found to crash and also to silently generate wrong code when presented with valid input. In this paper we present our compiler-testing tool and the results of our bug-hunting study. Our first contribution is to advance the state of the art in compiler testing. Unlike previous tools, Csmith generates programs that cover a large subset of C while avoiding the undefined and unspecified behaviors that would destroy its ability to automatically find wrong-code bugs. Our second contribution is a collection of qualitative and quantitative results about the bugs we have found in open-source C compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tristan:2011:EVG, author = "Jean-Baptiste Tristan and Paul Govereau and Greg Morrisett", title = "Evaluating value-graph translation validation for {LLVM}", journal = j-SIGPLAN, volume = "46", number = "6", pages = "295--305", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sevcik:2011:SOS, author = "Jaroslav {\v{S}}ev{\v{c}}{\'\i}k", title = "Safe optimisations for shared-memory concurrent programs", journal = j-SIGPLAN, volume = "46", number = "6", pages = "306--316", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Harris:2011:STT, author = "William R.
Harris and Sumit Gulwani", title = "Spreadsheet table transformations from examples", journal = j-SIGPLAN, volume = "46", number = "6", pages = "317--328", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Meng:2011:SEG, author = "Na Meng and Miryung Kim and Kathryn S. McKinley", title = "Systematic editing: generating program transformations from an example", journal = j-SIGPLAN, volume = "46", number = "6", pages = "329--342", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Srivastava:2011:SPO, author = "Varun Srivastava and Michael D. Bond and Kathryn S. McKinley and Vitaly Shmatikov", title = "A security policy oracle: detecting security holes using multiple {API} implementations", journal = j-SIGPLAN, volume = "46", number = "6", pages = "343--354", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ansel:2011:LIS, author = "Jason Ansel and Petr Marchenko and Ulfar Erlingsson and Elijah Taylor and Brad Chen and Derek L. Schuff and David Sehr and Cliff L. Biffle and Bennet Yee", title = "Language-independent sandboxing of just-in-time compilation and self-modifying code", journal = j-SIGPLAN, volume = "46", number = "6", pages = "355--366", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993540", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zeng:2011:CCH, author = "Qiang Zeng and Dinghao Wu and Peng Liu", title = "{Cruiser}: concurrent heap buffer overflow monitoring using lock-free data structures", journal = j-SIGPLAN, volume = "46", number = "6", pages = "367--377", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lucia:2011:IUC, author = "Brandon Lucia and Benjamin P. 
Wood and Luis Ceze", title = "Isolating and understanding concurrency errors using reconstructed execution fragments", journal = j-SIGPLAN, volume = "46", number = "6", pages = "378--388", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993543", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jin:2011:AAV, author = "Guoliang Jin and Linhai Song and Wei Zhang and Shan Lu and Ben Liblit", title = "Automated atomicity-violation fixing", journal = j-SIGPLAN, volume = "46", number = "6", pages = "389--400", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993544", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Burnim:2011:NRC, author = "Jacob Burnim and Tayfun Elmas and George Necula and Koushik Sen", title = "{NDSeq}: runtime checking for nondeterministic sequential specifications of parallel correctness", journal = j-SIGPLAN, volume = "46", number = "6", pages = "401--414", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jin:2011:GCM, author = "Dongyun Jin and Patrick O'Neil Meredith and Dennis Griffith and Grigore Rosu", title = "Garbage collection for monitoring parametric properties", journal = j-SIGPLAN, volume = "46", number = "6", pages = "415--424", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Parr:2011:LFA, author = "Terence Parr and Kathleen Fisher", title = "{LL(*)}: the foundation of the {ANTLR} parser generator", journal = j-SIGPLAN, volume = "46", number = "6", pages = "425--436", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the power of Parser Expression Grammars (PEGs) and GLR, parsing is not a solved problem. Adding nondeterminism (parser speculation) to traditional LL and LR parsers can lead to unexpected parse-time behavior and introduces practical issues with error handling, single-step debugging, and side-effecting embedded grammar actions. 
This paper introduces the LL(*) parsing strategy and an associated grammar analysis algorithm that constructs LL(*) parsing decisions from ANTLR grammars. At parse-time, decisions gracefully throttle up from conventional fixed $ k \geq 1 $ lookahead to arbitrary lookahead and, finally, fail over to backtracking depending on the complexity of the parsing decision and the input symbols. LL(*) parsing strength reaches into the context-sensitive languages, in some cases beyond what GLR and PEGs can express. By statically removing as much speculation as possible, LL(*) provides the expressivity of PEGs while retaining LL's good error handling and unrestricted grammar actions. Widespread use of ANTLR (over 70,000 downloads/year) shows that it is effective for a wide variety of applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jose:2011:CCC, author = "Manu Jose and Rupak Majumdar", title = "Cause clue clauses: error localization using maximum satisfiability", journal = j-SIGPLAN, volume = "46", number = "6", pages = "437--446", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993550", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Budi:2011:AMA, author = "Aditya Budi and David Lo and Lingxiao Jiang and Lucia", title = "$ k b $-anonymity: a model for anonymized behaviour-preserving test and debugging data", journal = j-SIGPLAN, volume = "46", number = "6", pages = "447--457", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Garcia:2011:KRR, author = "Saturnino Garcia and Donghwan Jeon and Christopher M. Louie and Michael Bedford Taylor", title = "{Kremlin}: rethinking and rebooting {{\tt gprof}} for the multicore age", journal = j-SIGPLAN, volume = "46", number = "6", pages = "458--469", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many recent parallelization tools lower the barrier for parallelizing a program, but overlook one of the first questions that a programmer needs to answer: which parts of the program should I spend time parallelizing?\par This paper examines Kremlin, an automatic tool that, given a serial version of a program, will make recommendations to the user as to what regions (e.g. loops or functions) of the program to attack first. Kremlin introduces a novel hierarchical critical path analysis and develops a new metric for estimating the potential of parallelizing a region: self-parallelism.
We further introduce the concept of a parallelism planner, which provides a ranked order of specific regions to the programmer that are likely to have the largest performance impact when parallelized. Kremlin supports multiple planner personalities, which allow the planner to more effectively target a particular programming environment or class of machine.\par We demonstrate the effectiveness of one such personality, an OpenMP planner, by comparing versions of programs that are parallelized according to Kremlin's plan against third-party manually parallelized versions. The results show that Kremlin's OpenMP planner is highly effective, producing plans whose performance is typically comparable to, and sometimes much better than, manual parallelization. At the same time, these plans would require that the user parallelize significantly fewer regions of the program.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sato:2011:APM, author = "Shigeyuki Sato and Hideya Iwasaki", title = "Automatic parallelization via matrix multiplication", journal = j-SIGPLAN, volume = "46", number = "6", pages = "470--479", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993554", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Udupa:2011:AEB, author = "Abhishek Udupa and Kaushik Rajan and William Thies", title = "{ALTER}: exploiting breakable dependences for parallelization", journal = j-SIGPLAN, volume = "46", number = "6", pages = "480--491", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Srivastava:2011:PBI, author = "Saurabh Srivastava and Sumit Gulwani and Swarat Chaudhuri and Jeffrey S. 
Foster", title = "Path-based inductive synthesis for program inversion", journal = j-SIGPLAN, volume = "46", number = "6", pages = "492--503", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Person:2011:DIS, author = "Suzette Person and Guowei Yang and Neha Rungta and Sarfraz Khurshid", title = "Directed incremental symbolic execution", journal = j-SIGPLAN, volume = "46", number = "6", pages = "504--515", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993558", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DElia:2011:MHC, author = "Daniele Cono D'Elia and Camil Demetrescu and Irene Finocchi", title = "Mining hot calling contexts in small space", journal = j-SIGPLAN, volume = "46", number = "6", pages = "516--527", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993559", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kim:2011:VSC, author = "Deokhwan Kim and Martin C. 
Rinard", title = "Verification of semantic commutativity conditions and inverse operations on linked data structures", journal = j-SIGPLAN, volume = "46", number = "6", pages = "528--541", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993561", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kulkarni:2011:ECL, author = "Milind Kulkarni and Donald Nguyen and Dimitrios Prountzos and Xin Sui and Keshav Pingali", title = "Exploiting the commutativity lattice", journal = j-SIGPLAN, volume = "46", number = "6", pages = "542--555", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993562", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Perez:2011:SLS, author = "Juan Antonio Navarro P{\'e}rez and Andrey Rybalchenko", title = "Separation logic $+$ superposition calculus $=$ heap theorem prover", journal = j-SIGPLAN, volume = "46", number = "6", pages = "556--566", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dillig:2011:PCM, author = "Isil Dillig and Thomas Dillig and Alex Aiken and Mooly Sagiv", title = "Precise and compact modular procedure summaries for heap manipulating programs", journal = j-SIGPLAN, volume = "46", number = "6", pages = "567--577", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993565", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bouajjani:2011:IPA, author = "Ahmed Bouajjani and Cezara Dragoi and Constantin Enea and Mihaela Sighireanu", title = "On inter-procedural analysis of programs with lists and data", journal = j-SIGPLAN, volume = "46", number = "6", pages = "578--589", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liang:2011:SAR, author = "Percy Liang and Mayur Naik", title = "Scaling abstraction refinement via pruning", journal = j-SIGPLAN, volume = "46", number = "6", pages = "590--601", 
month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993567", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Altidor:2011:TWC, author = "John Altidor and Shan Shan Huang and Yannis Smaragdakis", title = "Taming the wildcards: combining definition- and use-site variance", journal = j-SIGPLAN, volume = "46", number = "6", pages = "602--613", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tate:2011:TWJ, author = "Ross Tate and Alan Leung and Sorin Lerner", title = "Taming wildcards in {Java}'s type system", journal = j-SIGPLAN, volume = "46", number = "6", pages = "614--627", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993570", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ziarek:2011:CAE, author = "Lukasz Ziarek and KC Sivaramakrishnan and Suresh Jagannathan", title = "Composable asynchronous events", journal = j-SIGPLAN, volume = "46", number = "6", pages = "628--639", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Best:2011:SST, author = "Micah J. Best and Shane Mottishaw and Craig Mustard and Mark Roth and Alexandra Fedorova and Andrew Brownsword", title = "Synchronization via scheduling: techniques for efficiently managing shared state", journal = j-SIGPLAN, volume = "46", number = "6", pages = "640--652", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993573", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bacon:2011:VAH, author = "David F. 
Bacon", title = "Virtualization in the age of heterogeneous machines", journal = j-SIGPLAN, volume = "46", number = "7", pages = "1--2", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952684", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Du:2011:PPV, author = "Jiaqing Du and Nipun Sehrawat and Willy Zwaenepoel", title = "Performance profiling of virtual machines", journal = j-SIGPLAN, volume = "46", number = "7", pages = "3--14", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952686", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nikolaev:2011:PXF, author = "Ruslan Nikolaev and Godmar Back", title = "{Perfctr-Xen}: a framework for performance counter virtualization", journal = j-SIGPLAN, volume = "46", number = "7", pages = "15--26", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952687", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhao:2011:DCC, author = "Qin Zhao and David Koh and Syed Raza and Derek Bruening and Weng-Fai Wong and Saman Amarasinghe", title = "Dynamic cache contention detection in multi-threaded applications", journal = j-SIGPLAN, volume = "46", number = "7", pages = "27--38", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952688", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wang:2011:RVM, author = "Kun Wang and Jia Rao and Cheng-Zhong Xu", title = "Rethink the virtual machine template", journal = j-SIGPLAN, volume = "46", number = "7", pages = "39--50", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952690", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cecchet:2011:DVD, author = "Emmanuel Cecchet and Rahul Singh and Upendra Sharma and Prashant Shenoy", title = "{Dolly}: virtualization-driven database provisioning for the cloud", journal = j-SIGPLAN, volume = "46", number = "7", pages = "51--62", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952691", 
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Le:2011:REV, author = "Michael Le and Yuval Tamir", title = "{ReHype}: enabling {VM} survival across hypervisor failures", journal = j-SIGPLAN, volume = "46", number = "7", pages = "63--74", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952692", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Park:2011:FSE, author = "Eunbyung Park and Bernhard Egger and Jaejin Lee", title = "Fast and space-efficient virtual machine checkpointing", journal = j-SIGPLAN, volume = "46", number = "7", pages = "75--86", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952694", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2011:FRC, author = "Irene Zhang and Alex Garthwaite and Yury Baskakov and Kenneth C. Barr", title = "Fast restore of checkpointed memory using working set estimation", journal = j-SIGPLAN, volume = "46", number = "7", pages = "87--98", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952695", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kourai:2011:FCP, author = "Kenichi Kourai", title = "Fast and correct performance recovery of operating systems using a virtual machine monitor", journal = j-SIGPLAN, volume = "46", number = "7", pages = "99--110", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952696", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Svard:2011:EDC, author = "Petter Sv{\"a}rd and Benoit Hudzia and Johan Tordsson and Erik Elmroth", title = "Evaluation of delta compression techniques for efficient live migration of large virtual machines", journal = j-SIGPLAN, volume = "46", number = "7", pages = "111--120", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952698", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wood:2011:CDP, author = "Timothy Wood and K. K. Ramakrishnan and Prashant Shenoy and Jacobus van der Merwe", title = "{CloudNet}: dynamic pooling of cloud resources by live {WAN} migration of virtual machines", journal = j-SIGPLAN, volume = "46", number = "7", pages = "121--132", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952699", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zheng:2011:WAL, author = "Jie Zheng and Tze Sing Eugene Ng and Kunwadee Sripanidkulchai", title = "Workload-aware live storage migration for clouds", journal = j-SIGPLAN, volume = "46", number = "7", pages = "133--144", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952700", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Litty:2011:PAI, author = "Lionel Litty and David Lie", title = "Patch auditing in infrastructure as a service clouds", journal = j-SIGPLAN, volume = "46", number = "7", pages = "145--156", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952702", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Payer:2011:FGU, author = "Mathias Payer and Thomas R. Gross", title = "Fine-grained user-space security through virtualization", journal = j-SIGPLAN, volume = "46", number = "7", pages = "157--168", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952703", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lange:2011:MOV, author = "John R. Lange and Kevin Pedretti and Peter Dinda and Patrick G. 
Bridges and Chang Bae and Philip Soltero and Alexander Merritt", title = "Minimal-overhead virtualization of a large scale supercomputer", journal = j-SIGPLAN, volume = "46", number = "7", pages = "169--180", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952705", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Xia:2011:VWB, author = "Lei Xia and Sanjay Kumar and Xue Yang and Praveen Gopalakrishnan and York Liu and Sebastian Schoenberg and Xingang Guo", title = "Virtual {WiFi}: bring virtualization from wired to wireless", journal = j-SIGPLAN, volume = "46", number = "7", pages = "181--192", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952706", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lange:2011:SSV, author = "John R. Lange and Peter Dinda", title = "{SymCall}: symbiotic virtualization through {VMM}-to-guest upcalls", journal = j-SIGPLAN, volume = "46", number = "7", pages = "193--204", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952707", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Williams:2011:OHM, author = "Dan Williams and Hani Jamjoom and Yew-Huey Liu and Hakim Weatherspoon", title = "{Overdriver}: handling memory overload in an oversubscribed cloud", journal = j-SIGPLAN, volume = "46", number = "7", pages = "205--216", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952709", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wang:2011:SHS, author = "Xiaolin Wang and Jiarui Zang and Zhenlin Wang and Yingwei Luo and Xiaoming Li", title = "Selective hardware\slash software memory virtualization", journal = j-SIGPLAN, volume = "46", number = "7", pages = "217--226", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952710", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Roy:2011:HBR, author = "Amitabha Roy and Steven Hand and Tim Harris", title = "Hybrid binary rewriting for memory access instrumentation", 
journal = j-SIGPLAN, volume = "46", number = "7", pages = "227--238", month = jul, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2007477.1952711", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 16 10:02:34 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Larus:2011:PC, author = "James R. Larus", title = "Programming the cloud", journal = j-SIGPLAN, volume = "46", number = "8", pages = "1--2", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Client + cloud computing is a disruptive, new computing platform, combining diverse client devices --- PCs, smartphones, sensors, and single-function and embedded devices --- with the unlimited, on-demand computation and data storage offered by cloud computing services such as Amazon's AWS or Microsoft's Windows Azure. As with every advance in computing, programming is a fundamental challenge as client + cloud computing combines many difficult aspects of software development. Systems built for this world are inherently parallel and distributed, run on unreliable hardware, and must be continually available --- a challenging programming model for even the most skilled programmers. How then do ordinary programmers develop software for the Cloud? This talk presents one answer, Orleans, a software framework for building client + cloud applications. Orleans encourages use of simple concurrency patterns that are easy to understand and implement correctly, building on an actor-like model with declarative specification of persistence, replication, and consistency and using lightweight transactions to support the development of reliable and scalable client + cloud software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hassaan:2011:OVU, author = "Muhammad Amber Hassaan and Martin Burtscher and Keshav Pingali", title = "Ordered vs. unordered: a comparison of parallelism and work-efficiency in irregular algorithms", journal = j-SIGPLAN, volume = "46", number = "8", pages = "3--12", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Outside of computational science, most problems are formulated in terms of irregular data structures such as graphs, trees and sets. Unfortunately, we understand relatively little about the structure of parallelism and locality in irregular algorithms. In this paper, we study multiple algorithms for four such problems: discrete-event simulation, single-source shortest path, breadth-first search, and minimal spanning trees. 
We show that the algorithms can be classified into two categories that we call unordered and ordered, and demonstrate experimentally that there is a trade-off between parallelism and work efficiency: unordered algorithms usually have more parallelism than their ordered counterparts for the same problem, but they may also perform more work. Nevertheless, our experimental results show that unordered algorithms typically lead to more scalable implementations, demonstrating that less work-efficient irregular algorithms may be better for parallel execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bauer:2011:PMH, author = "Michael Bauer and John Clark and Eric Schkufza and Alex Aiken", title = "Programming the memory hierarchy revisited: supporting irregular parallelism in {Sequoia}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "13--24", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941558", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We describe two novel constructs for programming parallel machines with multi-level memory hierarchies: call-up, which allows a child task to invoke computation on its parent, and spawn, which spawns a dynamically determined number of parallel children until some termination condition in the parent is met. Together we show that these constructs allow applications with irregular parallelism to be programmed in a straightforward manner, and furthermore these constructs complement and can be combined with constructs for expressing regular parallelism. We have implemented spawn and call-up in Sequoia and we present an experimental evaluation on a number of irregular applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Murarasu:2011:CDS, author = "Alin Murarasu and Josef Weidendorfer and Gerrit Buse and Daniel Butnaru and Dirk Pfl{\"u}ger", title = "Compact data structure and scalable algorithms for the sparse grid technique", journal = j-SIGPLAN, volume = "46", number = "8", pages = "25--34", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941559", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The sparse grid discretization technique enables a compressed representation of higher-dimensional functions. In its original form, it relies heavily on recursion and complex data structures, thus being far from well-suited for GPUs. In this paper, we describe optimizations that enable us to implement compression and decompression, the crucial sparse grid algorithms for our application, on Nvidia GPUs. The main idea consists of a bijective mapping between the set of points in a multi-dimensional sparse grid and a set of consecutive natural numbers. The resulting data structure consumes a minimum amount of memory. 
For a 10-dimensional sparse grid with approximately 127 million points, it consumes up to 30 times less memory than trees or hash tables which are typically used. Compared to a sequential CPU implementation, the speedups achieved on GPU are up to 17 for compression and up to 70 for decompression, respectively. We show that the optimizations are also applicable to multicore CPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chafi:2011:DSA, author = "Hassan Chafi and Arvind K. Sujeeth and Kevin J. Brown and HyoukJoong Lee and Anand R. Atreya and Kunle Olukotun", title = "A domain-specific approach to heterogeneous parallelism", journal = j-SIGPLAN, volume = "46", number = "8", pages = "35--46", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941561", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Exploiting heterogeneous parallel hardware currently requires mapping application code to multiple disparate programming models. Unfortunately, general-purpose programming models available today can yield high performance but are too low-level to be accessible to the average programmer. We propose leveraging domain-specific languages (DSLs) to map high-level application code to heterogeneous devices. To demonstrate the potential of this approach we present OptiML, a DSL for machine learning. OptiML programs are implicitly parallel and can achieve high performance on heterogeneous hardware with no modification required to the source code. For such a DSL-based approach to be tractable at large scales, better tools are required for DSL authors to simplify language creation and parallelization. To address this concern, we introduce Delite, a system designed specifically for DSLs that is both a framework for creating an implicitly parallel DSL as well as a dynamic runtime providing automated targeting to heterogeneous parallel hardware. We show that OptiML running on Delite achieves single-threaded, parallel, and GPU performance superior to explicitly parallelized MATLAB code in nearly all cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Catanzaro:2011:CCE, author = "Bryan Catanzaro and Michael Garland and Kurt Keutzer", title = "{Copperhead}: compiling an embedded data parallel language", journal = j-SIGPLAN, volume = "46", number = "8", pages = "47--56", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941562", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Modern parallel microprocessors deliver high performance on applications that expose substantial fine-grained data parallelism. Although data parallelism is widely available in many computations, implementing data parallel algorithms in low-level languages is often an unnecessarily difficult task. 
The characteristics of parallel microprocessors and the limitations of current programming methodologies motivate our design of Copperhead, a high-level data parallel language embedded in Python. The Copperhead programmer describes parallel computations via composition of familiar data parallel primitives supporting both flat and nested data parallel computation on arrays of data. Copperhead programs are expressed in a subset of the widely used Python programming language and interoperate with standard Python modules, including libraries for numeric computation, data visualization, and analysis. In this paper, we discuss the language, compiler, and runtime features that enable Copperhead to efficiently execute data parallel code. We define the restricted subset of Python which Copperhead supports and introduce the program analysis techniques necessary for compiling Copperhead code into efficient low-level implementations. We also outline the runtime support by which Copperhead programs interoperate with standard Python modules. We demonstrate the effectiveness of our techniques with several examples targeting the CUDA platform for parallel programming on GPUs. Copperhead code is concise, on average requiring 3.6 times fewer lines of code than CUDA, and the compiler generates efficient code, yielding 45-100\% of the performance of hand-crafted, well optimized CUDA code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jenista:2011:OSO, author = "James Christopher Jenista and Yong hun Eom and Brian Charles Demsky", title = "{OoOJava}: software out-of-order execution", journal = j-SIGPLAN, volume = "46", number = "8", pages = "57--68", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Developing parallel software using current tools can be challenging. Even experts find it difficult to reason about the use of locks and often accidentally introduce race conditions and deadlocks into parallel software. OoOJava is a compiler-assisted approach that leverages developer annotations along with static analysis to provide an easy-to-use deterministic parallel programming model. OoOJava extends Java with a task annotation that instructs the compiler to consider a code block for out-of-order execution. OoOJava executes tasks as soon as their data dependences are resolved and guarantees that the execution of an annotated program preserves the exact semantics of the original sequential program. 
We have implemented OoOJava and achieved an average speedup of 16.6x on our ten benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feng:2011:SSP, author = "Min Feng and Rajiv Gupta and Yi Hu", title = "{SpiceC}: scalable parallelism via implicit copying and explicit commit", journal = j-SIGPLAN, volume = "46", number = "8", pages = "69--80", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941564", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In this paper we present an approach to parallel programming called SpiceC. SpiceC simplifies the task of parallel programming through a combination of an intuitive computation model and SpiceC directives. The SpiceC parallel computation model consists of multiple threads where every thread has a private space for data and all threads share data via a shared space. Each thread performs computations using its private space thus offering isolation which allows for speculative computations. SpiceC provides easy to use SpiceC compiler directives using which the programmers can express different forms of parallelism. It allows developers to express high level constraints on data transfers between spaces while the tedious task of generating the code for the data transfers is performed by the compiler. SpiceC also supports data transfers involving dynamic data structures without help from developers. SpiceC allows developers to create clusters of data to enable parallel data transfers. SpiceC programs are portable across modern chip multiprocessor based machines that may or may not support cache coherence. We have developed implementations of SpiceC for shared memory systems with and without cache coherence. We evaluate our implementation using seven benchmarks of which four are parallelized speculatively. Our compiler generated implementations achieve speedups ranging from 2x to 18x on a 24 core system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Negara:2011:IOT, author = "Stas Negara and Rajesh K. Karmani and Gul Agha", title = "Inferring ownership transfer for efficient message passing", journal = j-SIGPLAN, volume = "46", number = "8", pages = "81--90", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "One of the more popular paradigms for concurrent programming is the Actor model of message passing; it has been adopted in one form or another by a number of languages and frameworks. By avoiding a shared local state and instead relying on message passing, the Actor model facilitates modular programming. An important challenge for message passing languages is to transmit messages efficiently. This requires retaining the pass-by-value semantics of messages while avoiding making a deep copy on sequential or shared memory multicore processors. 
A key observation is that many messages have an ownership transfer semantics; such messages can be sent efficiently using pointers without introducing shared state between concurrent objects. We propose a conservative static analysis algorithm which infers if the content of a message is compatible with an ownership transfer semantics. Our tool, called SOTER (for Safe Ownership Transfer enablER) transforms the program to avoid the cost of copying the contents of a message whenever it can infer the content obeys the ownership transfer semantics. Experiments using a range of programs suggest that our conservative static analysis method is usually able to infer ownership transfer. Performance results demonstrate that the transformed programs execute up to an order of magnitude faster than the original programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Xiang:2011:AWP, author = "Xiaoya Xiang and Bin Bao and Tongxin Bai and Chen Ding and Trishul Chilimbi", title = "All-window profiling and composable models of cache sharing", journal = j-SIGPLAN, volume = "46", number = "8", pages = "91--102", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941567", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "As multi-core processors become commonplace and cloud computing is gaining acceptance, more applications are run in a shared cache environment. Cache sharing depends on a concept called footprint, which depends on all cache accesses not just cache misses. Previous work has recognized the importance of footprint but has not provided a method for accurate measurement, mainly because the complete measurement requires counting data access in all execution windows, which takes time quadratic in the length of a trace. The paper first presents an algorithm efficient enough for off-line use to approximately measure the footprint with a guaranteed precision. The cost of the analysis can be adjusted by changing the precision. Then the paper presents a composable model. For a set of programs, the model uses the all-window footprint of each program to predict its cache interference with other programs without running these programs together. 
The paper evaluates the efficiency of all-window profiling using the SPEC 2000 benchmarks and compares the footprint interference model with a miss-rate based model and with exhaustive testing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ding:2011:UUL, author = "Xiaoning Ding and Kaibo Wang and Xiaodong Zhang", title = "{ULCC}: a user-level facility for optimizing shared cache performance on multicores", journal = j-SIGPLAN, volume = "46", number = "8", pages = "103--112", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Scientific applications face serious performance challenges on multicore processors, one of which is caused by access contention in last level shared caches from multiple running threads. The contention increases the number of long latency memory accesses, and consequently increases application execution times. Optimizing shared cache performance is critical to reduce significantly execution times of multi-threaded programs on multicores. However, there are two unique problems to be solved before implementing cache optimization techniques on multicores at the user level. First, available cache space for each running thread in a last level cache is difficult to predict due to access contention in the shared space, which makes cache conscious algorithms for single cores ineffective on multicores. Second, at the user level, programmers are not able to allocate cache space at will to running threads in the shared cache, thus data sets with strong locality may not be allocated with sufficient cache space, and cache pollution can easily happen. To address these two critical issues, we have designed ULCC (User Level Cache Control), a software runtime library that enables programmers to explicitly manage and optimize last level cache usage by allocating proper cache space for different data sets of different threads. We have implemented ULCC at the user level based on a page-coloring technique for last level cache usage management. By means of multiple case studies on an Intel multicore processor, we show that with ULCC, scientific applications can achieve significant performance improvements by fully exploiting the benefit of cache optimization algorithms and by partitioning the cache space accordingly to protect frequently reused data sets and to avoid cache pollution. 
Our experiments with various applications show that ULCC can significantly improve application performance by nearly 40\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wu:2011:STB, author = "Xing Wu and Frank Mueller", title = "{ScalaExtrap}: trace-based communication extrapolation for {SPMD} programs", journal = j-SIGPLAN, volume = "46", number = "8", pages = "113--122", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Performance modeling for scientific applications is important for assessing potential application performance and systems procurement in high-performance computing (HPC). Recent progress on communication tracing opens up novel opportunities for communication modeling due to its lossless yet scalable trace collection. Estimating the impact of scaling on communication efficiency still remains non-trivial due to execution-time variations and exposure to hardware and software artifacts. This work contributes a fundamentally novel modeling scheme. We synthetically generate the application trace for large numbers of nodes by extrapolation from a set of smaller traces. We devise an innovative approach for topology extrapolation of single program, multiple data (SPMD) codes with stencil or mesh communication. The extrapolated trace can subsequently be (a) replayed to assess communication requirements before porting an application, (b) transformed to auto-generate communication benchmarks for various target platforms, and (c) analyzed to detect communication inefficiencies and scalability limitations. To the best of our knowledge, rapidly obtaining the communication behavior of parallel applications at arbitrary scale with the availability of timed replay, yet without actual execution of the application at this scale is without precedence and has the potential to enable otherwise infeasible system simulation at the exascale level.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{McKinley:2011:HPC, author = "Kathryn S. McKinley", title = "How's the parallel computing revolution going?", journal = j-SIGPLAN, volume = "46", number = "8", pages = "123--124", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941571", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Two trends changed the computing landscape over the past decade: (1) hardware vendors started delivering chip multiprocessors (CMPs) instead of uniprocessors, and (2) software developers increasingly chose managed languages instead of native languages. Unfortunately, the former change is disrupting the virtuous-cycle between performance improvements and software innovation. 
Establishing a new parallel performance virtuous cycle for managed languages will require scalable applications executing on scalable Virtual Machine (VM) services, since the VM schedules, monitors, compiles, optimizes, garbage collects, and executes together with the application. This talk describes current progress, opportunities, and challenges for scalable VM services. The parallel computing revolution urgently needs more innovations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Karmani:2011:TCS, author = "Rajesh K. Karmani and P. Madhusudan and Brandon M. Moore", title = "Thread contracts for safe parallelism", journal = j-SIGPLAN, volume = "46", number = "8", pages = "125--134", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941573", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We build a framework of thread contracts, called Accord, that allows programmers to annotate their concurrency co-ordination strategies. Accord annotations allow programmers to declaratively specify the parts of memory that a thread may read or write into, and the locks that protect them, reflecting the concurrency co-ordination among threads and the reason why the program is free of data-races. We provide automatic tools to check if the concurrency co-ordination strategy ensures race-freedom, using constraint-solvers (SMT solvers). Hence programmers using Accord can both formally state and prove their co-ordination strategies ensure race freedom. The programmer's implementation of the co-ordination strategy may however be correct or incorrect. We show how the formal Accord contracts allow us to automatically insert runtime assertions that serve to check, during testing, whether the implementation conforms to the contract. Using a large class of data-parallel programs that share memory in intricate ways, we show that natural and simple contracts suffice to document the co-ordination strategy amongst threads, and that the task of showing that the strategy ensures race-freedom can be handled efficiently and automatically by an existing SMT solver (Z3). While co-ordination strategies can be proved race-free in our framework, failure to prove the co-ordination strategy race-free, accompanied by counter-examples produced by the solver, indicates the presence of races. Using such counterexamples, we report hitherto undiscovered data-races that we found in the long-tested {\tt applu\_l} benchmark in the Spec OMP2001 suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zheng:2011:GLO, author = "Mai Zheng and Vignesh T. 
Ravi and Feng Qin and Gagan Agrawal", title = "{GRace}: a low-overhead mechanism for detecting data races in {GPU} programs", journal = j-SIGPLAN, volume = "46", number = "8", pages = "135--146", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941574", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In recent years, GPUs have emerged as an extremely cost-effective means for achieving high performance. Many application developers, including those with no prior parallel programming experience, are now trying to scale their applications using GPUs. While languages like CUDA and OpenCL have eased GPU programming for non-graphical applications, they are still explicitly parallel languages. All parallel programmers, particularly the novices, need tools that can help ensuring the correctness of their programs. Like any multithreaded environment, data races on GPUs can severely affect the program reliability. Thus, tool support for detecting race conditions can significantly benefit GPU application developers. Existing approaches for detecting data races on CPUs or GPUs have one or more of the following limitations: (1) being ill-suited for handling non-lock synchronization primitives on GPUs; (2) lacking of scalability due to the state explosion problem; (3) reporting many false positives because of simplified modeling; and/or (4) incurring prohibitive runtime and space overhead. In this paper, we propose GRace, a new mechanism for detecting races in GPU programs that combines static analysis with a carefully designed dynamic checker for logging and analyzing information at runtime. Our design utilizes GPUs memory hierarchy to log runtime data accesses efficiently. To improve the performance, GRace leverages static analysis to reduce the number of statements that need to be instrumented. Additionally, by exploiting the knowledge of thread scheduling and the execution model in the underlying GPUs, GRace can accurately detect data races with no false positives reported. Based on the above idea, we have built a prototype of GRace with two schemes, i.e., GRace-stmt and GRace-addr, for NVIDIA GPUs. Both schemes are integrated with the same static analysis. We have evaluated GRace-stmt and GRace-addr with three data race bugs in three GPU kernel functions and also have compared them with the existing approach, referred to as B-tool. Our experimental results show that both schemes of GRace are effective in detecting all evaluated cases with no false positives, whereas Btool reports many false positives for one evaluated case. On the one hand, GRace-addr incurs low runtime overhead, i.e., 22-116\%, and low space overhead, i.e., 9-18MB, for the evaluated kernels. 
On the other hand, GRace-stmt offers more help in diagnosing data races with larger overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yi:2011:CRP, author = "Jaeheon Yi and Caitlin Sadowski and Cormac Flanagan", title = "Cooperative reasoning for preemptive execution", journal = j-SIGPLAN, volume = "46", number = "8", pages = "147--156", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941575", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We propose a cooperative methodology for multithreaded software, where threads use traditional synchronization idioms such as locks, but additionally document each point of potential thread interference with a ``yield'' annotation. Under this methodology, code between two successive yield annotations forms a serializable transaction that is amenable to sequential reasoning. This methodology reduces the burden of reasoning about thread interleavings by indicating only those interference points that matter. We present experimental results showing that very few yield annotations are required, typically one or two per thousand lines of code. We also present dynamic analysis algorithms for detecting cooperability violations, where thread interference is not documented by a yield, and for yield annotation inference for legacy software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lesani:2011:CMT, author = "Mohsen Lesani and Jens Palsberg", title = "Communicating memory transactions", journal = j-SIGPLAN, volume = "46", number = "8", pages = "157--168", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941577", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Many concurrent programming models enable both transactional memory and message passing. For such models, researchers have built increasingly efficient implementations and defined reasonable correctness criteria, while it remains an open problem to obtain the best of both worlds. We present a programming model that is the first to have opaque transactions, safe asynchronous message passing, and an efficient implementation. Our semantics uses tentative message passing and keeps track of dependencies to enable undo of message passing in case a transaction aborts. We can program communication idioms such as barrier and rendezvous that do not deadlock when used in an atomic block. Our experiments show that our model adds little overhead to pure transactions, and that it is significantly more efficient than Transactional Events. We use a novel definition of safe message passing that may be of independent interest.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Luchangco:2011:TCE, author = "Victor Luchangco and Virendra J. 
Marathe", title = "Transaction communicators: enabling cooperation among concurrent transactions", journal = j-SIGPLAN, volume = "46", number = "8", pages = "169--178", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941578", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In this paper, we propose to extend transactional memory with transaction communicators, special objects through which concurrent transactions can communicate: changes by one transaction to a communicator can be seen by concurrent transactions before the first transaction commits. Although isolation of transactions is compromised by such communication, we constrain the effects of this compromise by tracking dependencies among transactions, and preventing any transaction from committing unless every transaction whose changes it saw also commits. In particular, mutually dependent transactions must commit or abort together, and transactions that do not communicate remain isolated. To help programmers synchronize accesses to communicators, we also provide special communicator-isolating transactions, which ensure isolation even for accesses to communicators. We propose language features to help programmers express the communicator constructs. We implemented a novel communicators-enabled STM runtime in the Maxine VM. Our preliminary evaluation demonstrates that communicators can be used in diverse settings to improve the performance of transactional programs, and to empower programmers with the ability to safely express within transactions important programming idioms that fundamentally require compromise of transaction isolation (e.g., CSP-style synchronous communication).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fernandes:2011:LFS, author = "S{\'e}rgio Miguel Fernandes and Jo{\~a}o Cachopo", title = "Lock-free and scalable multi-version software transactional memory", journal = j-SIGPLAN, volume = "46", number = "8", pages = "179--188", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941579", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Software Transactional Memory (STM) was initially proposed as a lock-free mechanism for concurrency control. Early implementations had efficiency limitations, and soon obstruction-free proposals appeared, to tackle this problem, often simplifying STM implementation. Today, most of the modern and top-performing STMs use blocking designs, relying on locks to ensure an atomic commit operation. This approach has revealed better in practice, in part due to its simplicity. Yet, it may have scalability problems when we move into many-core computers, requiring fine-tuning and careful programming to avoid contention. 
In this paper we present and discuss the modifications we made to a lock-based multi-version STM in Java, to turn it into a lock-free implementation that we have tested to scale at least up to 192 cores, and which provides results that compete with, and sometimes exceed, some of today's top-performing lock-based implementations. The new lock-free commit algorithm allows write transactions to proceed in parallel, by allowing them to run their validation phase independently of each other, and by resorting to helping from threads that would otherwise be waiting to commit, during the write-back phase. We also present a new garbage collection algorithm to dispose of old unused object versions that allows for asynchronous identification of unnecessary versions, which minimizes its interference with the rest of the transactional system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tian:2011:ESP, author = "Chen Tian and Changhui Lin and Min Feng and Rajiv Gupta", title = "Enhanced speculative parallelization via incremental recovery", journal = j-SIGPLAN, volume = "46", number = "8", pages = "189--200", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941580", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The widespread availability of multicore systems has led to an increased interest in speculative parallelization of sequential programs using software-based thread level speculation. Many of the proposed techniques are implemented via state separation where non-speculative computation state is maintained separately from the speculative state of threads performing speculative computations. If speculation is successful, the results from speculative state are committed to non-speculative state. However, upon misspeculation, discard-all scheme is employed in which speculatively computed results of a thread are discarded and the computation is performed again. While this scheme is simple to implement, one disadvantage of discard-all is its inability to tolerate high misspeculation rates due to its high runtime overhead. Thus, it is not suitable for use in applications where misspeculation rates are input dependent and therefore may reach high levels. In this paper we develop an approach for incremental recovery in which, instead of discarding all of the results and reexecuting the speculative computation in its entirety, the computation is restarted from the earliest point at which a misspeculation causing value is read. This approach has two advantages. First, the cost of recovery is reduced as only part of the computation is reexecuted. Second, since recovery takes less time, the likelihood of future misspeculations is reduced. We design and implement a strategy for implementing incremental recovery that allows results of partial computations to be efficiently saved and reused. For a set of programs where misspeculation rate is input dependent, our experiments show that with inputs that result in misspeculation rates of around 40\% and 80\%, applying incremental recovery technique results in 1.2x-3.3x and 2.0x-6.6x speedups respectively over the discard-all recovery scheme. 
Furthermore, misspeculations observed during the discard-all scheme are reduced when incremental recovery is employed --- reductions range from 10\% to 85\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Saraswat:2011:LBG, author = "Vijay A. Saraswat and Prabhanjan Kambadur and Sreedhar Kodali and David Grove and Sriram Krishnamoorthy", title = "Lifeline-based global load balancing", journal = j-SIGPLAN, volume = "46", number = "8", pages = "201--212", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941582", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "On shared-memory systems, Cilk-style work-stealing has been used to effectively parallelize irregular task-graph based applications such as Unbalanced Tree Search (UTS). There are two main difficulties in extending this approach to distributed memory. In the shared memory approach, thieves (nodes without work) constantly attempt to asynchronously steal work from randomly chosen victims until they find work. In distributed memory, thieves cannot autonomously steal work from a victim without disrupting its execution. When work is sparse, this results in performance degradation. In essence, a direct extension of traditional work-stealing to distributed memory violates the work-first principle underlying work-stealing. Further, thieves spend useless CPU cycles attacking victims that have no work, resulting in system inefficiencies in multi-programmed contexts. Second, it is non-trivial to detect active distributed termination (detect that programs at all nodes are looking for work, hence there is no work). This problem is well-studied and requires careful design for good performance. Unfortunately, in most existing languages/frameworks, application developers are forced to implement their own distributed termination detection. In this paper, we develop a simple set of ideas that allow work-stealing to be efficiently extended to distributed memory. First, we introduce lifeline graphs: low-degree, low-diameter, fully connected directed graphs. Such graphs can be constructed from k-dimensional hypercubes. When a node is unable to find work after w unsuccessful steals, it quiesces after informing the outgoing edges in its lifeline graph. Quiescent nodes do not disturb other nodes. A quiesced node is reactivated when work arrives from a lifeline and itself shares this work with those of its incoming lifelines that are activated. Termination occurs precisely when computation at all nodes has quiesced. In a language such as X10, such passive distributed termination can be detected automatically using the finish construct --- no application code is necessary. Our design is implemented in a few hundred lines of X10. On the binomial tree described in [Olivier:08], the program achieves 87\% efficiency on an Infiniband cluster of 1024 Power7 cores, with a peak throughput of 2.37 GNodes/sec. It achieves 87\% efficiency on a Blue Gene/P with 2048 processors, and a peak throughput of 0.966 GNodes/s. All numbers are relative to single core sequential performance. This implementation has been refactored into a reusable global load balancing framework.
Applications can use this framework to obtain global load balance with minimal code changes. In summary, we claim: (a) the first formulation of UTS that does not involve application level global termination detection, (b) the introduction of lifeline graphs to reduce failed steals, (c) the demonstration of simple lifeline graphs based on k-hypercubes, (d) performance with superior efficiency (or the same efficiency but over a wider range) than published results on UTS. In particular, our framework can deliver the same or better performance as an unrestricted random work-stealing implementation, while reducing the number of attempted steals.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wang:2011:CSP, author = "Zhaoguo Wang and Ran Liu and Yufei Chen and Xi Wu and Haibo Chen and Weihua Zhang and Binyu Zang", title = "{COREMU}: a scalable and portable parallel full-system emulator", journal = j-SIGPLAN, volume = "46", number = "8", pages = "213--222", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941583", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "This paper presents the open-source COREMU, a scalable and portable parallel emulation framework that decouples the complexity of parallelizing full-system emulators from building a mature sequential one. The key observation is that CPU cores and devices in current (and likely future) multiprocessors are loosely-coupled and communicate through well-defined interfaces. Based on this observation, COREMU emulates multiple cores by creating multiple instances of existing sequential emulators, and uses a thin library layer to handle the inter-core and device communication and synchronization, to maintain a consistent view of system resources. COREMU also incorporates lightweight memory transactions, feedback-directed scheduling, lazy code invalidation and adaptive signal control to provide scalable performance. To make COREMU useful in practice, we also provide some preliminary tools and APIs that can help programmers to diagnose performance problems and (concurrency) bugs. A working prototype, which reuses the widely-used QEMU as the sequential emulator, requires only 2500 lines of code (LOCs) of changes to QEMU. It currently supports x64 and ARM platforms, and can emulate up to 255 cores running commodity OSes with practical performance, while QEMU cannot scale above 32 cores. A set of performance evaluations against QEMU indicates that COREMU has negligible uniprocessor emulation overhead, and performs and scales significantly better than QEMU.
We also show how COREMU could be used to diagnose performance problems and concurrency bugs of both OS kernel and parallel applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kogan:2011:WFQ, author = "Alex Kogan and Erez Petrank", title = "Wait-free queues with multiple enqueuers and dequeuers", journal = j-SIGPLAN, volume = "46", number = "8", pages = "223--234", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941585", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The queue data structure is fundamental and ubiquitous. Lock-free versions of the queue are well known. However, an important open question is whether practical wait-free queues exist. Until now, only versions with limited concurrency were proposed. In this paper we provide a design for a practical wait-free queue. Our construction is based on the highly efficient lock-free queue of Michael and Scott. To achieve wait-freedom, we employ a priority-based helping scheme in which faster threads help the slower peers to complete their pending operations. We have implemented our scheme on multicore machines and present performance measurements comparing our implementation with that of Michael and Scott in several system configurations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tanase:2011:SPC, author = "Gabriel Tanase and Antal Buss and Adam Fidel and Harshvardhan Harshvardhan and Ioannis Papadopoulos and Olga Pearce and Timmie Smith and Nathan Thomas and Xiabing Xu and Nedal Mourad and Jeremy Vu and Mauro Bianco and Nancy M. Amato and Lawrence Rauchwerger", title = "The {STAPL} parallel container framework", journal = j-SIGPLAN, volume = "46", number = "8", pages = "235--246", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941586", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The Standard Template Adaptive Parallel Library (STAPL) is a parallel programming infrastructure that extends C++ with support for parallelism. It includes a collection of distributed data structures called pContainers that are thread-safe, concurrent objects, i.e., shared objects that provide parallel methods that can be invoked concurrently. In this work, we present the STAPL Parallel Container Framework (PCF), that is designed to facilitate the development of generic parallel containers. We introduce a set of concepts and a methodology for assembling a pContainer from existing sequential or parallel containers, without requiring the programmer to deal with concurrency or data distribution issues. The PCF provides a large number of basic parallel data structures (e.g., pArray, pList, pVector, pMatrix, pGraph, pMap, pSet). The PCF provides a class hierarchy and a composition mechanism that allows users to extend and customize the current container base for improved application expressivity and performance. 
We evaluate STAPL pContainer performance on a CRAY XT4 massively parallel system and show that pContainer methods, generic pAlgorithms, and different applications provide good scalability on more than 16,000 processors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kourtis:2011:CEC, author = "Kornilios Kourtis and Vasileios Karakasis and Georgios Goumas and Nectarios Koziris", title = "{CSX}: an extended compression format for {SpMV} on shared memory systems", journal = j-SIGPLAN, volume = "46", number = "8", pages = "247--256", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941587", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The Sparse Matrix-Vector multiplication (SpMV) kernel scales poorly on shared memory systems with multiple processing units due to the streaming nature of its data access pattern. Previous research has demonstrated that an effective strategy to improve the kernel's performance is to drastically reduce the data volume involved in the computations. Since the storage formats for sparse matrices include metadata describing the structure of non-zero elements within the matrix, we propose a generalized approach to compress metadata by exploiting substructures within the matrix. We call the proposed storage format Compressed Sparse eXtended (CSX). In our implementation we employ runtime code generation to construct specialized SpMV routines for each matrix. Experimental evaluation on two shared memory systems for 15 sparse matrices demonstrates significant performance gains as the number of participating cores increases. Regarding the cost of CSX construction, we propose several strategies which trade performance for preprocessing cost making CSX applicable both to online and offline preprocessing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dotsenko:2011:ATF, author = "Yuri Dotsenko and Sara S. Baghsorkhi and Brandon Lloyd and Naga K. Govindaraju", title = "Auto-tuning of {Fast Fourier Transform} on graphics processors", journal = j-SIGPLAN, volume = "46", number = "8", pages = "257--266", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941589", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We present an auto-tuning framework for FFTs on graphics processors (GPUs). Due to complex design of the memory and compute subsystems on GPUs, the performance of FFT kernels over the range of possible input parameters can vary widely. We generate several variants for each component of the FFT kernel that, for different cases, are likely to perform well. Our auto-tuner composes variants to generate kernels and selects the best ones. We present heuristics to prune the search space and profile only a small fraction of all possible kernels. We compose optimized kernels to improve the performance of larger FFT computations. 
We implement the system using the NVIDIA CUDA API and compare its performance to the state-of-the-art FFT libraries. On a range of NVIDIA GPUs and input sizes, our auto-tuned FFTs outperform the NVIDIA CUFFT 3.0 library by up to 38x and deliver up to 3x higher performance compared to a manually-tuned FFT.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hong:2011:ACG, author = "Sungpack Hong and Sang Kyun Kim and Tayo Oguntebi and Kunle Olukotun", title = "Accelerating {CUDA} graph algorithms at maximum warp", journal = j-SIGPLAN, volume = "46", number = "8", pages = "267--276", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941590", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Graphs are powerful data representations favored in many computational domains. Modern GPUs have recently shown promising results in accelerating computationally challenging graph problems but their performance suffered heavily when the graph structure is highly irregular, as most real-world graphs tend to be. In this study, we first observe that the poor performance is caused by work imbalance and is an artifact of a discrepancy between the GPU programming model and the underlying GPU architecture. We then propose a novel virtual warp-centric programming method that exposes the traits of underlying GPU architectures to users. Our method significantly improves the performance of applications with heavily imbalanced workloads, and enables trade-offs between workload imbalance and ALU underutilization for fine-tuning the performance. Our evaluation reveals that our method exhibits up to 9x speedup over previous GPU algorithms and 12x over single thread CPU execution on irregular graphs. When properly configured, it also yields up to 30\% improvement over previous GPU algorithms on regular graphs. In addition to performance gains on graph algorithms, our programming method achieves 1.3x to 15.1x speedup on a set of GPU benchmark applications. Our study also confirms that the performance gap between GPUs and other multi-threaded CPU graph implementations is primarily due to the large difference in memory bandwidth.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kim:2011:ASC, author = "Jungwon Kim and Honggyu Kim and Joo Hwan Lee and Jaejin Lee", title = "Achieving a single compute device image in {OpenCL} for multiple {GPUs}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "277--288", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941591", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In this paper, we propose an OpenCL framework that combines multiple GPUs and treats them as a single compute device.
Providing a single virtual compute device image to the user makes an OpenCL application written for a single GPU portable to the platform that has multiple GPU devices. It also makes the application exploit full computing power of the multiple GPU devices and the total amount of GPU memories available in the platform. Our OpenCL framework automatically distributes at run-time the OpenCL kernel written for a single GPU into multiple CUDA kernels that execute on the multiple GPU devices. It applies a run-time memory access range analysis to the kernel by performing a sampling run and identifies an optimal workload distribution for the kernel. To achieve a single compute device image, the runtime maintains virtual device memory that is allocated in the main memory. The OpenCL runtime treats the memory as if it were the memory of a single GPU device and keeps it consistent to the memories of the multiple GPU devices. Our OpenCL-C-to-C translator generates the sampling code from the OpenCL kernel code and OpenCL-C-to-CUDA-C translator generates the CUDA kernel code for the distributed OpenCL kernel. We show the effectiveness of our OpenCL framework by implementing the OpenCL runtime and two source-to-source translators. We evaluate its performance with a system that contains 8 GPUs using 11 OpenCL benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prabhakar:2011:QAS, author = "Ramya Prabhakar and Shekhar Srikantaiah and Rajat Garg and Mahmut Kandemir", title = "{QoS} aware storage cache management in multi-server environments", journal = j-SIGPLAN, volume = "46", number = "8", pages = "289--290", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941593", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In this paper, we propose a novel two-step approach to the management of the storage caches to provide predictable performance in multi-server storage architectures: (1) An adaptive QoS decomposition and optimization step uses max-flow algorithm to determine the best decomposition of application-level QoS to sub-QoSs such that the application performance is optimized, and (2) A storage cache allocation step uses feedback control theory to allocate shared storage cache space such that the specified QoSs are satisfied throughout the execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Roy:2011:WAU, author = "Amitabha Roy and Steven Hand and Tim Harris", title = "Weak atomicity under the x86 memory consistency model", journal = j-SIGPLAN, volume = "46", number = "8", pages = "291--292", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941594", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We consider the problem of building a weakly atomic Software Transactional Memory (STM), that provides Single (Global) Lock Atomicity (SLA) while adhering to the x86 memory consistency model (x86-MM).", 
acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jeon:2011:KLG, author = "Donghwan Jeon and Saturnino Garcia and Chris Louie and Sravanthi Kota Venkata and Michael Bedford Taylor", title = "{Kremlin}: like {\tt gprof}, but for parallelization", journal = j-SIGPLAN, volume = "46", number = "8", pages = "293--294", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941595", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "This paper overviews Kremlin, a software profiling tool designed to assist the parallelization of serial programs. Kremlin accepts a serial source code, profiles it, and provides a list of regions that should be considered in parallelization. Unlike a typical profiler, Kremlin profiles not only work but also parallelism, which is accomplished via a novel technique called hierarchical critical path analysis. Our evaluation demonstrates that Kremlin is highly effective, resulting in a parallelized program whose performance sometimes outperforms, and is mostly comparable to, manual parallelization. At the same time, Kremlin would require that the user parallelize significantly fewer regions of the program. Finally, a user study suggests Kremlin is effective in improving the productivity of programmers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Strzodka:2011:TSM, author = "Robert Strzodka and Mohammed Shaheen and Dawid Pajak", title = "Time skewing made simple", journal = j-SIGPLAN, volume = "46", number = "8", pages = "295--296", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941596", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Time skewing and loop tiling has been known for a long time to be a highly beneficial acceleration technique for nested loops especially on bandwidth hungry multi-core processors, but it is little used in practice because efficient implementations utilize complicated code and simple or abstract ones show much smaller gains over naive nested loops. 
We break this dilemma with an essential time skewing scheme that is both compact and fast.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Grosset:2011:EGC, author = "Andre Vincent Pascal Grosset and Peihong Zhu and Shusen Liu and Suresh Venkatasubramanian and Mary Hall", title = "Evaluating graph coloring on {GPUs}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "297--298", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941597", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "This paper evaluates features of graph coloring algorithms implemented on graphics processing units (GPUs), comparing coloring heuristics and thread decompositions. As compared to prior work on graph coloring for other parallel architectures, we find that the large number of cores and relatively high global memory bandwidth of a GPU lead to different strategies for the parallel implementation. Specifically, we find that a simple uniform block partitioning is very effective on GPUs, and our parallel coloring heuristics lead to the same or fewer colors than prior approaches for distributed-memory cluster architectures. Our algorithm resolves many coloring conflicts across partitioned blocks on the GPU by iterating through the coloring process, before returning to the CPU to resolve remaining conflicts. With this approach we get as few colors as (if not fewer than) the best sequential graph coloring algorithm, and our performance is close to that of the fastest sequential graph coloring algorithms, which have poor color quality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ding:2011:TEP, author = "Chen Ding", title = "Two examples of parallel programming without concurrency constructs {(PP-CC)}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "299--300", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941598", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Stellwag:2011:WFN, author = "Philippe Stellwag and Fabian Scheler and Jakob Krainz and Wolfgang Schr{\"o}der-Preikschat", title = "A wait-free {NCAS} library for parallel applications with timing constraints", journal = j-SIGPLAN, volume = "46", number = "8", pages = "301--302", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941599", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We introduce the major ideas of our wait-free, linearizable, and disjoint-access-parallel NCAS library, called rtNCAS. It focuses on the construction of wait-free data structure operations (DSO) in real-time circumstances.
rtNCAS is able to conditionally swap multiple independent words (NCAS) in an atomic manner. It allows us, furthermore, to implement arbitrary DSO by means of their sequential specification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Davies:2011:ABR, author = "Teresa Davies and Zizhong Chen and Christer Karlsson and Hui Liu", title = "Algorithm-based recovery for {HPL}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "303--304", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941600", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "When more processors are used for a calculation, the probability that one will fail during the calculation increases. Fault tolerance is a technique for allowing a calculation to survive a failure, and includes recovering lost data. A common method of recovery is diskless checkpointing. However, it has high overhead when a large amount of data is involved, as is the case with matrix operations. A checksum-based method allows fault tolerance of matrix operations with lower overhead. This technique is applicable to the LU decomposition in the benchmark HPL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Willcock:2011:APP, author = "Jeremiah James Willcock and Torsten Hoefler and Nicholas Gerard Edmonds and Andrew Lumsdaine", title = "{Active Pebbles}: a programming model for highly parallel fine-grained data-driven computations", journal = j-SIGPLAN, volume = "46", number = "8", pages = "305--306", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941601", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "A variety of programming models exist to support large-scale, distributed memory, parallel computation. These programming models have historically targeted coarse-grained applications with natural locality such as those found in a variety of scientific simulations of the physical world. Fine-grained, irregular, and unstructured applications such as those found in biology, social network analysis, and graph theory are less well supported. 
We propose Active Pebbles, a programming model which allows these applications to be expressed naturally; an accompanying execution model ensures performance and scalability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fischer:2011:SMC, author = "Topher Fischer and Eric Mercer and Neha Rungta", title = "Symbolically modeling concurrent {MCAPI} executions", journal = j-SIGPLAN, volume = "46", number = "8", pages = "307--308", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941602", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Improper use of Inter-Process Communication (IPC) within concurrent systems often creates data races which can lead to bugs that are challenging to discover. Techniques that use Satisfiability Modulo Theories (SMT) problems to symbolically model possible executions of concurrent software have recently been proposed for use in the formal verification of software. In this work we describe a new technique for modeling executions of concurrent software that use a message passing API called MCAPI. Our technique uses an execution trace to create an SMT problem that symbolically models all possible concurrent executions and follows the same sequence of conditional branch outcomes as the provided execution trace. We check if there exists a satisfying assignment to the SMT problem with respect to specific safety properties. If such an assignment exists, it provides the conditions that lead to the violation of the property. We show how our method models behaviors of MCAPI applications that are ignored in previously published techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Siegel:2011:AFV, author = "Stephen F. Siegel and Timothy K. Zirkel", title = "Automatic formal verification of {MPI}-based parallel programs", journal = j-SIGPLAN, volume = "46", number = "8", pages = "309--310", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941603", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "The Toolkit for Accurate Scientific Software (TASS) is a suite of tools for the formal verification of MPI-based parallel programs used in computational science. TASS can verify various safety properties as well as compare two programs for functional equivalence. The TASS front end takes an integer $ n \geq 1 $ and a C/MPI program, and constructs an abstract model of the program with $n$ processes. Procedures, structs, (multi-dimensional) arrays, heap-allocated data, pointers, and pointer arithmetic are all representable in a TASS model. The model is then explored using symbolic execution and explicit state space enumeration. A number of techniques are used to reduce the time and memory consumed. 
A variety of realistic MPI programs have been verified with TASS, including Jacobi iteration and manager-worker type programs, and some subtle defects have been discovered. TASS is written in Java and is available from \path=http://vsl.cis.udel.edu/tass= under the Gnu Public License.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Donaldson:2011:STA, author = "Alastair F. Donaldson and Daniel Kroening and Philipp Ruemmer", title = "{SCRATCH}: a tool for automatic analysis of {DMA} races", journal = j-SIGPLAN, volume = "46", number = "8", pages = "311--312", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941604", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We present the SCRATCH tool, which uses bounded model checking and k-induction to automatically analyse software for multicore processors such as the Cell BE, in order to detect DMA races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Botincan:2011:ASP, author = "Matko Botincan and Mike Dodds and Alastair F. Donaldson and Matthew J. Parkinson", title = "Automatic safety proofs for asynchronous memory operations", journal = j-SIGPLAN, volume = "46", number = "8", pages = "313--314", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941605", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We present a work-in-progress proof system and tool, based on separation logic, for analysing memory safety of multicore programs that use asynchronous memory operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Filinski:2011:TCT, author = "Andrzej Filinski", title = "Towards a comprehensive theory of monadic effects", journal = j-SIGPLAN, volume = "46", number = "9", pages = "1--1", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034775", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gibbons:2011:JDI, author = "Jeremy Gibbons and Ralf Hinze", title = "Just do it: simple monadic equational reasoning", journal = j-SIGPLAN, volume = "46", number = "9", pages = "2--14", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034777", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = 
"https://dl.acm.org/loi/sigplan", } @Article{Swamy:2011:LMP, author = "Nikhil Swamy and Nataliya Guts and Daan Leijen and Michael Hicks", title = "Lightweight monadic programming in {ML}", journal = j-SIGPLAN, volume = "46", number = "9", pages = "15--27", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034778", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mitchell:2011:FPT, author = "Emily G. Mitchell", title = "Functional programming through deep time: modeling the first complex ecosystems on {Earth}", journal = j-SIGPLAN, volume = "46", number = "9", pages = "28--31", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034779", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Schrijvers:2011:MZV, author = "Tom Schrijvers and Bruno C. d. S. Oliveira", title = "Monads, zippers and views: virtualizing the monad stack", journal = j-SIGPLAN, volume = "46", number = "9", pages = "32--44", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034781", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Krishnaswami:2011:SMG, author = "Neelakantan R. Krishnaswami and Nick Benton", title = "A semantic model for graphical user interfaces", journal = j-SIGPLAN, volume = "46", number = "9", pages = "45--57", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034782", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shivers:2011:MRT, author = "Olin Shivers and Aaron J. 
Turon", title = "Modular rollback through control logging: a pair of twin functional pearls", journal = j-SIGPLAN, volume = "46", number = "9", pages = "58--68", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034783", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Vardoulakis:2011:PFA, author = "Dimitrios Vardoulakis and Olin Shivers", title = "Pushdown flow analysis of first-class control", journal = j-SIGPLAN, volume = "46", number = "9", pages = "69--80", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034785", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Materzok:2011:SDC, author = "Marek Materzok and Dariusz Biernacki", title = "Subtyping delimited continuations", journal = j-SIGPLAN, volume = "46", number = "9", pages = "81--93", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034786", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Castagna:2011:STF, author = "Giuseppe Castagna and Zhiwu Xu", title = "Set-theoretic foundation of parametric polymorphism and subtyping", journal = j-SIGPLAN, volume = "46", number = "9", pages = "94--106", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034788", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gesbert:2011:PPS, author = "Nils Gesbert and Pierre Genev{\`e}s and Nabil Laya{\"\i}da", title = "Parametric polymorphism and semantic subtyping: the logical connection", journal = j-SIGPLAN, volume = "46", number = "9", pages = "107--116", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034789", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morihata:2011:BTI, author = "Akimasa Morihata and Kiminori Matsuzaki", title = "Balanced trees inhabiting functional parallel 
programming", journal = j-SIGPLAN, volume = "46", number = "9", pages = "117--128", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034791", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2011:ISA, author = "Yan Chen and Joshua Dunfield and Matthew A. Hammer and Umut A. Acar", title = "Implicit self-adjusting computation for purely functional programs", journal = j-SIGPLAN, volume = "46", number = "9", pages = "129--141", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034792", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Takeyama:2011:PAC, author = "Makoto Takeyama", title = "Programming assurance cases in {Agda}", journal = j-SIGPLAN, volume = "46", number = "9", pages = "142--142", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034794", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Devriese:2011:BST, author = "Dominique Devriese and Frank Piessens", title = "On the bright side of type classes: instance arguments in {Agda}", journal = j-SIGPLAN, volume = "46", number = "9", pages = "143--155", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034796", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Magalhaes:2011:FMM, author = "Jos{\'e} Pedro Magalh{\~a}es and W. 
Bas de Haas", title = "Functional modelling of musical harmony: an experience report", journal = j-SIGPLAN, volume = "46", number = "9", pages = "156--162", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034797", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gonthier:2011:HMA, author = "Georges Gonthier and Beta Ziliani and Aleksandar Nanevski and Derek Dreyer", title = "How to make ad hoc proof automation less ad hoc", journal = j-SIGPLAN, volume = "46", number = "9", pages = "163--175", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034798", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Disney:2011:THO, author = "Tim Disney and Cormac Flanagan and Jay McCarthy", title = "Temporal higher-order contracts", journal = j-SIGPLAN, volume = "46", number = "9", pages = "176--188", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034800", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Might:2011:PDF, author = "Matthew Might and David Darais and Daniel Spiewak", title = "Parsing with derivatives: a functional pearl", journal = j-SIGPLAN, volume = "46", number = "9", pages = "189--195", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034801", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ueno:2011:ENM, author = "Katsuhiro Ueno and Atsushi Ohori and Toshiaki Otomo", title = "An efficient non-moving garbage collector for functional languages", journal = j-SIGPLAN, volume = "46", number = "9", pages = "196--208", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034802", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gill:2011:DEF, author = "Andy Gill and Andrew Farmer", title = "Deriving an efficient {FPGA} 
implementation of a low density parity check forward error corrector", journal = j-SIGPLAN, volume = "46", number = "9", pages = "209--220", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034804", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ghica:2011:GSIb, author = "Dan R. Ghica and Alex Smith and Satnam Singh", title = "Geometry of synthesis {IV}: compiling affine recursion into static hardware", journal = j-SIGPLAN, volume = "46", number = "9", pages = "221--233", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034805", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ahn:2011:HMS, author = "Ki Yung Ahn and Tim Sheard", title = "A hierarchy of mendler style recursion combinators: taming inductive datatypes with negative occurrences", journal = j-SIGPLAN, volume = "46", number = "9", pages = "234--246", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034807", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jay:2011:TSI, author = "Barry Jay and Jens Palsberg", title = "Typed self-interpretation by pattern matching", journal = j-SIGPLAN, volume = "46", number = "9", pages = "247--258", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034808", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chyzak:2011:UCP, author = "Fr{\'e}d{\'e}ric Chyzak and Alexis Darrasse", title = "Using {{\tt camlp4}} for presenting dynamic mathematics on the {Web}: {DynaMoW}, an {OCaml} language extension for the run-time generation of mathematical contents and their presentation on the {Web}", journal = j-SIGPLAN, volume = "46", number = "9", pages = "259--265", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034809", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = 
"https://dl.acm.org/loi/sigplan", } @Article{Swamy:2011:SDP, author = "Nikhil Swamy and Juan Chen and C{\'e}dric Fournet and Pierre-Yves Strub and Karthikeyan Bhargavan and Jean Yang", title = "Secure distributed programming with value-dependent types", journal = j-SIGPLAN, volume = "46", number = "9", pages = "266--278", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034811", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Foster:2011:FNP, author = "Nate Foster and Rob Harrison and Michael J. Freedman and Christopher Monsanto and Jennifer Rexford and Alec Story and David Walker", title = "{Frenetic}: a network programming language", journal = j-SIGPLAN, volume = "46", number = "9", pages = "279--291", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034812", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fisher:2011:FLT, author = "Kathleen Fisher and Nate Foster and David Walker and Kenny Q. Zhu", title = "{Forest}: a language and toolkit for programming with filestores", journal = j-SIGPLAN, volume = "46", number = "9", pages = "292--306", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034814", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ohori:2011:MSM, author = "Atsushi Ohori and Katsuhiro Ueno", title = "Making {Standard ML} a practical database programming language", journal = j-SIGPLAN, volume = "46", number = "9", pages = "307--319", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034815", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pouillard:2011:NP, author = "Nicolas Pouillard", title = "Nameless, painless", journal = j-SIGPLAN, volume = "46", number = "9", pages = "320--332", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034817", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal 
= "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Weirich:2011:BU, author = "Stephanie Weirich and Brent A. Yorgey and Tim Sheard", title = "Binders unbound", journal = j-SIGPLAN, volume = "46", number = "9", pages = "333--345", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034818", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Popescu:2011:RPS, author = "Andrei Popescu and Elsa L. Gunter", title = "Recursion principles for syntax with bindings and substitution", journal = j-SIGPLAN, volume = "46", number = "9", pages = "346--358", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034819", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hinze:2011:PUF, author = "Ralf Hinze and Daniel W. H. James", title = "Proving the unique fixed-point principle correct: an adventure with category theory", journal = j-SIGPLAN, volume = "46", number = "9", pages = "359--371", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034821", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gaboardi:2011:LPS, author = "Marco Gaboardi and Luca Paolini and Mauro Piccolo", title = "Linearity and {PCF}: a semantic insight!", journal = j-SIGPLAN, volume = "46", number = "9", pages = "372--384", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034822", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mu:2011:GDT, author = "Shin-Cheng Mu and Akimasa Morihata", title = "Generalising and dualising the third list-homomorphism theorem: functional pearl", journal = j-SIGPLAN, volume = "46", number = "9", pages = "385--391", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034824", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } 
@Article{Wang:2011:IUE, author = "Meng Wang and Jeremy Gibbons and Nicolas Wu", title = "Incremental updates for efficient bidirectional transformations", journal = j-SIGPLAN, volume = "46", number = "9", pages = "392--403", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034825", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gotsman:2011:MVP, author = "Alexey Gotsman and Hongseok Yang", title = "Modular verification of preemptive {OS} kernels", journal = j-SIGPLAN, volume = "46", number = "9", pages = "404--417", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034827", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chargueraud:2011:CFV, author = "Arthur Chargu{\'e}raud", title = "Characteristic formulae for the verification of imperative programs", journal = j-SIGPLAN, volume = "46", number = "9", pages = "418--430", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034828", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ahmed:2011:EPC, author = "Amal Ahmed and Matthias Blume", title = "An equivalence-preserving {CPS} translation via multi-language semantics", journal = j-SIGPLAN, volume = "46", number = "9", pages = "431--444", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034830", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Thamsborg:2011:KLR, author = "Jacob Thamsborg and Lars Birkedal", title = "A {Kripke} logical relation for effect-based program transformations", journal = j-SIGPLAN, volume = "46", number = "9", pages = "445--456", month = sep, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2034574.2034831", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Oct 22 08:31:30 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ICFP '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sutherland:2011:SP, author = "Ivan Sutherland", title = "The 
sequential prison", journal = j-SIGPLAN, volume = "46", number = "10", pages = "1--2", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048068", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2011:SPD, author = "Tongping Liu and Emery D. Berger", title = "{SHERIFF}: precise detection and automatic mitigation of false sharing", journal = j-SIGPLAN, volume = "46", number = "10", pages = "3--18", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048070", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Weeratunge:2011:APA, author = "Dasarath Weeratunge and Xiangyu Zhang and Suresh Jaganathan", title = "Accentuating the positive: atomicity inference and enforcement using correct executions", journal = j-SIGPLAN, volume = "46", number = "10", pages = "19--34", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048071", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2011:SST, author = "Du Li and Witawas Srisa-an and Matthew B. 
Dwyer", title = "{SOS}: saving time in dynamic race detection with stationary analysis", journal = j-SIGPLAN, volume = "46", number = "10", pages = "35--50", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048072", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shacham:2011:TAC, author = "Ohad Shacham and Nathan Bronson and Alex Aiken and Mooly Sagiv and Martin Vechev and Eran Yahav", title = "Testing atomicity of composed concurrent operations", journal = j-SIGPLAN, volume = "46", number = "10", pages = "51--64", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048073", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yessenov:2011:DDS, author = "Kuat Yessenov and Zhilei Xu and Armando Solar-Lezama", title = "Data-driven synthesis for object-oriented frameworks", journal = j-SIGPLAN, volume = "46", number = "10", pages = "65--82", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048075", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pu:2011:SFO, author = "Yewen Pu and Rastislav Bodik and Saurabh Srivastava", title = "Synthesis of first-order dynamic programming algorithms", journal = j-SIGPLAN, volume = "46", number = "10", pages = "83--98", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048076", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Doherty:2011:KAM, author = "Jesse Doherty and Laurie Hendren and Soroush Radpour", title = "Kind analysis for {MATLAB}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "99--118", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048077", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feldthaus:2011:TSR, author = "Asger Feldthaus and Todd Millstein and 
Anders M{\o}ller and Max Sch{\"a}fer and Frank Tip", title = "Tool-supported refactoring for {JavaScript}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "119--138", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048078", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kats:2011:ILD, author = "Lennart C. L. Kats and Rob Vermaas and Eelco Visser", title = "Integrated language definition testing: enabling test-driven language development", journal = j-SIGPLAN, volume = "46", number = "10", pages = "139--154", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048080", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jovic:2011:CMI, author = "Milan Jovic and Andrea Adamoli and Matthias Hauswirth", title = "Catch me if you can: performance bug detection in the wild", journal = j-SIGPLAN, volume = "46", number = "10", pages = "155--170", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048081", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Joshi:2011:PPT, author = "Pallavi Joshi and Haryadi S. 
Gunawi and Koushik Sen", title = "{PREFAIL}: a programmable tool for multiple-failure injection", journal = j-SIGPLAN, volume = "46", number = "10", pages = "171--188", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048082", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Thummalapenta:2011:SMS, author = "Suresh Thummalapenta and Tao Xie and Nikolai Tillmann and Jonathan de Halleux and Zhendong Su", title = "Synthesizing method sequences for high-coverage testing", journal = j-SIGPLAN, volume = "46", number = "10", pages = "189--206", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048083", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tripp:2011:HED, author = "Omer Tripp and Greta Yorsh and John Field and Mooly Sagiv", title = "{HAWKEYE}: effective discovery of dataflow impediments to parallelization", journal = j-SIGPLAN, volume = "46", number = "10", pages = "207--224", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048085", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Golan-Gueta:2011:AFG, author = "Guy Golan-Gueta and Nathan Bronson and Alex Aiken and G. 
Ramalingam and Mooly Sagiv and Eran Yahav", title = "Automatic fine-grain locking using shape properties", journal = j-SIGPLAN, volume = "46", number = "10", pages = "225--242", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048086", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ke:2011:SPP, author = "Chuanle Ke and Lei Liu and Chao Zhang and Tongxin Bai and Bryan Jacobs and Chen Ding", title = "Safe parallel programming using dynamic dependence hints", journal = j-SIGPLAN, volume = "46", number = "10", pages = "243--258", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048087", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raman:2011:SSP, author = "Arun Raman and Greta Yorsh and Martin Vechev and Eran Yahav", title = "{Sprint}: speculative prefetching of remote data", journal = j-SIGPLAN, volume = "46", number = "10", pages = "259--274", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048088", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Aftandilian:2011:AA, author = "Edward E. Aftandilian and Samuel Z. Guyer and Martin Vechev and Eran Yahav", title = "Asynchronous assertions", journal = j-SIGPLAN, volume = "46", number = "10", pages = "275--288", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048090", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hoffman:2011:RPS, author = "Kevin J. 
Hoffman and Harrison Metzger and Patrick Eugster", title = "{Ribbons}: a partially shared memory programming model", journal = j-SIGPLAN, volume = "46", number = "10", pages = "289--306", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048091", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yang:2011:WNM, author = "Xi Yang and Stephen M. Blackburn and Daniel Frampton and Jennifer B. Sartor and Kathryn S. McKinley", title = "Why nothing matters: the impact of zeroing", journal = j-SIGPLAN, volume = "46", number = "10", pages = "307--324", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048092", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Darulova:2011:TNC, author = "Eva Darulova and Viktor Kuncak", title = "Trustworthy numerical computation in {Scala}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "325--344", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2011:JEC, author = "Siliang Li and Gang Tan", title = "{JET}: exception checking in the {Java Native Interface}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "345--358", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048095", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{David:2011:ISM, author = "Cristina David and Wei-Ngan Chin", title = "Immutable specifications for more concise and precise verification", journal = j-SIGPLAN, volume = "46", number = "10", pages = "359--374", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048096", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shali:2011:HPE, author = "Amin Shali and William R. 
Cook", title = "Hybrid partial evaluation", journal = j-SIGPLAN, volume = "46", number = "10", pages = "375--390", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048098", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Erdweg:2011:SLB, author = "Sebastian Erdweg and Tillmann Rendel and Christian K{\"a}stner and Klaus Ostermann", title = "{SugarJ}: library-based syntactic language extensibility", journal = j-SIGPLAN, volume = "46", number = "10", pages = "391--406", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048099", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Demetrescu:2011:RIP, author = "Camil Demetrescu and Irene Finocchi and Andrea Ribichini", title = "Reactive imperative programming with dataflow constraints", journal = j-SIGPLAN, volume = "46", number = "10", pages = "407--426", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048100", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Burckhardt:2011:TPO, author = "Sebastian Burckhardt and Daan Leijen and Caitlin Sadowski and Jaeheon Yi and Thomas Ball", title = "Two for the price of one: a model for parallel and incremental computation", journal = j-SIGPLAN, volume = "46", number = "10", pages = "427--444", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048101", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tian:2011:STT, author = "Kai Tian and Eddy Zhang and Xipeng Shen", title = "A step towards transparent integration of input-consciousness into dynamic program optimizations", journal = j-SIGPLAN, volume = "46", number = "10", pages = "445--462", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048103", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", 
} @Article{Jo:2011:ELR, author = "Youngjoon Jo and Milind Kulkarni", title = "Enhancing locality for recursive traversals of recursive structures", journal = j-SIGPLAN, volume = "46", number = "10", pages = "463--482", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048104", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Adams:2011:FST, author = "Michael D. Adams and Andrew W. Keep and Jan Midtgaard and Matthew Might and Arun Chauhan and R. Kent Dybvig", title = "Flow-sensitive type recovery in linear-log time", journal = j-SIGPLAN, volume = "46", number = "10", pages = "483--498", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048105", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Acar:2011:OSC, author = "Umut A. Acar and Arthur Chargu{\'e}raud and Mike Rainey", title = "Oracle scheduling: controlling granularity in implicitly parallel languages", journal = j-SIGPLAN, volume = "46", number = "10", pages = "499--518", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048106", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jeon:2011:KPS, author = "Donghwan Jeon and Saturnino Garcia and Chris Louie and Michael Bedford Taylor", title = "{Kismet}: parallel speedup estimates for serial programs", journal = j-SIGPLAN, volume = "46", number = "10", pages = "519--536", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048108", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cledat:2011:ESS, author = "Romain E. 
Cledat and Tushar Kumar and Santosh Pande", title = "Efficiently speeding up sequential computation through the n-way programming model", journal = j-SIGPLAN, volume = "46", number = "10", pages = "537--554", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048109", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pyla:2011:ECG, author = "Hari K. Pyla and Calvin Ribbens and Srinidhi Varadarajan", title = "Exploiting coarse-grain speculative parallelism", journal = j-SIGPLAN, volume = "46", number = "10", pages = "555--574", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048110", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Turon:2011:SJP, author = "Aaron J. Turon and Claudio V. Russo", title = "Scalable join patterns", journal = j-SIGPLAN, volume = "46", number = "10", pages = "575--594", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048111", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Delaware:2011:PLT, author = "Benjamin Delaware and William Cook and Don Batory", title = "Product lines of theorems", journal = j-SIGPLAN, volume = "46", number = "10", pages = "595--608", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048113", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ina:2011:GTG, author = "Lintaro Ina and Atsushi Igarashi", title = "Gradual typing for generics", journal = j-SIGPLAN, volume = "46", number = "10", pages = "609--624", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048114", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tov:2011:TST, author = "Jesse A. 
Tov and Riccardo Pucella", title = "A theory of substructural types and control", journal = j-SIGPLAN, volume = "46", number = "10", pages = "625--642", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048115", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Buse:2011:BBU, author = "Raymond P. L. Buse and Caitlin Sadowski and Westley Weimer", title = "Benefits and barriers of user evaluation in software engineering research", journal = j-SIGPLAN, volume = "46", number = "10", pages = "643--656", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048117", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sewe:2011:CCS, author = "Andreas Sewe and Mira Mezini and Aibek Sarimbekov and Walter Binder", title = "Da capo con {Scala}: design and analysis of a {Scala} benchmark suite for the {Java Virtual Machine}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "657--676", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048118", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Richards:2011:ACJ, author = "Gregor Richards and Andreas Gal and Brendan Eich and Jan Vitek", title = "Automated construction of {JavaScript} benchmarks", journal = j-SIGPLAN, volume = "46", number = "10", pages = "677--694", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048119", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hemel:2011:DPM, author = "Zef Hemel and Eelco Visser", title = "Declaratively programming the {Mobile Web} with {Mobl}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "695--712", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048121", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sunshine:2011:FCS, 
author = "Joshua Sunshine and Karl Naden and Sven Stork and Jonathan Aldrich and {\'E}ric Tanter", title = "First-class state change in {Plaid}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "713--732", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048122", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lorenz:2011:CLL, author = "David H. Lorenz and Boaz Rosenan", title = "{Cedalion}: a language for language oriented programming", journal = j-SIGPLAN, volume = "46", number = "10", pages = "733--752", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hammer:2011:SAS, author = "Matthew A. Hammer and Georg Neis and Yan Chen and Umut A. Acar", title = "Self-adjusting stack machines", journal = j-SIGPLAN, volume = "46", number = "10", pages = "753--772", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kulkarni:2011:JCP, author = "Prasad A. 
Kulkarni", title = "{JIT} compilation policy for modern machines", journal = j-SIGPLAN, volume = "46", number = "10", pages = "773--788", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048126", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wu:2011:RTS, author = "Peng Wu and Hiroshige Hayashizaki and Hiroshi Inoue and Toshio Nakatani", title = "Reducing trace selection footprint for large-scale {Java} applications without performance loss", journal = j-SIGPLAN, volume = "46", number = "10", pages = "789--804", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048127", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kastner:2011:VAP, author = "Christian K{\"a}stner and Paolo G. Giarrusso and Tillmann Rendel and Sebastian Erdweg and Klaus Ostermann and Thorsten Berger", title = "Variability-aware parsing in the presence of lexical macros and conditional compilation", journal = j-SIGPLAN, volume = "46", number = "10", pages = "805--824", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wurthinger:2011:SAR, author = "Thomas W{\"u}rthinger and Danilo Ansaloni and Walter Binder and Christian Wimmer and Hanspeter M{\"o}ssenb{\"o}ck", title = "Safe and atomic run-time code evolution for {Java} and its application to dynamic {AOP}", journal = j-SIGPLAN, volume = "46", number = "10", pages = "825--844", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048129", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pinto:2011:SAC, author = "Pedro da Rocha Pinto and Thomas Dinsdale-Young and Mike Dodds and Philippa Gardner and Mark Wheelhouse", title = "A simple abstraction for complex concurrent indexes", journal = j-SIGPLAN, volume = "46", number = "10", pages = "845--864", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048131", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Anderson:2011:CNP, author = "Zachary Anderson and David Gay", title = "Composable, nestable, pessimistic atomic statements", journal = j-SIGPLAN, volume = "46", number = "10", pages = "865--884", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048132", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lublinerman:2011:DI, author = "Roberto Lublinerman and Jisheng Zhao and Zoran Budimli{\'c} and Swarat Chaudhuri and Vivek Sarkar", title = "Delegated isolation", journal = j-SIGPLAN, volume = "46", number = "10", pages = "885--902", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048133", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Harris:2011:ACA, author = "Tim Harris and Martin Abadi and Rebecca Isaacs and Ross McIlroy", title = "{AC}: composable asynchronous {IO} for native languages", journal = j-SIGPLAN, volume = "46", number = "10", pages = "903--920", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048134", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Austin:2011:VVL, author = "Thomas H. Austin and Tim Disney and Cormac Flanagan", title = "Virtual values for language extension", journal = j-SIGPLAN, volume = "46", number = "10", pages = "921--938", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048136", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Palmer:2011:BJM, author = "Zachary Palmer and Scott F. 
Smith", title = "Backstage {Java}: making a difference in metaprogramming", journal = j-SIGPLAN, volume = "46", number = "10", pages = "939--958", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048137", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Verwaest:2011:FOL, author = "Toon Verwaest and Camillo Bruni and Mircea Lungu and Oscar Nierstrasz", title = "Flexible object layouts: enabling lightweight language extensions by intercepting slot access", journal = j-SIGPLAN, volume = "46", number = "10", pages = "959--972", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048138", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Allen:2011:TCM, author = "Eric Allen and Justin Hilburn and Scott Kilpatrick and Victor Luchangco and Sukyoung Ryu and David Chase and Guy Steele", title = "Type checking modular multiple dispatch with parametric polymorphism and multiple inheritance", journal = j-SIGPLAN, volume = "46", number = "10", pages = "973--992", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048140", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Im:2011:STS, author = "Hyeonseung Im and Keiko Nakata and Jacques Garrigue and Sungwoo Park", title = "A syntactic type system for recursive modules", journal = j-SIGPLAN, volume = "46", number = "10", pages = "993--1012", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048141", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Summers:2011:FBC, author = "Alexander J. 
Summers and Peter Mueller", title = "Freedom before commitment: a lightweight type system for object initialisation", journal = j-SIGPLAN, volume = "46", number = "10", pages = "1013--1032", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048142", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Madhavan:2011:NDV, author = "Ravichandhran Madhavan and Raghavan Komondoor", title = "Null dereference verification via over-approximated weakest pre-conditions analysis", journal = j-SIGPLAN, volume = "46", number = "10", pages = "1033--1052", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048144", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sridharan:2011:FTA, author = "Manu Sridharan and Shay Artzi and Marco Pistoia and Salvatore Guarnieri and Omer Tripp and Ryan Berg", title = "{F4F}: taint analysis of framework-based {Web} applications", journal = j-SIGPLAN, volume = "46", number = "10", pages = "1053--1068", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048145", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Son:2011:RFM, author = "Sooel Son and Kathryn S. McKinley and Vitaly Shmatikov", title = "{RoleCast}: finding missing security checks when you do not know what checks are", journal = j-SIGPLAN, volume = "46", number = "10", pages = "1069--1084", month = oct, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076021.2048146", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:53 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '11 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Veldema:2011:IDP, author = "Ronald Veldema and Michael Philippsen", title = "Iterative data-parallel mark\&sweep on a {GPU}", journal = j-SIGPLAN, volume = "46", number = "11", pages = "1--10", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993480", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "Automatic memory management makes programming easier.
This is also true for general purpose GPU computing where currently no garbage collectors exist. In this paper we present a parallel mark-and-sweep collector to collect GPU memory on the GPU and tune its performance. Performance is increased by: (1) data-parallel marking and sweeping of regions of memory, (2) marking all elements of large arrays in parallel, (3) trading recursion over parallelism to match deeply linked data structures. (1) is achieved by coarsely processing all potential objects in a region of memory in parallel. When during (1) a large array is detected, it is put aside and a parallel-for is later issued on the GPU to mark its elements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Majo:2011:MMN, author = "Zoltan Majo and Thomas R. Gross", title = "Memory management in {NUMA} multicore systems: trapped between cache contention and interconnect overhead", journal = j-SIGPLAN, volume = "46", number = "11", pages = "11--20", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993481", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "Multiprocessors based on processors with multiple cores usually include a non-uniform memory architecture (NUMA); even current 2-processor systems with 8 cores exhibit non-uniform memory access times. As the cores of a processor share a common cache, the issues of memory management and process mapping must be revisited. We find that optimizing only for data locality can counteract the benefits of cache contention avoidance and vice versa. Therefore, system software must take both data locality and cache contention into account to achieve good performance, and memory management cannot be decoupled from process scheduling. We present a detailed analysis of a commercially available NUMA-multicore architecture, the Intel Nehalem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Marlow:2011:MGC, author = "Simon Marlow and Simon Peyton Jones", title = "Multicore garbage collection with local heaps", journal = j-SIGPLAN, volume = "46", number = "11", pages = "21--32", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993482", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "In a parallel, shared-memory, language with a garbage collected heap, it is desirable for each processor to perform minor garbage collections independently. Although obvious, it is difficult to make this idea pay off in practice, especially in languages where mutation is common. We present several techniques that substantially improve the state of the art. We describe these techniques in the context of a full-scale implementation of Haskell, and demonstrate that our local-heap collector substantially improves scaling, peak performance, and robustness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Garner:2011:CEO, author = "Robin J. 
Garner and Stephen M. Blackburn and Daniel Frampton", title = "A comprehensive evaluation of object scanning techniques", journal = j-SIGPLAN, volume = "46", number = "11", pages = "33--42", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993484", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "At the heart of all garbage collectors lies the process of identifying and processing reference fields within an object. Despite its key role, and evidence of many different implementation approaches, to our knowledge no comprehensive quantitative study of this design space exists. The lack of such a study means that implementers must rely on `conventional wisdom', hearsay, and their own costly analysis. Starting with mechanisms described in the literature and a variety of permutations of these, we explore the impact of a number of dimensions including: (a) the choice of data structure, (b) levels of indirection from object to metadata, and (c) specialization of scanning code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gu:2011:TPL, author = "Xiaoming Gu and Chen Ding", title = "On the theory and potential of {LRU--MRU} collaborative cache management", journal = j-SIGPLAN, volume = "46", number = "11", pages = "43--54", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993485", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "The goal of cache management is to maximize data reuse. Collaborative caching provides an interface for software to communicate access information to hardware. In theory, it can obtain optimal cache performance. In this paper, we study a collaborative caching system that allows a program to choose different caching methods for its data. As an interface, it may be used in arbitrary ways, sometimes optimal but probably suboptimal most times and even counter productive. We develop a theoretical foundation for collaborative caches to show the inclusion principle and the existence of a distance metric we call LRU-MRU stack distance. 
The new stack distance is important for program analysis and transformation to target a hierarchical collaborative cache system rather than a single cache configuration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Afek:2011:CIA, author = "Yehuda Afek and Dave Dice and Adam Morrison", title = "Cache index-aware memory allocation", journal = j-SIGPLAN, volume = "46", number = "11", pages = "55--64", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993486", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "Poor placement of data blocks in memory may negatively impact application performance because of an increase in the cache conflict miss rate [18]. For dynamically allocated structures this placement is typically determined by the memory allocator. Cache index-oblivious allocators may inadvertently place blocks on a restricted fraction of the available cache indexes, artificially and needlessly increasing the conflict miss rate. While some allocators are less vulnerable to this phenomena, no general-purpose malloc allocator is index-aware and methodologically addresses this concern. We demonstrate that many existing state-of-the-art allocators are index-oblivious, admitting performance pathologies for certain block sizes. We show that a simple adjustment within the allocator to control the spacing of blocks can provide better index coverage, which in turn reduces the superfluous conflict miss rate in various applications, improving performance with no observed negative consequences.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hertz:2011:WWR, author = "Matthew Hertz and Stephen Kane and Elizabeth Keudel and Tongxin Bai and Chen Ding and Xiaoming Gu and Jonathan E. Bard", title = "Waste not, want not: resource-based garbage collection in a shared environment", journal = j-SIGPLAN, volume = "46", number = "11", pages = "65--76", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993487", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "To achieve optimal performance, garbage-collected applications must balance the sizes of their heaps dynamically. Sizing the heap too small can reduce throughput by increasing the number of garbage collections that must be performed. Too large a heap, however, can cause the system to page and drag down the overall throughput. In today's multicore, multiprocessor machines, multiple garbage-collected applications may run simultaneously. As a result, each virtual machine (VM) must adjust its memory demands to reflect not only the behavior of the application it is running, but also the behavior of the peer applications running on the system.
We present a memory management system that enables VMs to react to memory demands dynamically.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mutlu:2011:MSM, author = "Onur Mutlu", title = "Memory systems in the many-core era: challenges, opportunities, and solution directions", journal = j-SIGPLAN, volume = "46", number = "11", pages = "77--78", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993489", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "The memory subsystem is a fundamental performance and energy bottleneck in almost all computing systems. Recent trends towards increasingly more cores on die, consolidation of diverse workloads on a single chip, and difficulty of DRAM scaling impose new requirements and exacerbate old demands on the memory system. In particular, the need for memory bandwidth and capacity is increasing [14], applications' interference in memory system increasingly limits system performance and makes the system hard to control [12], memory energy and power are key design concerns [8], and DRAM technology consumes significant amount of energy and does not scale down easily to smaller technology nodes [7].", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tene:2011:CCC, author = "Gil Tene and Balaji Iyengar and Michael Wolf", title = "{C4}: the continuously concurrent compacting collector", journal = j-SIGPLAN, volume = "46", number = "11", pages = "79--88", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993491", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "C4, the Continuously Concurrent Compacting Collector, an updated generational form of the Pauseless GC Algorithm [7], is introduced and described, along with details of its implementation on modern X86 hardware. It uses a read barrier to support concurrent compaction, concurrent remapping, and concurrent incremental update tracing. C4 differentiates itself from other generational garbage collectors by supporting simultaneous-generational concurrency: the different generations are collected using concurrent (non stop-the-world) mechanisms that can be simultaneously and independently active.
C4 is able to continuously perform concurrent young generation collections, even during long periods of concurrent full heap collection, allowing C4 to sustain high allocation rates and maintain the efficiency typical to generational collectors, without sacrificing response times or reverting to stop-the-world operation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kalibera:2011:HRO, author = "Tomas Kalibera and Richard Jones", title = "Handles revisited: optimising performance and memory costs in a real-time collector", journal = j-SIGPLAN, volume = "46", number = "11", pages = "89--98", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993492", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "Compacting garbage collectors must update all references to objects they move. Updating is a lengthy operation but the updates must be transparent to the mutator. The consequence is that no space can be reclaimed until all references have been updated which, in a real-time collector, must be done incrementally. One solution is to replace direct references to objects with handles. Handles offer several advantages to a real-time collector. They eliminate the updating problem. They allow immediate reuse of the space used by evacuated objects. They incur no copy reserve overhead. However, the execution time overhead of handles has led to them being abandoned by most modern systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Aigner:2011:STM, author = "Martin Aigner and Andreas Haas and Christoph M. Kirsch and Michael Lippautz and Ana Sokolova and Stephanie Stroka and Andreas Unterweger", title = "Short-term memory for self-collecting mutators", journal = j-SIGPLAN, volume = "46", number = "11", pages = "99--108", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993493", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "We propose a new memory model called short-term memory for managing objects on the heap. In contrast to the traditional persistent memory model for heap management, objects in short-term memory expire after a finite amount of time, which makes deallocation unnecessary. Instead, expiration of objects may be extended, if necessary, by refreshing. We have developed a concurrent, incremental, and non-moving implementation of short-term memory for explicit refreshing called self-collecting mutators that is based on programmer-controlled time and integrated into state-of-the-art runtimes of three programming languages: C, Java, and Go. 
All memory management operations run in constant time without acquiring any locks modulo the underlying allocators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Singer:2011:GCA, author = "Jeremy Singer and George Kovoor and Gavin Brown and Mikel Luj{\'a}n", title = "Garbage collection auto-tuning for {Java} {MapReduce} on multi-cores", journal = j-SIGPLAN, volume = "46", number = "11", pages = "109--118", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993495", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "MapReduce has been widely accepted as a simple programming pattern that can form the basis for efficient, large-scale, distributed data processing. The success of the MapReduce pattern has led to a variety of implementations for different computational scenarios. In this paper we present MRJ, a MapReduce Java framework for multi-core architectures. We evaluate its scalability on a four-core, hyperthreaded Intel Core i7 processor, using a set of standard MapReduce benchmarks. We investigate the significant impact that Java runtime garbage collection has on the performance and scalability of MRJ. We propose the use of memory management auto-tuning techniques based on machine learning.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wagner:2011:CMM, author = "Gregor Wagner and Andreas Gal and Christian Wimmer and Brendan Eich and Michael Franz", title = "Compartmental memory management in a modern {Web} browser", journal = j-SIGPLAN, volume = "46", number = "11", pages = "119--128", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993496", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "Since their inception, the usage pattern of web browsers has changed substantially. Rather than sequentially navigating static web sites, modern web browsers often manage a large number of simultaneous tabs displaying dynamic web content, each of which might be running a substantial amount of client-side JavaScript code. This environment introduced a new degree of parallelism that was not fully embraced by the underlying JavaScript virtual machine architecture. We propose a novel abstraction for multiple disjoint JavaScript heaps, which we call compartments. We use the notion of document origin to cluster objects into separate compartments. 
Objects within a compartment can reference each other directly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tarau:2011:IST, author = "Paul Tarau", title = "Integrated symbol table, engine and heap memory management in multi-engine {Prolog}", journal = j-SIGPLAN, volume = "46", number = "11", pages = "129--138", month = nov, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2076022.1993497", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 15 07:46:57 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '11 conference proceedings.", abstract = "We describe an integrated solution to symbol, heap and logic engine memory management in a context where exchanges of arbitrary Prolog terms occur between multiple dynamically created engines, implemented in a new Java-based experimental Prolog system. As our symbols represent not just Prolog atoms, but also handles to Java objects (including arbitrary size integers and decimals), everything is centered around a symbol garbage collection algorithm ensuring that external objects are shared and exchanged between logic engines efficiently. Taking advantage of a tag-on-data heap representation of Prolog terms, our algorithm performs in-place updates of live symbol references directly on heap cells.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Petricek:2011:EMP, author = "Tomas Petricek and Alan Mycroft and Don Syme", title = "Extending monads with pattern matching", journal = j-SIGPLAN, volume = "46", number = "12", pages = "1--12", month = dec, year = "2011", DOI = "https://doi.org/10.1145/2096148.2034677", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sequencing of effectful computations can be neatly captured using monads and elegantly written using do notation. In practice such monads often allow additional ways of composing computations, which have to be written explicitly using combinators. We identify joinads, an abstract notion of computation that is stronger than monads and captures many such ad-hoc extensions. In particular, joinads are monads with three additional operations: one of type $m a \to m b \to m (a, b)$ captures various forms of parallel composition, one of type $m a \to m a \to m a$ that is inspired by choice and one of type $m a \to m (m a)$ that captures aliasing of computations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Giorgidze:2011:BBM, author = "George Giorgidze and Torsten Grust and Nils Schweinsberg and Jeroen Weijers", title = "Bringing back monad comprehensions", journal = j-SIGPLAN, volume = "46", number = "12", pages = "13--22", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034678", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper is about a Glasgow Haskell Compiler (GHC) extension that generalises Haskell's list comprehension notation to monads. 
The monad comprehension notation implemented by the extension supports generator and filter clauses, as was the case in the Haskell 1.4 standard. In addition, the extension generalises the recently proposed parallel and SQL-like list comprehension notations to monads. The aforementioned generalisations are formally defined in this paper. The extension will be available in GHC 7.2. This paper gives several instructive examples that we hope will facilitate wide adoption of the extension by the Haskell community. We also argue why the do notation is not always a good fit for monadic libraries and embedded domain-specific languages, especially for those that are based on collection monads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Bolingbroke:2011:TCF, author = "Maximilian Bolingbroke and Simon Peyton Jones and Dimitrios Vytiniotis", title = "Termination combinators forever", journal = j-SIGPLAN, volume = "46", number = "12", pages = "23--34", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034680", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a library-based approach to constructing termination tests suitable for controlling termination of symbolic methods such as partial evaluation, supercompilation and theorem proving. With our combinators, all termination tests are correct by construction. We show how the library can be designed to embody various optimisations of the termination tests, which the user of the library takes advantage of entirely transparently.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Westbrook:2011:HHL, author = "Edwin Westbrook and Nicolas Frisby and Paul Brauner", title = "{Hobbits} for {Haskell}: a library for higher-order encodings in functional programming languages", journal = j-SIGPLAN, volume = "46", number = "12", pages = "35--46", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034681", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Adequate encodings are a powerful programming tool, which eliminate whole classes of program bugs: they ensure that a program cannot generate ill-formed data, because such data is not part of the representation; and they also ensure that a program is well-defined, meaning that it cannot have different behaviors on different representations of the same piece of data. Unfortunately, it has proven difficult to define adequate encodings of programming languages themselves. Such encodings would be very useful in language processing tools such as interpreters, compilers, model-checking tools, etc., as these systems are often difficult to get correct. 
The key problem in representing programming languages is in encoding binding constructs; previous approaches have serious limitations in either the operations they allow or the correctness guarantees they make.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Harper:2011:LWG, author = "Thomas Harper", title = "A library writer's guide to shortcut fusion", journal = j-SIGPLAN, volume = "46", number = "12", pages = "47--58", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034682", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There are now a variety of shortcut fusion techniques in the wild for removing intermediate data structures in Haskell. They are often presented, however, specialised to a specific data structure and interface. This can make it difficult to transfer these techniques to other settings. In this paper, we give a roadmap for a library writer who would like to implement fusion for his own library. We explain shortcut fusion without reference to any specific implementation by treating it as an instance of data refinement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Lippmeier:2011:EPS, author = "Ben Lippmeier and Gabriele Keller", title = "Efficient parallel stencil convolution in {Haskell}", journal = j-SIGPLAN, volume = "46", number = "12", pages = "59--70", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034684", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stencil convolution is a fundamental building block of many scientific and image processing algorithms. We present a declarative approach to writing such convolutions in Haskell that is both efficient at runtime and implicitly parallel. To achieve this we extend our prior work on the Repa array library with two new features: partitioned and cursored arrays. Combined with careful management of the interaction between GHC and its back-end code generator LLVM, we achieve performance comparable to the standard OpenCV library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Marlow:2011:MDP, author = "Simon Marlow and Ryan Newton and Simon Peyton Jones", title = "A monad for deterministic parallelism", journal = j-SIGPLAN, volume = "46", number = "12", pages = "71--82", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034685", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new programming model for deterministic parallel computation in a pure functional language. 
The model is monadic and has explicit granularity, but allows dynamic construction of dataflow networks that are scheduled at runtime, while remaining deterministic and pure. The implementation is based on monadic concurrency, which has until now only been used to simulate concurrency in functional languages, rather than to provide parallelism. We present the API with its semantics, and argue that parallel execution is deterministic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Leijen:2011:PCP, author = "Daan Leijen and Manuel Fahndrich and Sebastian Burckhardt", title = "Prettier concurrency: purely functional concurrent revisions", journal = j-SIGPLAN, volume = "46", number = "12", pages = "83--94", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034686", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This article presents an extension to the work of Launchbury and Peyton-Jones on the ST monad. Using a novel model for concurrency, called concurrent revisions [3,5], we show how we can use concurrency together with imperative mutable variables, while still being able to safely convert such computations (in the Rev monad) into pure values again. In contrast to many other transaction models, like software transactional memory (STM), concurrent revisions never use rollback and always deterministically resolve conflicts. As a consequence, concurrent revisions integrate well with side-effecting I/O operations. Using deterministic conflict resolution, concurrent revisions can deal well with situations where there are many conflicts between different threads that modify a shared data structure.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Stefan:2011:FDI, author = "Deian Stefan and Alejandro Russo and John C. Mitchell and David Mazi{\`e}res", title = "Flexible dynamic information flow control in {Haskell}", journal = j-SIGPLAN, volume = "46", number = "12", pages = "95--106", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034688", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a new, dynamic, floating-label approach to language-based information flow control, and present an implementation in Haskell. A labeled IO monad, LIO, keeps track of a current label and permits restricted access to IO functionality, while ensuring that the current label exceeds the labels of all data observed and restricts what can be modified. Unlike other language-based work, LIO also bounds the current label with a current clearance that provides a form of discretionary access control. 
In addition, programs may encapsulate and pass around the results of computations with different labels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Duregaard:2011:EPG, author = "Jonas Dureg{\aa}rd and Patrik Jansson", title = "Embedded parser generators", journal = j-SIGPLAN, volume = "46", number = "12", pages = "107--117", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034689", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel method of embedding context-free grammars in Haskell, and to automatically generate parsers and pretty-printers from them. We have implemented this method in a library called BNFC-meta (from the BNF Converter, which it is built on). The library builds compiler front ends using metaprogramming instead of conventional code generation. Parsers are built from labelled BNF grammars that are defined directly in Haskell modules. Our solution combines features of parser generators (static grammar checks, a highly specialised grammar DSL) and adds several features that are otherwise exclusive to combinatory libraries such as the ability to reuse, parameterise and generate grammars inside Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Epstein:2011:THC, author = "Jeff Epstein and Andrew P. Black and Simon Peyton-Jones", title = "Towards {Haskell} in the cloud", journal = j-SIGPLAN, volume = "46", number = "12", pages = "118--129", month = dec, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2096148.2034690", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jan 17 17:51:46 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Cloud Haskell, a domain-specific language for developing programs for a distributed computing environment. Implemented as a shallow embedding in Haskell, it provides a message-passing communication model, inspired by Erlang, without introducing incompatibility with Haskell's established shared-memory concurrency. A key contribution is a method for serializing function closures for transmission across the network. Cloud Haskell has been implemented; we present example code and some preliminary performance measurements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '11 conference proceedings.", } @Article{Black:2012:PSD, author = "Andrew P. Black and Peter W. 
O'Hearn", title = "Presentation of the {SIGPLAN} distinguished achievement award to {Sir Charles Antony Richard Hoare, FRS, FREng, FBCS}; and interview", journal = j-SIGPLAN, volume = "47", number = "1", pages = "1--2", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103658", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Hoare:2012:MTR, author = "Tony Hoare", title = "Message of thanks: on the receipt of the {2011 ACM SIGPLAN} distinguished achievement award", journal = j-SIGPLAN, volume = "47", number = "1", pages = "3--6", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103659", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{vanStaden:2012:F, author = "Stephan van Staden and Cristiano Calcagno and Bertrand Meyer", title = "Freefinement", journal = j-SIGPLAN, volume = "47", number = "1", pages = "7--18", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103661", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Freefinement is an algorithm that constructs a sound refinement calculus from a verification system under certain conditions. In this paper, a verification system is any formal system for establishing whether an inductively defined term, typically a program, satisfies a specification. Examples of verification systems include Hoare logics and type systems. Freefinement first extends the term language to include specification terms, and builds a verification system for the extended language that is a sound and conservative extension of the original system. The extended system is then transformed into a sound refinement calculus. The resulting refinement calculus can interoperate closely with the verification system --- it is even possible to reuse and translate proofs between them. Freefinement gives a semantics to refinement at an abstract level: it associates each term of the extended language with a set of terms from the original language, and refinement simply reduces this set. The paper applies freefinement to a simple type system for the lambda calculus and also to a Hoare logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Joshi:2012:UHI, author = "Saurabh Joshi and Shuvendu K. 
Lahiri and Akash Lal", title = "Underspecified harnesses and interleaved bugs", journal = j-SIGPLAN, volume = "47", number = "1", pages = "19--30", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103662", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static assertion checking of open programs requires setting up a precise harness to capture the environment assumptions. For instance, a library may require a file handle to be properly initialized before it is passed into it. A harness is used to set up or specify the appropriate preconditions before invoking methods from the program. In the absence of a precise harness, even the most precise automated static checkers are bound to report numerous false alarms. This often limits the adoption of static assertion checking in the hands of a user. In this work, we explore the possibility of automatically filtering away (or prioritizing) warnings that result from imprecision in the harness. We limit our attention to the scenario when one is interested in finding bugs due to concurrency. We define a warning to be an interleaved bug when it manifests on an input for which no sequential interleaving produces a warning. As we argue in the paper, limiting a static analysis to only consider interleaved bugs greatly reduces false positives during static concurrency analysis in the presence of an imprecise harness. We formalize interleaved bugs as a differential analysis between the original program and its sequential version and provide various techniques for finding them. Our implementation CBugs demonstrates that the scheme of finding interleaved bugs can alleviate the need to construct precise harnesses while checking real-life concurrent programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Gardner:2012:TPL, author = "Philippa Anne Gardner and Sergio Maffeis and Gareth David Smith", title = "Towards a program logic for {JavaScript}", journal = j-SIGPLAN, volume = "47", number = "1", pages = "31--44", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103663", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript has become the most widely used language for client-side web programming. The dynamic nature of JavaScript makes understanding its code notoriously difficult, leading to buggy programs and a lack of adequate static-analysis tools. We believe that logical reasoning has much to offer JavaScript: a simple description of program behaviour, a clear understanding of module boundaries, and the ability to verify security contracts. We introduce a program logic for reasoning about a broad subset of JavaScript, including challenging features such as prototype inheritance and `with'. We adapt ideas from separation logic to provide tractable reasoning about JavaScript code: reasoning about easy programs is easy; reasoning about hard programs is possible. We prove a strong soundness result. 
All libraries written in our subset and proved correct with respect to their specifications will be well-behaved, even when called by arbitrary JavaScript code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Krishnaswami:2012:HOF, author = "Neelakantan R. Krishnaswami and Nick Benton and Jan Hoffmann", title = "Higher-order functional reactive programming in bounded space", journal = j-SIGPLAN, volume = "47", number = "1", pages = "45--58", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103665", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional reactive programming (FRP) is an elegant and successful approach to programming reactive systems declaratively. The high levels of abstraction and expressivity that make FRP attractive as a programming model do, however, often lead to programs whose resource usage is excessive and hard to predict. In this paper, we address the problem of space leaks in discrete-time functional reactive programs. We present a functional reactive programming language that statically bounds the size of the dataflow graph a reactive program creates, while still permitting use of higher-order functions and higher-type streams such as streams of streams. We achieve this with a novel linear type theory that both controls allocation and ensures that all recursive definitions are well-founded. We also give a denotational semantics for our language by combining recent work on metric spaces for the interpretation of higher-order causal functions with length-space models of space-bounded computation. The resulting category is doubly closed and hence forms a model of the logic of bunched implications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Hur:2012:MBK, author = "Chung-Kil Hur and Derek Dreyer and Georg Neis and Viktor Vafeiadis", title = "The marriage of bisimulations and {Kripke} logical relations", journal = j-SIGPLAN, volume = "47", number = "1", pages = "59--72", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103666", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There has been great progress in recent years on developing effective techniques for reasoning about program equivalence in ML-like languages---that is, languages that combine features like higher-order functions, recursive types, abstract types, and general mutable references. Two of the most prominent types of techniques to have emerged are *bisimulations* and *Kripke logical relations (KLRs)*. While both approaches are powerful, their complementary advantages have led us and other researchers to wonder whether there is an essential tradeoff between them. Furthermore, both approaches seem to suffer from fundamental limitations if one is interested in scaling them to inter-language reasoning. 
In this paper, we propose *relation transition systems (RTSs)*, which marry together some of the most appealing aspects of KLRs and bisimulations. In particular, RTSs show how bisimulations' support for reasoning about recursive features via *coinduction* can be synthesized with KLRs' support for reasoning about local state via *state transition systems*. Moreover, we have designed RTSs to avoid the limitations of KLRs and bisimulations that preclude their generalization to inter-language reasoning. Notably, unlike KLRs, RTSs are transitively composable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{James:2012:IE, author = "Roshan P. James and Amr Sabry", title = "Information effects", journal = j-SIGPLAN, volume = "47", number = "1", pages = "73--84", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103667", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computation is a physical process which, like all other physical processes, is fundamentally reversible. From the notion of type isomorphisms, we derive a typed, universal, and reversible computational model in which information is treated as a linear resource that can neither be duplicated nor erased. We use this model as a semantic foundation for computation and show that the `gap' between conventional irreversible computation and logically reversible computation can be captured by a type-and-effect system. Our type-and-effect system is structured as an arrow metalanguage that exposes creation and erasure of information as explicit effect operations. Irreversible computations arise from interactions with an implicit information environment, thus making them a derived notion, much like open systems in Physics. We sketch several applications which can benefit from an explicit treatment of information effects, such as quantitative information-flow security and differential privacy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Yang:2012:LAE, author = "Jean Yang and Kuat Yessenov and Armando Solar-Lezama", title = "A language for automatically enforcing privacy policies", journal = j-SIGPLAN, volume = "47", number = "1", pages = "85--96", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103669", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is becoming increasingly important for applications to protect sensitive data. With current techniques, the programmer bears the burden of ensuring that the application's behavior adheres to policies about where sensitive values may flow. Unfortunately, privacy policies are difficult to manage because their global nature requires coordinated reasoning and enforcement. To address this problem, we describe a programming model that makes the system responsible for ensuring adherence to privacy policies. 
The programming model has two components: (1) core programs describing functionality independent of privacy concerns and (2) declarative, decentralized policies controlling how sensitive values are disclosed. Each sensitive value encapsulates multiple views; policies describe which views are allowed based on the output context. The system is responsible for automatically ensuring that outputs are consistent with the policies. We have implemented this programming model in a new functional constraint language named Jeeves. In Jeeves, sensitive values are introduced as symbolic variables and policies correspond to constraints that are resolved at output channels. We have implemented Jeeves as a Scala library using an SMT solver as a model finder. In this paper we describe the dynamic and static semantics of Jeeves and the properties about policy enforcement that the semantics guarantees. We also describe our experience implementing a conference management system and a social network.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Barthe:2012:PRR, author = "Gilles Barthe and Boris K{\"o}pf and Federico Olmedo and Santiago Zanella B{\'e}guelin", title = "Probabilistic relational reasoning for differential privacy", journal = j-SIGPLAN, volume = "47", number = "1", pages = "97--110", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103670", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Differential privacy is a notion of confidentiality that protects the privacy of individuals while allowing useful computations on their private data. Deriving differential privacy guarantees for real programs is a difficult and error-prone task that calls for principled approaches and tool support. Approaches based on linear types and static analysis have recently emerged; however, an increasing number of programs achieve privacy using techniques that cannot be analyzed by these approaches. Examples include programs that aim for weaker, approximate differential privacy guarantees, programs that use the Exponential mechanism, and randomized programs that achieve differential privacy without using any standard mechanism. Providing support for reasoning about the privacy of such programs has been an open problem. We report on CertiPriv, a machine-checked framework for reasoning about differential privacy built on top of the Coq proof assistant. The central component of CertiPriv is a quantitative extension of a probabilistic relational Hoare logic that enables one to derive differential privacy guarantees for programs from first principles. We demonstrate the expressiveness of CertiPriv using a number of examples whose formal analysis is out of the reach of previous techniques. 
In particular, we provide the first machine-checked proofs of correctness of the Laplacian and Exponential mechanisms and of the privacy of randomized and streaming algorithms from the recent literature.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Heidegger:2012:APC, author = "Phillip Heidegger and Annette Bieniusa and Peter Thiemann", title = "Access permission contracts for scripting languages", journal = j-SIGPLAN, volume = "47", number = "1", pages = "111--122", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103671", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ideal software contract fully specifies the behavior of an operation. Often, in particular in the context of scripting languages, a full specification may be cumbersome to state and may not even be desired. In such cases, a partial specification, which describes selected aspects of the behavior, may be used to raise the confidence in an implementation of the operation to a reasonable level. We propose a novel kind of contract for object-based languages that specifies the side effects of an operation with access permissions. An access permission contract uses sets of access paths to express read and write permissions for the properties of the objects accessible from the operation. We specify a monitoring semantics for access permission contracts and implement this semantics in a contract system for JavaScript. We prove soundness and stability of violation under increasing aliasing for our semantics. Applications of access permission contracts include enforcing modularity, test-driven development, program understanding, and regression testing. With respect to testing and understanding, we find that adding access permissions to contracts increases the effectiveness of error detection through contract monitoring by 6-13\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Madhusudan:2012:RPI, author = "Parthasarathy Madhusudan and Xiaokang Qiu and Andrei Stefanescu", title = "Recursive proofs for inductive tree data-structures", journal = j-SIGPLAN, volume = "47", number = "1", pages = "123--136", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103673", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop logical mechanisms and procedures to facilitate the verification of full functional properties of inductive tree data-structures using recursion that are sound, incomplete, but terminating. Our contribution rests in a new extension of first-order logic with recursive definitions called Dryad, a syntactical restriction on pre- and post-conditions of recursive imperative programs using Dryad, and a systematic methodology for accurately unfolding the footprint on the heap uncovered by the program that leads to finding simple recursive proofs using formula abstraction and calls to SMT solvers. 
We evaluate our methodology empirically and show that several complex tree data-structure algorithms can be checked against full functional specifications automatically, given pre- and post-conditions. This results in the first automatic terminating methodology for proving a wide variety of annotated algorithms on tree data-structures correct, including max-heaps, treaps, red-black trees, AVL trees, binomial heaps, and B-trees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Veanes:2012:SFS, author = "Margus Veanes and Pieter Hooimeijer and Benjamin Livshits and David Molnar and Nikolaj Bjorner", title = "Symbolic finite state transducers: algorithms and applications", journal = j-SIGPLAN, volume = "47", number = "1", pages = "137--150", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103674", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Finite automata and finite transducers are used in a wide range of applications in software engineering, from regular expressions to specification languages. We extend these classic objects with symbolic alphabets represented as parametric theories. Admitting potentially infinite alphabets makes this representation strictly more general and succinct than classical finite transducers and automata over strings. Despite this, the main operations, including composition, checking that a transducer is single-valued, and equivalence checking for single-valued symbolic finite transducers are effective given a decision procedure for the background theory. We provide novel algorithms for these operations and extend composition to symbolic transducers augmented with registers. Our base algorithms are unusual in that they are nonconstructive, therefore, we also supply a separate model generation algorithm that can quickly find counterexamples in the case two symbolic finite transducers are not equivalent. The algorithms give rise to a complete decidable algebra of symbolic transducers. Unlike previous work, we do not need any syntactic restriction of the formulas on the transitions, only a decision procedure. In practice we leverage recent advances in satisfiability modulo theory (SMT) solvers. We demonstrate our techniques on four case studies, covering a wide range of applications. 
Our techniques can synthesize string pre-images in excess of 8,000 bytes in roughly a minute, and we find that our new encodings significantly outperform previous techniques in succinctness and speed of analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Koksal:2012:CC, author = "Ali Sinan K{\"o}ksal and Viktor Kuncak and Philippe Suter", title = "Constraints as control", journal = j-SIGPLAN, volume = "47", number = "1", pages = "151--164", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103675", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an extension of Scala that supports constraint programming over bounded and unbounded domains. The resulting language, Kaplan, provides the benefits of constraint programming while preserving the existing features of Scala. Kaplan integrates constraint and imperative programming by using constraints as an advanced control structure; the developers use the monadic 'for' construct to iterate over the solutions of constraints or branch on the existence of a solution. The constructs we introduce have simple semantics that can be understood as explicit enumeration of values, but are implemented more efficiently using symbolic reasoning. Kaplan programs can manipulate constraints at run-time, with the combined benefits of type-safe syntax trees and first-class functions. The language of constraints is a functional subset of Scala, supporting arbitrary recursive function definitions over algebraic data types, sets, maps, and integers. Our implementation runs on a platform combining a constraint solver with a standard virtual machine. For constraint solving we use an algorithm that handles recursive function definitions through fair function unrolling and builds upon the state-of-the art SMT solver Z3. We evaluate Kaplan on examples ranging from enumeration of data structures to execution of declarative specifications. We found Kaplan promising because it is expressive, supporting a range of problem domains, while enabling full-speed execution of programs that do not rely on constraint programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Austin:2012:MFD, author = "Thomas H. Austin and Cormac Flanagan", title = "Multiple facets for dynamic information flow", journal = j-SIGPLAN, volume = "47", number = "1", pages = "165--178", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103677", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript has become a central technology of the web, but it is also the source of many security problems, including cross-site scripting attacks and malicious advertising code. Central to these problems is the fact that code from untrusted sources runs with full privileges. We implement information flow controls in Firefox to help prevent violations of data confidentiality and integrity. 
Most previous information flow techniques have primarily relied on either static type systems, which are a poor fit for JavaScript, or on dynamic analyses that sometimes get stuck due to problematic implicit flows, even in situations where the target web application correctly satisfies the desired security policy. We introduce faceted values, a new mechanism for providing information flow security in a dynamic manner that overcomes these limitations. Taking inspiration from secure multi-execution, we use faceted values to simultaneously and efficiently simulate multiple executions for different security levels, thus providing non-interference with minimal overhead, and without the reliance on the stuck executions of prior dynamic approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Ray:2012:DCI, author = "Donald Ray and Jay Ligatti", title = "Defining code-injection attacks", journal = j-SIGPLAN, volume = "47", number = "1", pages = "179--190", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103678", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper shows that existing definitions of code-injection attacks (e.g., SQL-injection attacks) are flawed. The flaws make it possible for attackers to circumvent existing mechanisms, by supplying code-injecting inputs that are not recognized as such. The flaws also make it possible for benign inputs to be treated as attacks. After describing these flaws in conventional definitions of code-injection attacks, this paper proposes a new definition, which is based on whether the symbols input to an application get used as (normal-form) values in the application's output. Because values are already fully evaluated, they cannot be considered `code' when injected. This simple new definition of code-injection attacks avoids the problems of existing definitions, improves our understanding of how and when such attacks occur, and enables us to evaluate the effectiveness of mechanisms for mitigating such attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Basu:2012:DCR, author = "Samik Basu and Tevfik Bultan and Meriem Ouederni", title = "Deciding choreography realizability", journal = j-SIGPLAN, volume = "47", number = "1", pages = "191--202", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103680", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Since software systems are becoming increasingly more concurrent and distributed, modeling and analysis of interactions among their components is a crucial problem. In several application domains, message-based communication is used as the interaction mechanism, and the communication contract among the components of the system is specified semantically as a state machine. In the service-oriented computing domain such communication contracts are called `choreography' specifications. 
A choreography specification identifies allowable ordering of message exchanges in a distributed system. A fundamental question about a choreography specification is determining its realizability, i.e., given a choreography specification, is it possible to build a distributed system that communicates exactly as the choreography specifies? Checking realizability of choreography specifications has been an open problem for several years and it was not known if this was a decidable problem. In this paper we give necessary and sufficient conditions for realizability of choreographies. We implemented the proposed realizability check and our experiments show that it can efficiently determine the realizability of (1) web service choreographies, (2) Singularity OS channel contracts, and (3) UML collaboration (communication) diagrams.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Bouajjani:2012:ARP, author = "Ahmed Bouajjani and Michael Emmi", title = "Analysis of recursively parallel programs", journal = j-SIGPLAN, volume = "47", number = "1", pages = "203--214", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103681", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a general formal model of isolated hierarchical parallel computations, and identify several fragments to match the concurrency constructs present in real-world programming languages such as Cilk and X10. By associating fundamental formal models (vector addition systems with recursive transitions) to each fragment, we provide a common platform for exposing the relative difficulties of algorithmic reasoning. For each case we measure the complexity of deciding state-reachability for finite-data recursive programs, and propose algorithms for the decidable cases. The complexities which include PTIME, NP, EXPSPACE, and 2EXPTIME contrast with undecidable state-reachability for recursive multi-threaded programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Rexford:2012:PLP, author = "Jennifer Rexford", title = "Programming languages for programmable networks", journal = j-SIGPLAN, volume = "47", number = "1", pages = "215--216", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103683", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's computer networks perform a bewildering array of tasks, from routing and access control, to traffic monitoring and load balancing. To support wireless users accessing services hosted in the cloud, enterprise and data-center networks are under increasing pressure to support client mobility, virtual-machine migration, resource isolation between cloud services, and energy-efficient operation. 
Yet, network administrators must configure the network through closed and proprietary interfaces to heterogeneous devices, such as routers, switches, firewalls, load balancers, network address translators, and intrusion detection systems. Not surprisingly, configuring these complex networks is expensive and error-prone, and innovation in network management proceeds at a snail's pace. During the past several years, the networking industry and research community have pushed for greater openness in networking software, and a clearer separation between networking devices and the software that controls them. This broad trend is known as Software Defined Networking (SDN). A hallmark of SDN is having an open interface for controller software running on a commodity computer to install packet-processing rules in the underlying switches. In particular, the OpenFlow protocol (see www.openflow.org) has significant momentum. Many commercial switches support OpenFlow, and a number of campus, data-center, and backbone networks have deployed the new technology. With the emergence of open interfaces to network devices, the time is ripe to rethink the design of network software, to put networking on a stronger foundation and foster innovation in networked services. The programming languages community can play a vital role in this transformation, by creating languages, compilers, run-time systems, and testing and verification techniques that raise the level of abstraction for programming the network. In this talk, we give an overview of Software Defined Networking, and survey the early programming-languages research in this area. We also outline exciting opportunities for interdisciplinary research at the intersection of programming languages and computer networks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Monsanto:2012:CRT, author = "Christopher Monsanto and Nate Foster and Rob Harrison and David Walker", title = "A compiler and run-time system for network programming languages", journal = j-SIGPLAN, volume = "47", number = "1", pages = "217--230", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103685", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined networks (SDNs) are a new kind of network architecture in which a controller machine manages a distributed collection of switches by instructing them to install or uninstall packet-forwarding rules and report traffic statistics. The recently formed Open Networking Consortium, whose members include Google, Facebook, Microsoft, Verizon, and others, hopes to use this architecture to transform the way that enterprise and data center networks are implemented. In this paper, we define a high-level, declarative language, called NetCore, for expressing packet-forwarding policies on SDNs. NetCore is expressive, compositional, and has a formal semantics. To ensure that a majority of packets are processed efficiently on switches---instead of on the controller---we present new compilation algorithms for NetCore and couple them with a new run-time system that issues rule installation commands and traffic-statistics queries to switches. 
Together, the compiler and run-time system generate efficient rules whenever possible and outperform the simple, manual techniques commonly used to program SDNs today. In addition, the algorithms we develop are generic, assuming only that the packet-matching capabilities available on switches satisfy some basic algebraic laws. Overall, this paper delivers a new design for a high-level network programming language; an improved set of compiler algorithms; a new run-time system for SDN architectures; the first formal semantics and proofs of correctness in this domain; and an implementation and evaluation that demonstrates the performance benefits over traditional manual techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Chugh:2012:NRL, author = "Ravi Chugh and Patrick M. Rondon and Ranjit Jhala", title = "Nested refinements: a logic for duck typing", journal = j-SIGPLAN, volume = "47", number = "1", pages = "231--244", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103686", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programs written in dynamic languages make heavy use of features --- run-time type tests, value-indexed dictionaries, polymorphism, and higher-order functions --- that are beyond the reach of type systems that employ either purely syntactic or purely semantic reasoning. We present a core calculus, System D, that merges these two modes of reasoning into a single powerful mechanism of nested refinement types wherein the typing relation is itself a predicate in the refinement logic. System D coordinates SMT-based logical implication and syntactic subtyping to automatically typecheck sophisticated dynamic language programs. By coupling nested refinements with McCarthy's theory of finite maps, System D can precisely reason about the interaction of higher-order functions, polymorphism, and dictionaries. The addition of type predicates to the refinement logic creates a circularity that leads to unique technical challenges in the metatheory, which we solve with a novel stratification approach that we use to prove the soundness of System D.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Cousot:2012:AIFa, author = "Patrick Cousot and Radhia Cousot", title = "An abstract interpretation framework for termination", journal = j-SIGPLAN, volume = "47", number = "1", pages = "245--258", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103687", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Proof, verification and analysis methods for termination all rely on two induction principles: (1) a variant function or induction on data ensuring progress towards the end and (2) some form of induction on the program structure. The abstract interpretation design principle is first illustrated for the design of new forward and backward proof, verification and analysis methods for safety. 
The safety collecting semantics defining the strongest safety property of programs is first expressed in a constructive fixpoint form. Safety proof and checking/verification methods then immediately follow by fixpoint induction. Static analysis of abstract safety properties such as invariance are constructively designed by fixpoint abstraction (or approximation) to (automatically) infer safety properties. So far, no such clear design principle did exist for termination so that the existing approaches are scattered and largely not comparable with each other. For (1), we show that this design principle applies equally well to potential and definite termination. The trace-based termination collecting semantics is given a fixpoint definition. Its abstraction yields a fixpoint definition of the best variant function. By further abstraction of this best variant function, we derive the Floyd/Turing termination proof method as well as new static analysis methods to effectively compute approximations of this best variant function. For (2), we introduce a generalization of the syntactic notion of structural induction (as found in Hoare logic) into a semantic structural induction based on the new semantic concept of inductive trace cover covering execution traces by segments, a new basis for formulating program properties. Its abstractions allow for generalized recursive proof, verification and static analysis methods by induction on both program structure, control, and data. Examples of particular instances include Floyd's handling of loop cutpoints as well as nested loops, Burstall's intermittent assertion total correctness proof method, and Podelski-Rybalchenko transition invariants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Hoder:2012:PGA, author = "Krystof Hoder and Laura Kovacs and Andrei Voronkov", title = "Playing in the grey area of proofs", journal = j-SIGPLAN, volume = "47", number = "1", pages = "259--272", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103689", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interpolation is an important technique in verification and static analysis of programs. In particular, interpolants extracted from proofs of various properties are used in invariant generation and bounded model checking. A number of recent papers studies interpolation in various theories and also extraction of smaller interpolants from proofs. In particular, there are several algorithms for extracting of interpolants from so-called local proofs. The main contribution of this paper is a technique of minimising interpolants based on transformations of what we call the `grey area' of local proofs. Another contribution is a technique of transforming, under certain common conditions, arbitrary proofs into local ones. Unlike many other interpolation techniques, our technique is very general and applies to arbitrary theories. Our approach is implemented in the theorem prover Vampire and evaluated on a large number of benchmarks coming from first-order theorem proving and bounded model checking using logic with equality, uninterpreted functions and linear integer arithmetic. 
Our experiments demonstrate the power of the new techniques: for example, it is not unusual that our proof transformation gives more than a tenfold reduction in the size of interpolants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Stampoulis:2012:SUE, author = "Antonis Stampoulis and Zhong Shao", title = "Static and user-extensible proof checking", journal = j-SIGPLAN, volume = "47", number = "1", pages = "273--284", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103690", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite recent successes, large-scale proof development within proof assistants remains an arcane art that is extremely time-consuming. We argue that this can be attributed to two profound shortcomings in the architecture of modern proof assistants. The first is that proofs need to include a large amount of minute detail; this is due to the rigidity of the proof checking process, which cannot be extended with domain-specific knowledge. In order to avoid these details, we rely on developing and using tactics, specialized procedures that produce proofs. Unfortunately, tactics are both hard to write and hard to use, revealing the second shortcoming of modern proof assistants. This is because there is no static knowledge about their expected use and behavior. As has recently been demonstrated, languages that allow type-safe manipulation of proofs, like Beluga, Delphin and VeriML, can be used to partly mitigate this second issue, by assigning rich types to tactics. Still, the architectural issues remain. In this paper, we build on this existing work, and demonstrate two novel ideas: an extensible conversion rule and support for static proof scripts. Together, these ideas enable us to support both user-extensible proof checking, and sophisticated static checking of tactics, leading to a new point in the design space of future proof assistants. Both ideas are based on the interplay between a light-weight staging construct and the rich type information available.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Klein:2012:RYR, author = "Casey Klein and John Clements and Christos Dimoulas and Carl Eastlund and Matthias Felleisen and Matthew Flatt and Jay A. McCarthy and Jon Rafkind and Sam Tobin-Hochstadt and Robert Bruce Findler", title = "Run your research: on the effectiveness of lightweight mechanization", journal = j-SIGPLAN, volume = "47", number = "1", pages = "285--296", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103691", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Formal models serve in many roles in the programming language community. In its primary role, a model communicates the idea of a language design; the architecture of a language tool; or the essence of a program analysis. No matter which role it plays, however, a faulty model doesn't serve its purpose. 
One way to eliminate flaws from a model is to write it down in a mechanized formal language. It is then possible to state theorems about the model, to prove them, and to check the proofs. Over the past nine years, PLT has developed and explored a lightweight version of this approach, dubbed Redex. In a nutshell, Redex is a domain-specific language for semantic models that is embedded in the Racket programming language. The effort of creating a model in Redex is often no more burdensome than typesetting it with LaTeX; the difference is that Redex comes with tools for the semantics engineering life cycle.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Farzan:2012:VPC, author = "Azadeh Farzan and Zachary Kincaid", title = "Verification of parameterized concurrent programs by modular reasoning about data and control", journal = j-SIGPLAN, volume = "47", number = "1", pages = "297--308", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103693", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we consider the problem of verifying thread-state properties of multithreaded programs in which the number of active threads cannot be statically bounded. Our approach is based on decomposing the task into two modules, where one reasons about data and the other reasons about control. The data module computes thread-state invariants (e.g., linear constraints over global variables and local variables of one thread) using the thread interference information computed by the control module. The control module computes a representation of thread interference, as an incrementally constructed data flow graph, using the data invariants provided by the data module. These invariants are used to rule out patterns of thread interference that can not occur in a real program execution. The two modules are incorporated into a feedback loop, so that the abstractions of data and interference are iteratively coarsened as the algorithm progresses (that is, they become weaker) until a fixed point is reached. Our approach is sound and terminating, and applicable to programs with infinite state (e.g., unbounded integers) and unboundedly many threads. The verification method presented in this paper has been implemented into a tool, called Duet. 
We demonstrate the effectiveness of our technique by verifying properties of a selection of Linux device drivers using Duet, and also compare Duet with previous work on verification of parameterized Boolean program using the Boolean abstractions of these drivers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Botincan:2012:RSS, author = "Matko Botincan and Mike Dodds and Suresh Jagannathan", title = "Resource-sensitive synchronization inference by abduction", journal = j-SIGPLAN, volume = "47", number = "1", pages = "309--322", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103694", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an analysis which takes as its input a sequential program, augmented with annotations indicating potential parallelization opportunities, and a sequential proof, written in separation logic, and produces a correctly-synchronized parallelized program and proof of that program. Unlike previous work, ours is not an independence analysis; we insert synchronization constructs to preserve relevant dependencies found in the sequential program that may otherwise be violated by a naive translation. Separation logic allows us to parallelize fine-grained patterns of resource-usage, moving beyond straightforward points-to analysis. Our analysis works by using the sequential proof to discover dependencies between different parts of the program. It leverages these discovered dependencies to guide the insertion of synchronization primitives into the parallelized program, and to ensure that the resulting parallelized program satisfies the same specification as the original sequential program, and exhibits the same sequential behaviour. Our analysis is built using frame inference and abduction, two techniques supported by an increasing number of separation logic tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Reddy:2012:SCI, author = "Uday S. Reddy and John C. Reynolds", title = "Syntactic control of interference for separation logic", journal = j-SIGPLAN, volume = "47", number = "1", pages = "323--336", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103695", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Separation Logic has witnessed tremendous success in recent years in reasoning about programs that deal with heap storage. Its success owes to the fundamental principle that one should keep separate areas of the heap storage separate in program reasoning. However, the way Separation Logic deals with program variables continues to be based on traditional Hoare Logic without taking any benefit of the separation principle. This has led to unwieldy proof rules suffering from lack of clarity as well as questions surrounding their soundness. 
In this paper, we extend the separation idea to the treatment of variables in Separation Logic, especially Concurrent Separation Logic, using the system of Syntactic Control of Interference proposed by Reynolds in 1978. We extend the original system with permission algebras, making it more powerful and able to deal with the issues of concurrent programs. The result is a streamlined presentation of Concurrent Separation Logic, whose rules are memorable and soundness obvious. We also include a discussion of how the new rules impact the semantics and devise static analysis techniques to infer the required permissions automatically.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Licata:2012:CDT, author = "Daniel R. Licata and Robert Harper", title = "Canonicity for $2$-dimensional type theory", journal = j-SIGPLAN, volume = "47", number = "1", pages = "337--348", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103697", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Higher-dimensional dependent type theory enriches conventional one-dimensional dependent type theory with additional structure expressing equivalence of elements of a type. This structure may be employed in a variety of ways to capture rather coarse identifications of elements, such as a universe of sets considered modulo isomorphism. Equivalence must be respected by all families of types and terms, as witnessed computationally by a type-generic program. Higher-dimensional type theory has applications to code reuse for dependently typed programming, and to the formalization of mathematics. In this paper, we develop a novel judgemental formulation of a two-dimensional type theory, which enjoys a canonicity property: a closed term of boolean type is definitionally equal to true or false. Canonicity is a necessary condition for a computational interpretation of type theory as a programming language, and does not hold for existing axiomatic presentations of higher-dimensional type theory. The method of proof is a generalization of the NuPRL semantics, interpreting types as syntactic groupoids rather than equivalence relations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Kammar:2012:AFE, author = "Ohad Kammar and Gordon D. Plotkin", title = "Algebraic foundations for effect-dependent optimisations", journal = j-SIGPLAN, volume = "47", number = "1", pages = "349--360", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103698", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a general theory of Gifford-style type and effect annotations, where effect annotations are sets of effects. Generality is achieved by recourse to the theory of algebraic effects, a development of Moggi's monadic theory of computational effects that emphasises the operations causing the effects at hand and their equational theory.
The key observation is that annotation effects can be identified with operation symbols. We develop an annotated version of Levy's Call-by-Push-Value language with a kind of computations for every effect set; it can be thought of as a sequential, annotated intermediate language. We develop a range of validated optimisations (i.e., equivalences), generalising many existing ones and adding new ones. We classify these optimisations as structural, algebraic, or abstract: structural optimisations always hold; algebraic ones depend on the effect theory at hand; and abstract ones depend on the global nature of that theory (we give modularly-checkable sufficient conditions for their validity).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Cretin:2012:PCA, author = "Julien Cretin and Didier R{\'e}my", title = "On the power of coercion abstraction", journal = j-SIGPLAN, volume = "47", number = "1", pages = "361--372", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103699", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Erasable coercions in System F-eta, also known as retyping functions, are well-typed eta-expansions of the identity. They may change the type of terms without changing their behavior and can thus be erased before reduction. Coercions in F-eta can model subtyping of known types and some displacement of quantifiers, but not subtyping assumptions nor certain forms of delayed type instantiation. We generalize F-eta by allowing abstraction over retyping functions. We follow a general approach where computing with coercions can be seen as computing in the lambda-calculus but keeping track of which parts of terms are coercions. We obtain a language where coercions do not contribute to the reduction but may block it and are thus not erasable. We recover erasable coercions by choosing a weak reduction strategy and restricting coercion abstraction to value-forms or by restricting abstraction to coercions that are polymorphic in their domain or codomain. The latter variant subsumes F-eta, F-sub, and MLF in a unified framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Naik:2012:AT, author = "Mayur Naik and Hongseok Yang and Ghila Castelnuovo and Mooly Sagiv", title = "Abstractions from tests", journal = j-SIGPLAN, volume = "47", number = "1", pages = "373--386", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103701", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a framework for leveraging dynamic analysis to find good abstractions for static analysis. A static analysis in our framework is parametrised. Our main insight is to directly and efficiently compute from a concrete trace, a necessary condition on the parameter configurations to prove a given query, and thereby prune the space of parameter configurations that the static analysis must consider. 
We provide constructive algorithms for two instance analyses in our framework: a flow- and context-sensitive thread-escape analysis and a flow- and context-insensitive points-to analysis. We show the efficacy of these analyses, and our approach, on six Java programs comprising two million bytecodes: the thread-escape analysis resolves 80\% of queries on average, disproving 28\% and proving 52\%; the points-to analysis resolves 99\% of queries on average, disproving 29\% and proving 70\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Smaragdakis:2012:SPR, author = "Yannis Smaragdakis and Jacob Evans and Caitlin Sadowski and Jaeheon Yi and Cormac Flanagan", title = "Sound predictive race detection in polynomial time", journal = j-SIGPLAN, volume = "47", number = "1", pages = "387--400", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103702", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data races are among the most reliable indicators of programming errors in concurrent software. For at least two decades, Lamport's happens-before (HB) relation has served as the standard test for detecting races--other techniques, such as lockset-based approaches, fail to be sound, as they may falsely warn of races. This work introduces a new relation, causally-precedes (CP), which generalizes happens-before to observe more races without sacrificing soundness. Intuitively, CP tries to capture the concept of happens-before ordered events that must occur in the observed order for the program to observe the same values. What distinguishes CP from past predictive race detection approaches (which also generalize an observed execution to detect races in other plausible executions) is that CP-based race detection is both sound and of polynomial complexity. We demonstrate that the unique aspects of CP result in practical benefit. Applying CP to real-world programs, we successfully analyze server-level applications (e.g., Apache FtpServer) and show that traces longer than in past predictive race analyses can be analyzed in mere seconds to a few minutes. For these programs, CP race detection uncovers races that are hard to detect by repeated execution and HB race detection: a single run of CP race detection produces several races not discovered by 10 separate rounds of happens-before race detection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Bojanczyk:2012:TNC, author = "Mikolaj Bojanczyk and Laurent Braud and Bartek Klin and Slawomir Lasota", title = "Towards nominal computation", journal = j-SIGPLAN, volume = "47", number = "1", pages = "401--412", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103704", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nominal sets are a different kind of set theory, with a more relaxed notion of finiteness. 
They offer an elegant formalism for describing lambda-terms modulo alpha-conversion, or automata on data words. This paper is an attempt at defining computation in nominal sets. We present a rudimentary programming language, called Nlambda. The key idea is that it includes a native type for finite sets in the nominal sense. To illustrate the power of our language, we write short programs that process automata on data words.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Cave:2012:PBI, author = "Andrew Cave and Brigitte Pientka", title = "Programming with binders and indexed data-types", journal = j-SIGPLAN, volume = "47", number = "1", pages = "413--424", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103705", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show how to combine a general purpose type system for an existing language with support for programming with binders and contexts by refining the type system of ML with a restricted form of dependent types where index objects are drawn from contextual LF. This allows the user to specify formal systems within the logical framework LF and index ML types with contextual LF objects. Our language design keeps the index language generic, only requiring decidability of equality of the index language, providing a modular design. To illustrate the elegance and effectiveness of our language, we give programs for closure conversion and normalization by evaluation. Our three key technical contributions are: (1) We give a bi-directional type system for our core language which is centered around refinement substitutions instead of constraint solving. As a consequence, type checking is decidable and easy to trust, although constraint solving may be undecidable. (2) We give a big-step environment based operational semantics with environments which lends itself to efficient implementation. (3) We prove our language to be type safe and have mechanized our theoretical development in the proof assistant Coq using the fresh approach to binding.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Moore:2012:MLF, author = "J. Strother Moore", title = "Meta-level features in an industrial-strength theorem prover", journal = j-SIGPLAN, volume = "47", number = "1", pages = "425--426", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103707", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ACL2 theorem prover---the current incarnation of `the' Boyer--Moore theorem prover---is a theorem prover for an extension of a first-order, applicative subset of Common Lisp. The ACL2 system provides a useful specification and modeling language as well as a useful mechanical theorem proving environment. ACL2 is in use at several major microprocessor manufacturers to verify functional correctness of important components of commercial designs.
This talk explores the design of ACL2 and the tradeoffs that have turned out to be pivotal to its success.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Zhao:2012:FLI, author = "Jianzhou Zhao and Santosh Nagarakatte and Milo M. K. Martin and Steve Zdancewic", title = "Formalizing the {LLVM} intermediate representation for verified program transformations", journal = j-SIGPLAN, volume = "47", number = "1", pages = "427--440", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103709", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Vellvm (verified LLVM), a framework for reasoning about programs expressed in LLVM's intermediate representation and transformations that operate on it. Vellvm provides a mechanized formal semantics of LLVM's intermediate representation, its type system, and properties of its SSA form. The framework is built using the Coq interactive theorem prover. It includes multiple operational semantics and proves relations among them to facilitate different reasoning styles and proof techniques. To validate Vellvm's design, we extract an interpreter from the Coq formal semantics that can execute programs from the LLVM test suite and thus be compared against LLVM reference implementations. To demonstrate Vellvm's practicality, we formalize and verify a previously proposed transformation that hardens C programs against spatial memory safety violations. Vellvm's tools allow us to extract a new, verified implementation of the transformation pass that plugs into the real LLVM infrastructure; its performance is competitive with the non-verified, ad-hoc original.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Zhu:2012:RAA, author = "Zeyuan Allen Zhu and Sasa Misailovic and Jonathan A. Kelner and Martin Rinard", title = "Randomized accuracy-aware program transformations for efficient approximate computations", journal = j-SIGPLAN, volume = "47", number = "1", pages = "441--454", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103710", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the fact that approximate computations have come to dominate many areas of computer science, the field of program transformations has focused almost exclusively on traditional semantics-preserving transformations that do not attempt to exploit the opportunity, available in many computations, to acceptably trade off accuracy for benefits such as increased performance and reduced resource consumption. We present a model of computation for approximate computations and an algorithm for optimizing these computations.
The algorithm works with two classes of transformations: substitution transformations (which select one of a number of available implementations for a given function, with each implementation offering a different combination of accuracy and resource consumption) and sampling transformations (which randomly discard some of the inputs to a given reduction). The algorithm produces a $ (1 + \epsilon) $ randomized approximation to the optimal randomized computation (which minimizes resource consumption subject to a probabilistic accuracy specification in the form of a maximum expected error or maximum error variance).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Liang:2012:RGB, author = "Hongjin Liang and Xinyu Feng and Ming Fu", title = "A rely-guarantee-based simulation for verifying concurrent program transformations", journal = j-SIGPLAN, volume = "47", number = "1", pages = "455--468", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103711", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Verifying program transformations usually requires proving that the resulting program (the target) refines or is equivalent to the original one (the source). However, the refinement relation between individual sequential threads cannot be preserved in general with the presence of parallel compositions, due to instruction reordering and the different granularities of atomic operations at the source and the target. On the other hand, the refinement relation defined based on fully abstract semantics of concurrent programs assumes arbitrary parallel environments, which is too strong and cannot be satisfied by many well-known transformations. In this paper, we propose a Rely-Guarantee-based Simulation (RGSim) to verify concurrent program transformations. The relation is parametrized with constraints of the environments that the source and the target programs may compose with. It considers the interference between threads and their environments, thus is less permissive than relations over sequential programs. It is compositional w.r.t. parallel compositions as long as the constraints are satisfied. Also, RGSim does not require semantics preservation under all environments, and can incorporate the assumptions about environments made by specific program transformations in the form of rely/guarantee conditions. We use RGSim to reason about optimizations and prove atomicity of concurrent objects. We also propose a general garbage collector verification framework based on RGSim, and verify the Boehm et al. 
concurrent mark-sweep GC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Balabonski:2012:UAF, author = "Thibaut Balabonski", title = "A unified approach to fully lazy sharing", journal = j-SIGPLAN, volume = "47", number = "1", pages = "469--480", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103713", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We give an axiomatic presentation of sharing-via-labelling for weak lambda-calculi, that makes it possible to formally compare many different approaches to fully lazy sharing, and obtain two important results. We prove that the known implementations of full laziness are all equivalent in terms of the number of beta-reductions performed, although they behave differently regarding the duplication of terms. We establish a link between the optimality theories of weak lambda-calculi and first-order rewriting systems by expressing fully lazy lambda-lifting in our framework, thus emphasizing the first-order essence of weak reduction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Rastogi:2012:IOG, author = "Aseem Rastogi and Avik Chaudhuri and Basil Hosmer", title = "The ins and outs of gradual type inference", journal = j-SIGPLAN, volume = "47", number = "1", pages = "481--494", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103714", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Gradual typing lets programmers evolve their dynamically typed programs by gradually adding explicit type annotations, which confer benefits like improved performance and fewer run-time failures. However, we argue that such evolution often requires a giant leap, and that type inference can offer a crucial missing step. If omitted type annotations are interpreted as unknown types, rather than the dynamic type, then static types can often be inferred, thereby removing unnecessary assumptions of the dynamic type. The remaining assumptions of the dynamic type may then be removed by either reasoning outside the static type system, or restructuring the code. We present a type inference algorithm that can improve the performance of existing gradually typed programs without introducing any new run-time failures. To account for dynamic typing, types that flow in to an unknown type are treated in a fundamentally different manner than types that flow out. Furthermore, in the interests of backward-compatibility, an escape analysis is conducted to decide which types are safe to infer. We have implemented our algorithm for ActionScript, and evaluated it on the SunSpider and V8 benchmark suites. 
We demonstrate that our algorithm can improve the performance of unannotated programs as well as recover most of the type annotations in annotated programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Hofmann:2012:EL, author = "Martin Hofmann and Benjamin Pierce and Daniel Wagner", title = "Edit lenses", journal = j-SIGPLAN, volume = "47", number = "1", pages = "495--508", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103715", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A lens is a bidirectional transformation between a pair of connected data structures, capable of translating an edit on one structure into an appropriate edit on the other. Many varieties of lenses have been studied, but none, to date, has offered a satisfactory treatment of how edits are represented. Many foundational accounts only consider edits of the form `overwrite the whole structure,' leading to poor behavior in many situations by failing to track the associations between corresponding parts of the structures when elements are inserted and deleted in ordered lists, for example. Other theories of lenses do maintain these associations, either by annotating the structures themselves with change information or using auxiliary data structures, but every extant theory assumes that the entire original source structure is part of the information passed to the lens. We offer a general theory of edit lenses, which work with descriptions of changes to structures, rather than with the structures themselves. We identify a simple notion of `editable structure'--a set of states plus a monoid of edits with a partial monoid action on the states--and construct a semantic space of lenses between such structures, with natural laws governing their behavior. We show how a range of constructions from earlier papers on `state-based' lenses can be carried out in this space, including composition, products, sums, list operations, etc. Further, we show how to construct edit lenses for arbitrary containers in the sense of Abbott, Altenkirch, and Ghani. 
Finally, we show that edit lenses refine a well-known formulation of state-based lenses, in the sense that every state-based lens gives rise to an edit lens over structures with a simple overwrite-only edit language, and conversely every edit lens on such structures gives rise to a state-based lens.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Batty:2012:CCC, author = "Mark Batty and Kayvan Memarian and Scott Owens and Susmit Sarkar and Peter Sewell", title = "Clarifying and compiling {C\slash C++} concurrency: from {C++11} to {POWER}", journal = j-SIGPLAN, volume = "47", number = "1", pages = "509--520", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103717", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The upcoming C and C++ revised standards add concurrency to the languages, for the first time, in the form of a subtle *relaxed memory model* (the *C++11 model*). This aims to permit compiler optimisation and to accommodate the differing relaxed-memory behaviours of mainstream multiprocessors, combining simple semantics for most code with high-performance *low-level atomics* for concurrency libraries. In this paper, we first establish two simpler but provably equivalent models for C++11, one for the full language and another for the subset without consume operations. Subsetting further to the fragment without low-level atomics, we identify a subtlety arising from atomic initialisation and prove that, under an additional condition, the model is equivalent to sequential consistency for race-free programs. We then prove our main result, the correctness of two proposed compilation schemes for the C++11 load and store concurrency primitives to Power assembly, having noted that an earlier proposal was flawed. (The main ideas apply also to ARM, which has a similar relaxed memory architecture.) This should inform the ongoing development of production compilers for C++11 and C1x, clarifies what properties of the machine architecture are required, and builds confidence in the C++11 and Power semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Ramananandro:2012:MSC, author = "Tahina Ramananandro and Gabriel {Dos Reis} and Xavier Leroy", title = "A mechanized semantics for {C++} object construction and destruction, with applications to resource management", journal = j-SIGPLAN, volume = "47", number = "1", pages = "521--532", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103718", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a formal operational semantics and its Coq mechanization for the C++ object model, featuring object construction and destruction, shared and repeated multiple inheritance, and virtual function call dispatch. These are key C++ language features for high-level system programming, in particular for predictable and reliable resource management. 
This paper is the first to present a formal mechanized account of the metatheory of construction and destruction in C++, and applications to popular programming techniques such as `resource acquisition is initialization'. We also report on irregularities and apparent contradictions in the ISO C++03 and C++11 standards.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Ellison:2012:EFS, author = "Chucky Ellison and Grigore Rosu", title = "An executable formal semantics of {C} with applications", journal = j-SIGPLAN, volume = "47", number = "1", pages = "533--544", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103719", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes an executable formal semantics of C. Being executable, the semantics has been thoroughly tested against the GCC torture test suite and successfully passes 99.2\% of 776 test programs. It is the most complete and thoroughly tested formal definition of C to date. The semantics yields an interpreter, debugger, state space search tool, and model checker `for free'. The semantics is shown capable of automatically finding program errors, both statically and at runtime. It is also used to enumerate nondeterministic behavior.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Bhat:2012:TTP, author = "Sooraj Bhat and Ashish Agarwal and Richard Vuduc and Alexander Gray", title = "A type theory for probability density functions", journal = j-SIGPLAN, volume = "47", number = "1", pages = "545--556", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103721", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There has been great interest in creating probabilistic programming languages to simplify the coding of statistical tasks; however, there still does not exist a formal language that simultaneously provides (1) continuous probability distributions, (2) the ability to naturally express custom probabilistic models, and (3) probability density functions (PDFs). This collection of features is necessary for mechanizing fundamental statistical techniques. We formalize the first probabilistic language that exhibits these features, and it serves as a foundational framework for extending the ideas to more general languages. Particularly novel are our type system for absolutely continuous (AC) distributions (those which permit PDFs) and our PDF calculation procedure, which calculates PDFs for a large class of AC distributions. 
Our formalization paves the way toward the rigorous encoding of powerful statistical reformulations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Naden:2012:TSB, author = "Karl Naden and Robert Bocchino and Jonathan Aldrich and Kevin Bierhoff", title = "A type system for borrowing permissions", journal = j-SIGPLAN, volume = "47", number = "1", pages = "557--570", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103722", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In object-oriented programming, unique permissions to object references are useful for checking correctness properties such as consistency of typestate and noninterference of concurrency. To be usable, unique permissions must be borrowed --- for example, one must be able to read a unique reference out of a field, use it for something, and put it back. While one can null out the field and later reassign it, this paradigm is ungainly and requires unnecessary writes, potentially hurting cache performance. Therefore, in practice borrowing must occur in the type system, without requiring memory updates. Previous systems support borrowing with external alias analysis and/or explicit programmer management of fractional permissions. While these approaches are powerful, they are also awkward and difficult for programmers to understand. We present an integrated language and type system with unique, immutable, and shared permissions, together with new local permissions that say that a reference may not be stored to the heap. Our system also includes change permissions such as unique {\tt >>} unique and unique {\tt >>} none that describe how permissions flow in and out of method formal parameters. Together, these features support common patterns of borrowing, including borrowing multiple local permissions from a unique reference and recovering the unique reference when the local permissions go out of scope, without any explicit management of fractions in the source language. All accounting of fractional permissions is done by the type system `under the hood.' We present the syntax and static and dynamic semantics of a formal core language and state soundness results. We also illustrate the utility and practicality of our design by using it to express several realistic examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{Strub:2012:SCB, author = "Pierre-Yves Strub and Nikhil Swamy and Cedric Fournet and Juan Chen", title = "Self-certification: bootstrapping certified typecheckers in {F*} with {Coq}", journal = j-SIGPLAN, volume = "47", number = "1", pages = "571--584", month = jan, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2103621.2103723", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Mar 15 18:16:55 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Well-established dependently-typed languages like Agda and Coq provide reliable ways to build and check formal proofs. 
Several other dependently-typed languages such as Aura, ATS, Cayenne, Epigram, F*, F7, Fine, Guru, PCML5, and Ur also explore reliable ways to develop and verify programs. All these languages shine in their own regard, but their implementations do not themselves enjoy the degree of safety provided by machine-checked verification. We propose a general technique called self-certification that allows a typechecker for a suitably expressive language to be certified for correctness. We have implemented this technique for F*, a dependently typed language on the {.NET} platform. Self-certification involves implementing a typechecker for F* in F*, while using all the conveniences F* provides for the compiler-writer (e.g., partiality, effects, implicit conversions, proof automation, libraries). This typechecker is given a specification (in F*) strong enough to ensure that it computes valid typing derivations. We obtain a typing derivation for the core typechecker by running it on itself, and we export it to Coq as a type-derivation certificate. By typechecking this derivation (in Coq) and applying the F* metatheory (also mechanized in Coq), we conclude that our type checker is correct. Once certified in this manner, the F* typechecker is emancipated from Coq.\par Self-certification leads to an efficient certification scheme --- we no longer depend on verifying certificates in Coq --- as well as a more broadly applicable one. For instance, the self-certified F* checker is suitable for use in adversarial settings where Coq is not intended for use, such as run-time certification of mobile code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '12 conference proceedings.", } @Article{DHondt:2012:ISS, author = "Theo D'Hondt", title = "An interpreter for server-side {HOP}", journal = j-SIGPLAN, volume = "47", number = "2", pages = "1--12", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047851", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "HOP is a Scheme-based multi-tier programming language for the Web. The client-side of a program is compiled to JavaScript, while the server-side is executed by a mix of natively compiled code and interpreted code. At the time where HOP programs were basic scripts, the performance of the server-side interpreter was not a concern; an inefficient interpreter was acceptable. As HOP expanded, HOP programs got larger and more complex. A more efficient interpreter was necessary. This new interpreter is described in this paper. It is compact, its whole implementation counting no more than 2.5 KLOC. It is more than twice faster than the old interpreter and consumes less than a third of its memory. 
Although it cannot compete with static or JIT native compilers, our experimental results show that it is amongst the fastest interpreters for dynamic languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Chang:2012:IOT, author = "Mason Chang and Bernd Mathiske and Edwin Smith and Avik Chaudhuri and Andreas Gal and Michael Bebenita and Christian Wimmer and Michael Franz", title = "The impact of optional type information on {JIT} compilation of dynamically typed languages", journal = j-SIGPLAN, volume = "47", number = "2", pages = "13--24", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047853", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Optionally typed languages enable direct performance comparisons between untyped and type annotated source code. We present a comprehensive performance evaluation of two different JIT compilers in the context of ActionScript, a production-quality optionally typed language. One JIT compiler is optimized for quick compilation rather than JIT compiled code performance. The second JIT compiler is a more aggressively optimizing compiler, performing both high-level and low-level optimizations. We evaluate both JIT compilers directly on the same benchmark suite, measuring their performance changes across fully typed, partially typed, and untyped code. Such evaluations are especially relevant to dynamically typed languages such as JavaScript, which are currently evaluating the idea of adding optional type annotations. We demonstrate that low-level optimizations rarely accelerate the program enough to pay back the investment into performing them in an optionally typed language. Our experiments and data demonstrate that high-level optimizations are required to improve performance by any significant amount.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Homescu:2012:HTJ, author = "Andrei Homescu and Alex Suhan", title = "{HappyJIT}: a tracing {JIT} compiler for {PHP}", journal = j-SIGPLAN, volume = "47", number = "2", pages = "25--36", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047854", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current websites are a combination of server-generated dynamic content with client-side interactive programs. Dynamically typed languages have gained a lot of ground in both of these domains. The growth of Web 2.0 has introduced a myriad of websites which contain personalized content, which is specific to the user. PHP or Python programs generate the actual HTML page after querying a database and processing the results, which are then presented by the browser. It is becoming more and more vital to accelerate the execution of these programs, as this is a significant part of the total time needed to present the page to the user.
This paper presents a novel interpreter for the PHP language written in RPython, which the PyPy translator then translates into C. The translator integrates into the interpreter a tracing just-in-time compiler which optimizes the hottest loops in the interpreted programs. We also describe a data model that supports all the data types in the PHP language, such as references and iterators. We evaluate the performance of this interpreter, showing that speedups up to a factor of 8 are observed using this approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Zhao:2012:PTI, author = "Tian Zhao", title = "Polymorphic type inference for scripting languages with object extensions", journal = j-SIGPLAN, volume = "47", number = "2", pages = "37--50", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047855", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a polymorphic type inference algorithm for a small subset of JavaScript. The goal is to prevent accessing undefined members of objects. We define a type system that allows explicit extension of objects through an add operation and implicit extension through method calls. The type system also permits strong updates and unrestricted extensions to new objects. The type inference algorithm is modular so that each function definition is only analyzed once and larger programs can be checked incrementally.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Hirschfeld:2012:EUC, author = "Robert Hirschfeld and Michael Perscheid and Michael Haupt", title = "Explicit use-case representation in object-oriented programming languages", journal = j-SIGPLAN, volume = "47", number = "2", pages = "51--60", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047856", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Use-cases are considered an integral part of most contemporary development processes since they describe a software system's expected behavior from the perspective of its prospective users. However, the presence of and traceability to use-cases is increasingly lost in later more code-centric development activities. Use-cases, being well-encapsulated at the level of requirements descriptions, eventually lead to crosscutting concerns in system design and source code. Tracing which parts of the system contribute to which use-cases is therefore hard and so limits understandability. In this paper, we propose an approach to making use-cases first-class entities in both the programming language and the runtime environment. Having use-cases present in the code and the running system will allow developers, maintainers, and operators to easily associate their units of work with what matters to the users. We suggest the combination of use-cases, acceptance tests, and dynamic analysis to automatically associate source code with use-cases.
We present UseCasePy, an implementation of our approach to use-case-centered development in Python, and its application to the Django Web framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Chevalier-Boisvert:2012:BSH, author = "Maxime Chevalier-Boisvert and Erick Lavoie and Marc Feeley and Bruno Dufour", title = "Bootstrapping a self-hosted research virtual machine for {JavaScript}: an experience report", journal = j-SIGPLAN, volume = "47", number = "2", pages = "61--72", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047858", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript is one of the most widely used dynamic languages. The performance of existing JavaScript VMs, however, is lower than that of VMs for static languages. There is a need for a research VM to easily explore new implementation approaches. This paper presents the Tachyon JavaScript VM which was designed to be flexible and to allow experimenting with new approaches for the execution of JavaScript. The Tachyon VM is itself implemented in JavaScript and currently supports a subset of the full language that is sufficient to bootstrap itself. The paper discusses the architecture of the system and in particular the bootstrapping of a self-hosted VM. Preliminary performance results indicate that our VM, with few optimizations, can already execute code faster than a commercial JavaScript interpreter on some benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Klock:2012:BLR, author = "Felix S. {Klock II} and William D. Clinger", title = "Bounded-latency regional garbage collection", journal = j-SIGPLAN, volume = "47", number = "2", pages = "73--84", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047859", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Regional garbage collection is scalable, with theoretical worst-case bounds for gc latency, MMU, and throughput that are independent of mutator behavior and the volume of reachable storage.
Regional collection improves upon the worst-case pause times and MMU seen in most other general-purpose collectors, including garbage-first and concurrent mark\slash sweep collectors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Tew:2012:PAM, author = "Kevin Tew and James Swaine and Matthew Flatt and Robert Bruce Findler and Peter Dinda", title = "{Places}: adding message-passing parallelism to {Racket}", journal = j-SIGPLAN, volume = "47", number = "2", pages = "85--96", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047860", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Places bring new support for message-passing parallelism to Racket. This paper gives an overview of the programming model and how we had to modify our existing, sequential runtime-system to support places. We show that the freedom to design the programming model helped us to make the implementation tractable; specifically, we avoided the conventional pain of adding just the right amount of locking to a big, legacy runtime system. The paper presents an evaluation of the design that includes both a real-world application and standard parallel benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Stuchlik:2012:SVD, author = "Andreas Stuchlik and Stefan Hanenberg", title = "Static vs. dynamic type systems: an empirical study about the relationship between type casts and development time", journal = j-SIGPLAN, volume = "47", number = "2", pages = "97--106", month = feb, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2168696.2047861", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Apr 20 17:34:09 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static type systems are essential in computer science. However, there is hardly any knowledge about the impact of type systems on the resulting piece of software. While there are authors that state that static types increase the development speed, other authors argue the other way around. A previous experiment suggests that there are multiple factors that play a role for a comparison of statically and dynamically typed languages. As a follow-up, this paper presents an empirical study with 21 subjects that compares programming tasks performed in Java and Groovy --- programming tasks where the number of expected type casts varies in the statically typed language. The result of the study is that the dynamically typed group solved the complete programming tasks significantly faster for most tasks --- but that for larger tasks with a higher number of type casts no significant difference could be found.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '11 conference proceedings.", } @Article{Schultz:2012:MCP, author = "Ulrik P.
Schultz", title = "Multilingual component programming in {Racket}", journal = j-SIGPLAN, volume = "47", number = "3", pages = "1--2", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047864", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the world of Racket, software systems consist of inter-operating components in different programming languages. A component's implementation language may provide the full functionality of Racket, or it may support a small domain-specific notation. Naturally, Racketeers construct languages as Racket components and compose them to create new languages. This talk will present the ideas behind Racket: language-specific components, the composition of components, and, most importantly, the rich support for building languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Rosenmuller:2012:TDS, author = "Marko Rosenm{\"u}ller and Norbert Siegmund and Mario Pukall and Sven Apel", title = "Tailoring dynamic software product lines", journal = j-SIGPLAN, volume = "47", number = "3", pages = "3--12", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047866", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software product lines (SPLs) and adaptive systems aim at variability to cope with changing requirements. Variability can be described in terms of features, which are central for development and configuration of SPLs. In traditional SPLs, features are bound statically before runtime. By contrast, adaptive systems support feature binding at runtime and are sometimes called dynamic SPLs (DSPLs). DSPLs are usually built from coarse-grained components, which reduces the number of possible application scenarios. To overcome this limitation, we closely integrate static binding of traditional SPLs and runtime adaptation of DSPLs. We achieve this integration by statically generating a tailor-made DSPL from a highly customizable SPL. The generated DSPL provides only the runtime variability required by a particular application scenario and the execution environment. The DSPL supports self-configuration based on coarse-grained modules. We provide a feature-based adaptation mechanism that reduces the effort of computing an optimal configuration at runtime. 
In a case study, we demonstrate the practicability of our approach and show that a seamless integration of static binding and runtime adaptation reduces the complexity of the adaptation process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '11 conference proceedings.", } @Article{Batory:2012:FIP, author = "Don Batory and Peter H{\"o}fner and Jongwook Kim", title = "Feature interactions, products, and composition", journal = j-SIGPLAN, volume = "47", number = "3", pages = "13--22", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047867", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The relationship between feature modules and feature interactions is not well-understood. To explain classic examples of feature interaction, we show that features are not only composed sequentially, but also by cross-product and interaction operations that heretofore were implicit in the literature. Using the Colored IDE (CIDE) tool as our starting point, we (a) present a formal model of these operations, (b) show how it connects and explains previously unrelated results in Feature Oriented Software Development (FOSD), and (c) describe a tool, based on our formalism, that demonstrates how changes in composed documents can be back-propagated to their original feature module definitions, thereby improving FOSD tooling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '11 conference proceedings.", } @Article{Ribeiro:2012:IFD, author = "M{\'a}rcio Ribeiro and Felipe Queiroz and Paulo Borba and T{\'a}rsis Tol{\^e}do and Claus Brabrand and S{\'e}rgio Soares", title = "On the impact of feature dependencies when maintaining preprocessor-based software product lines", journal = j-SIGPLAN, volume = "47", number = "3", pages = "23--32", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047868", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "During Software Product Line (SPL) maintenance tasks, Virtual Separation of Concerns (VSoC) allows the programmer to focus on one feature and hide the others. However, since features depend on each other through variables and control-flow, feature modularization is compromised since the maintenance of one feature may break another. In this context, emergent interfaces can capture dependencies between the feature we are maintaining and the others, making developers aware of dependencies. To better understand the impact of code level feature dependencies during SPL maintenance, we have investigated the following two questions: how often methods with preprocessor directives contain feature dependencies? How do feature dependencies impact maintenance effort when using VSoC and emergent interfaces? Answering the former is important for assessing how often we may face feature dependency problems. Answering the latter is important to better understand to what extent emergent interfaces complement VSoC during maintenance tasks.
To answer them, we analyze 43 SPLs of different domains, sizes, and languages. The data we collect from them complement previous work on preprocessor usage. They reveal that the feature dependencies we consider in this paper are reasonably common in practice; and that emergent interfaces can reduce maintenance effort during the SPL maintenance tasks we regard here.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '11 conference proceedings.", } @Article{Neves:2012:ISE, author = "La{\'\i}s Neves and Leopoldo Teixeira and Dem{\'o}stenes Sena and Vander Alves and Uir{\'a} Kulezsa and Paulo Borba", title = "Investigating the safe evolution of software product lines", journal = j-SIGPLAN, volume = "47", number = "3", pages = "33--42", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047869", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The adoption of a product line strategy can bring significant productivity and time to market improvements. However, evolving a product line is risky because it might impact many products and their users. So when evolving a product line to introduce new features or to improve its design, it is important to make sure that the behavior of existing products is not affected. In fact, to preserve the behavior of existing products one usually has to analyze different artifacts, like feature models, configuration knowledge and the product line core assets. To better understand this process, in this paper we discover and analyze concrete product line evolution scenarios and, based on the results of this study, we describe a number of safe evolution templates that developers can use when working with product lines. For each template, we show examples of their use in existing product lines. We evaluate the templates by also analyzing the evolution history of two different product lines and demonstrating that they can express the corresponding modifications and then help to avoid the mistakes that we identified during our analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '11 conference proceedings.", } @Article{Hannousse:2012:SAA, author = "Abdelhakim Hannousse and R{\'e}mi Douence and Gilles Ardourel", title = "Static analysis of aspect interaction and composition in component models", journal = j-SIGPLAN, volume = "47", number = "3", pages = "43--52", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047871", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Component based software engineering and aspect orientation are claimed to be two complementary approaches. While the former ensures the modularity and the reusability of software entities, the latter enables the modularity of crosscutting concerns that cannot be modularized as regular components. Nowadays, several approaches and frameworks are dedicated to integrating aspects into component models.
However, when several aspects are woven, aspects may interact with each other which often results in undesirable behavior. The contribution of this paper is twofold. First, we show how aspectized component models can be formally modeled in UPPAAL model checker in order to detect negative interactions (a.k.a., interferences) among aspects. Second, we provide an extendible catalog of composition operators used for aspect composition. We illustrate our general approach with an airport Internet service example.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Otte:2012:ICB, author = "William R. Otte and Aniruddha Gokhale and Douglas C. Schmidt and Johnny Willemsen", title = "Infrastructure for component-based {DDS} application development", journal = j-SIGPLAN, volume = "47", number = "3", pages = "53--62", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047872", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Enterprise distributed real-time and embedded (DRE) systems are increasingly being developed with the use of component-based software techniques. Unfortunately, commonly used component middleware platforms provide limited support for event-based publish/subscribe (pub/sub) mechanisms that meet both quality-of-service (QoS) and configurability requirements of DRE systems. On the other hand, although pub/sub technologies, such as OMG Data Distribution Service (DDS), support a wide range of QoS settings, the level of abstraction they provide make it hard to configure them due to the significant source-level configuration that must be hard-coded at compile time or tailored at run-time using proprietary, ad hoc configuration logic. Moreover, developers of applications using native pub/sub technologies must write large amounts of boilerplate ``glue'' code to support run-time configuration of QoS properties, which is tedious and error-prone. This paper describes a novel, generative approach that combines the strengths of QoS-enabled pub/sub middleware with component-based middleware technologies. In particular, this paper describes the design and implementation of DDS4CIAO which addresses a number of inherent and accidental complexities in the DDS4CCM standard. DDS4CIAO simplifies the development, deployment, and configuration of component-based DRE systems that leverage DDS's powerful QoS capabilities by provisioning DDS QoS policy settings and simplifying the development of DDS applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Li:2012:GGP, author = "Yulin Li and Gordon S. 
{Novak, Jr.}", title = "Generation of geometric programs specified by diagrams", journal = j-SIGPLAN, volume = "47", number = "3", pages = "63--72", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047874", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The GeoGram system [21] generates programs for geometric computations by combining generic software components as specified by diagrams constructed using a graphical interface. The user specifies known and desired quantities. As diagrams are constructed, the system maintains symbolic geometric facts describing the construction. Inferences based on the diagram are used to derive new facts and to introduce new objects based on geometric reasoning, to filter choices presented to the user, to interpret the user's intention in ambiguous cases, to detect over-specification, and to generate the program. A knowledge base of descriptions of generic software components is used to prove that features of the geometry can be computed from known values. These local proofs are combined to guide generation of a program that computes the desired values from inputs. The library of generic geometric program components is used to generate both in-line code and specialized subroutines; partial evaluation improves the efficiency of the generated code. The resulting program is automatically translated into the desired language. The program can also be run interactively to simulate the geometry by generating graphical traces on the diagram as input quantities are varied.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Steck:2012:MDE, author = "Andreas Steck and Alex Lotz and Christian Schlegel", title = "Model-driven engineering and run-time model-usage in service robotics", journal = j-SIGPLAN, volume = "47", number = "3", pages = "73--82", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047875", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The development of service robots has gained more and more attention over the last years. A major challenge on the way towards industrial-strength service robotic systems is to make the step from code-driven to model-driven engineering. In this work we propose to put models into the focus of the whole life-cycle of robotic systems covering design-time as well as run-time. We describe how to explicate parameters, properties and resource information in the models at design-time and how to take these information into account by the run-time system of the robot to support its decision making process. 
We underpin our work by an exhaustive real-world example which is completely developed with our tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Vermolen:2012:GDM, author = "Sander Dani{\"e}l Vermolen and Guido Wachsmuth and Eelco Visser", title = "Generating database migrations for evolving {Web} applications", journal = j-SIGPLAN, volume = "47", number = "3", pages = "83--92", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047876", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "WebDSL is a domain-specific language for the implementation of dynamic web applications with a rich data model. It provides developers with object-oriented data modeling concepts but abstracts over implementation details for persisting application data in relational databases. When the underlying data model of an application evolves, persisted application data has to be migrated. While implementing migration at the database level breaks the abstractions provided by WebDSL, an implementation at the data model level requires to intermingle migration with application code. In this paper, we present a domain-specific language for the coupled evolution of data models and application data. It allows to specify data model evolution as a separate concern at the data model level and can be compiled to migration code at the database level. Its linguistic integration with WebDSL enables static checks for evolution validity and correctness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Danvy:2012:PFS, author = "Olivier Danvy", title = "Pragmatics for formal semantics", journal = j-SIGPLAN, volume = "47", number = "3", pages = "93--94", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047878", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This tech talk describes how to write and how to inter-derive formal semantics for sequential programming languages. The progress reported here is (1) concrete guidelines to write each formal semantics to alleviate their proof obligations, and (2) simple calculational tools to obtain a formal semantics from another.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Shubert:2012:AMB, author = "Gary J. 
Shubert", title = "Application of model based development to flexible code generation", journal = j-SIGPLAN, volume = "47", number = "3", pages = "95--96", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047880", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This address will present the authors views and perspectives on the past, present and future use of model based development techniques to enable the automated generation of source code and other forms of programming. This address will discuss past and present use of model based development and automated code generation at Lockheed Martin, with special emphasis on NASA's Orion Multi-Purpose Crew Vehicle Program. This address will discuss the advantages and disadvantages, associated with the current state of the practice techniques and tools, used to automatically generate source code from general purpose and domain specific models. This address will discuss the obstacles and enablers, associated with achieving the desired future state of complete and efficient automated generation of programming through transformation of general purpose and domain specific models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Asai:2012:RDS, author = "Kenichi Asai", title = "Reflection in direct style", journal = j-SIGPLAN, volume = "47", number = "3", pages = "97--106", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047882", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A reflective language enables us to access, inspect, and/or modify the language semantics from within the same language framework. Although the degree of semantics exposure differs from one language to another, the most powerful approach, referred to as the behavioral reflection, exposes the entire language semantics (or the language interpreter) that defines behavior of user programs for user inspection/modification. In this paper, we deal with the behavioral reflection in the context of a functional language Scheme. In particular, we show how to construct a reflective interpreter where user programs are interpreted by the tower of metacircular interpreters and have the ability to change any parts of the interpreters during execution. Its distinctive feature compared to the previous work is that the metalevel interpreters observed by users are written in direct style. Based on the past attempt of the present author, the current work solves the level-shifting anomaly by defunctionalizing and inspecting the top of the continuation frames. The resulting system enables us to freely go up and down the levels and access/modify the direct-style metalevel interpreter. 
This is in contrast to the previous system where metalevel interpreters were written in continuation-passing style (CPS) and only CPS functions could be exposed to users for modification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Nystrom:2012:FRT, author = "Nathaniel Nystrom and Derek White and Kishen Das", title = "{Firepile}: run-time compilation for {GPUs} in {Scala}", journal = j-SIGPLAN, volume = "47", number = "3", pages = "107--116", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047883", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent advances have enabled GPUs to be used as general-purpose parallel processors on commodity hardware for little cost. However, the ability to program these devices has not kept up with their performance. The programming model for GPUs has a number of restrictions that make it difficult to program. For example, software running on the GPU cannot perform dynamic memory allocation, requiring the programmer to pre-allocate all memory the GPU might use. To achieve good performance, GPU programmers must also be aware of how data is moved between host and GPU memory and between the different levels of the GPU memory hierarchy. We describe Firepile, a library for GPU programming in Scala. The library enables a subset of Scala to be executed on the GPU. Code trees can be created from run-time function values, which can then be analyzed and transformed to generate GPU code. A key property of this mechanism is that it is modular: unlike with other meta-programming constructs, the use of code trees need not be exposed in the library interface. Code trees are general and can be used by library writers in other application domains. Our experiments show Firepile users can achieve performance comparable to C code targeted to the GPU with shorter, simpler, and easier-to-understand code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Esmaeilsabzali:2012:MAC, author = "Shahram Esmaeilsabzali and Bernd Fischer and Joanne M. Atlee", title = "Monitoring aspects for the customization of automatically generated code for big-step models", journal = j-SIGPLAN, volume = "47", number = "3", pages = "117--126", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047884", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The output of a code generator is assumed to be correct and not usually intended to be read or modified; yet programmers are often interested in this, e.g., to monitor a system property. Here, we consider code customization for a family of code generators associated with big-step executable modelling languages (e.g., statecharts). We introduce a customization language that allows us to express customization scenarios for the generated code independently of a specific big-step execution semantics. 
These customization scenarios are all different forms of runtime monitors, which lend themselves to a principled, uniform implementation for observation and code extension. A monitor is given in terms of the enabledness and execution of the transitions of a model and a reachability relation between two states of the execution of the model during a big step. For each monitor, we generate the aspect code that is incorporated into the output of a code generator to implement the monitor at the generated-code level. Thus, we provide means for code analysis through using the vocabulary of a model, rather than the detail of the generated code. Our technique not only requires the code generators to reveal only limited information about their code generation mechanisms, but also keeps the structure of the generated code intact. We demonstrate how various useful properties of a model, or a language, can be checked using our monitors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Lindeman:2012:DDD, author = "Ricky T. Lindeman and Lennart C. L. Kats and Eelco Visser", title = "Declaratively defining domain-specific language debuggers", journal = j-SIGPLAN, volume = "47", number = "3", pages = "127--136", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047885", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tool support is vital to the effectiveness of domain-specific languages. With language workbenches, domain-specific languages and their tool support can be generated from a combined, high-level specification. This paper shows how such a specification can be extended to describe a debugger for a language. To realize this, we introduce a meta-language for coordinating the debugger that abstracts over the complexity of writing a debugger by hand. We describe the implementation of a language-parametric infrastructure for debuggers that can be instantiated based on this specification. The approach is implemented in the Spoofax language workbench and validated through realistic case studies with the Stratego transformation language and the WebDSL web programming language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Arnoldus:2012:LMU, author = "B. J. Arnoldus and M. G. J. van den Brand and A. Serebrenik", title = "Less is more: unparser-completeness of metalanguages for template engines", journal = j-SIGPLAN, volume = "47", number = "3", pages = "137--146", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047887", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A code generator is a program translating an input model into code. In this paper we focus on template-based code generators in the context of the model view controller architecture (MVC). The language in which the code generator is written is known as a metalanguage in the code generation parlance. 
The metalanguage should be, on the one side, expressive enough to be of practical value, and, on the other side, restricted enough to enforce the separation between the view and the model, according to the MVC. In this paper we advocate the notion of unparser-complete metalanguages as providing the right level of expressivity. An unparser-complete metalanguage is capable of expressing an unparser, a code generator that translates any legal abstract syntax tree into an equivalent sentence of the corresponding context-free language. A metalanguage not able to express an unparser will fail to produce all sentences belonging to the corresponding context-free language. A metalanguage able to express more than an unparser will also be able to implement code violating the model/view separation. We further show that a metalanguage with the power of a linear deterministic tree-to-string transducer is unparser-complete. Moreover, this metalanguage has been successfully applied in a non-trivial case study where an existing code generator is refactored using templates.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Slaatten:2012:TAG, author = "Vidar Sl{\aa}tten and Frank Alexander Kraemer and Peter Herrmann", title = "Towards automatic generation of formal specifications to validate and verify reliable distributed systems: a method exemplified by an industrial case study", journal = j-SIGPLAN, volume = "47", number = "3", pages = "147--156", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047888", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The validation and verification of reliable systems is a difficult and complex task, mainly for two reasons: First, it is difficult to precisely state which formal properties a system needs to fulfil to be of high quality. Second, it is complex to automatically verify such properties, due to the size of the analysis state space which grows exponentially with the number of components. We tackle these problems by a tool-supported method which embeds application functionality in building blocks that use UML activities to describe their internal behaviour. To describe their externally visible behaviour, we use a combination of complementary interface contracts, so-called ESMs and EESMs. In this paper, we present an extension of the interface contracts, External Reliability Contracts (ERCs), that capture failure behaviour. This separation of different behavioural aspects in separate descriptions facilitates a two-step analysis, in which the first step is completely automated and the second step is facilitated by an automatic translation of the models to the input syntax of the model checker TLC. Further, the cascade of contracts is used to separate the work of domain and reliability experts. 
The concepts are proposed with the background of a real industry case, and we demonstrate how the use of interface contracts leads to significantly smaller state spaces in the analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Sobernig:2012:CCA, author = "Stefan Sobernig and Patrick Gaubatz and Mark Strembeck and Uwe Zdun", title = "Comparing complexity of {API} designs: an exploratory experiment on {DSL}-based framework integration", journal = j-SIGPLAN, volume = "47", number = "3", pages = "157--166", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047890", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded, textual DSLs are often provided as an API wrapped around object-oriented application frameworks to ease framework integration. While literature presents claims that DSL-based application development is beneficial, empirical evidence for this is rare. We present the results of an experiment comparing the complexity of three different object-oriented framework APIs and an embedded, textual DSL. For this comparative experiment, we implemented the same, non-trivial application scenario using these four different APIs. Then, we performed an Object-Points (OP) analysis, yielding indicators for the API complexity specific to each API variant. The main observation for our experiment is that the embedded, textual DSL incurs the smallest API complexity. Although the results are exploratory, as well as limited to the given application scenario and a single embedded DSL, our findings can direct future empirical work. The experiment design is applicable for similar API design evaluations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Erdweg:2012:GLE, author = "Sebastian Erdweg and Lennart C. L. Kats and Tillmann Rendel and Christian K{\"a}stner and Klaus Ostermann and Eelco Visser", title = "Growing a language environment with editor libraries", journal = j-SIGPLAN, volume = "47", number = "3", pages = "167--176", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047891", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large software projects consist of code written in a multitude of different (possibly domain-specific) languages, which are often deeply interspersed even in single files. While many proposals exist on how to integrate languages semantically and syntactically, the question of how to support this scenario in integrated development environments (IDEs) remains open: How can standard IDE services, such as syntax highlighting, outlining, or reference resolving, be provided in an extensible and compositional way, such that an open mix of languages is supported in a single file? Based on our library-based syntactic extension language for Java, SugarJ, we propose to make IDEs extensible by organizing editor services in editor libraries. 
Editor libraries are libraries written in the object language, SugarJ, and hence activated and composed through regular import statements on a file-by-file basis. We have implemented an IDE for editor libraries on top of SugarJ and the Eclipse-based Spoofax language workbench. We have validated editor libraries by evolving this IDE into a fully-fledged and schema-aware XML editor as well as an extensible Latex editor, which we used for writing this paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Freeman:2012:HPH, author = "John Freeman and Jaakko J{\"a}rvi and Wonseok Kim and Mat Marcus and Sean Parent", title = "Helping programmers help users", journal = j-SIGPLAN, volume = "47", number = "3", pages = "177--184", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047892", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "User interfaces exhibit a wide range of features that are designed to assist users. Interaction with one widget may trigger value changes, disabling, or other behaviors in other widgets. Such automatic behavior may be confusing or disruptive to users. Research literature on user interfaces offers a number of solutions, including interface features for explaining or controlling these behaviors. To help programmers help users, the implementation costs of these features need to be much lower. Ideally, they could be generated for free. This paper shows how several help and control mechanisms can be implemented as algorithms and reused across interfaces, making the cost of their adoption negligible. Specifically, we describe generic help mechanisms for visualizing data flow and explaining command deactivation, and a mechanism for controlling the flow of data. A reusable implementation of these features is enabled by our property model framework, where the data manipulated through a user interface is modeled as a constraint system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Launchbury:2012:TBC, author = "John Launchbury", title = "Theorem-based circuit derivation in {Cryptol}", journal = j-SIGPLAN, volume = "47", number = "3", pages = "185--186", month = mar, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2189751.2047894", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:00 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Even though step-by-step refinement has long been seen as desirable, it is hard to find compelling industrial applications of the technique. In theory, transforming a high-level specification into a high-performance implementation is an ideal means of producing a correct design, but in practice it is hard to make it work, and even harder to make it worthwhile. This talk describes an exception. We introduce the domain-specific language, Cryptol, and work up to a design experience in which theorem-based refinement played a crucial role in producing an industrial quality FPGA encryptor and decryptor for AES. 
Quite simply, we are unlikely to have succeeded without the technique. The Cryptol specification language was designed by Galois for the NSA as a public standard for specifying cryptographic algorithms. A Cryptol reference specification can serve as the formal documentation for a cryptographic module, eliminating the need for separate and voluminous English descriptions. Cryptol is fully executable, allowing designers to experiment with their programs incrementally as their designs evolve. Cryptol compilers can generate C, C++, and Haskell software implementations, and VHDL or Verilog HDL hardware implementations. These generators can significantly reduce overall life-cycle costs of cryptographic solutions. For example, Cryptol allows engineers and mathematicians to program cryptographic algorithms on FPGAs as if they were writing software. The design experience we describe runs as follows: we begin with a specification for AES written in Cryptol, and over a series of five design stages we produce an industrial grade encrypt core. In each stage, we state theorems which relate the component behaviors in one stage with the corresponding behaviors in the refinement. The resulting cores, running at 350Mhz-440Mhz depending on the FPGA part, bear little relationship to the original, except that the step-by-step theorems ensured we had not gone astray. We then repeat the pattern in generating a circuit for AES decrypt. While there are many similarities between encrypt and decrypt in AES, there are some crucial differences with regard to high performance. First concerns the generation of key material. The AES key is used as a seed for a specific pseudo-random number generator which produces key material for use in each of the AES rounds. For encrypt, the key-generator runs in sync with the action of encryption, so may be scheduled alongside it. For decrypt, they run counter to one-another, creating a major challenge to be overcome. Second, the generated key material has an additional transformation applied to it, which occurs deep in the middle of the high performing core. Using theorems as stepping stones along the way, we redesign the key expansion algorithm so that it will run in sync with the decryption. We then trace parallel steps to the derivation of encrypt, establishing a series of commuting diagrams along the way. Whenever we confronted bugs in the development process, we produced many theorems to isolate the bugs, using theorems as a principled kind of printf. When the bugs were found and eradicated, we elided many of the temporary theorems, leaving behind those that provided important insights into the behavior of the code. This talk is a story of the journey with demonstrations of the tool at work. Its ultimate message is to highlight the value of including a theorem facility within purely functional domain-specific languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GCPE '11 conference proceedings.", } @Article{Larus:2012:CWC, author = "James R. 
Larus", title = "The cloud will change everything", journal = j-SIGPLAN, volume = "47", number = "4", pages = "1--2", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud computing is fast on its way to becoming a meaningless, oversold marketing slogan. In the midst of this hype, it is easy to overlook the fundamental change that is occurring. Computation, which used to be confined to the machine beside your desk, is increasingly centralized in vast shared facilities and at the same time liberated by battery-powered, wireless devices. Performance, security, and reliability are no longer problems that can be considered in isolation --- the wires and software connecting pieces offer more challenges and opportunities than components themselves. The eXtreme Computing Group (XCG) in Microsoft Research is taking a holistic approach to research in this area, by bring together researchers and developers with expertise in data center design, computer architecture, operating systems, computer security, programming language, mobile computation, and user interfaces to tackle the challenges of cloud computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Yuan:2012:ISD, author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan Zhou and Stefan Savage", title = "Improving software diagnosability via log enhancement", journal = j-SIGPLAN, volume = "47", number = "4", pages = "3--14", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Diagnosing software failures in the field is notoriously difficult, in part due to the fundamental complexity of trouble-shooting any complex software system, but further exacerbated by the paucity of information that is typically available in the production setting. Indeed, for reasons of both overhead and privacy, it is common that only the run-time log generated by a system (e.g., syslog) can be shared with the developers. Unfortunately, the ad-hoc nature of such reports are frequently insufficient for detailed failure diagnosis. This paper seeks to improve this situation within the rubric of existing practice. We describe a tool, LogEnhancer that automatically ``enhances'' existing logging code to aid in future post-failure debugging. We evaluate LogEnhancer on eight large, real-world applications and demonstrate that it can dramatically reduce the set of potential root failure causes that must be considered during diagnosis while imposing negligible overheads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Veeraraghavan:2012:DPS, author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin Wester and Jessica Ouyang and Peter M. 
Chen and Jason Flinn and Satish Narayanasamy", title = "{DoublePlay}: parallelizing sequential logging and replay", journal = j-SIGPLAN, volume = "47", number = "4", pages = "15--26", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deterministic replay systems record and reproduce the execution of a hardware or software system. In contrast to replaying execution on uniprocessors, deterministic replay on multiprocessors is very challenging to implement efficiently because of the need to reproduce the order or values read by shared memory operations performed by multiple threads. In this paper, we present DoublePlay, a new way to efficiently guarantee replay on commodity multiprocessors. Our key insight is that one can use the simpler and faster mechanisms of single-processor record and replay, yet still achieve the scalability offered by multiple cores, by using an additional execution to parallelize the record and replay of an application. DoublePlay timeslices multiple threads on a single processor, then runs multiple time intervals (epochs) of the program concurrently on separate processors. This strategy, which we call uniparallelism, makes logging much easier because each epoch runs on a single processor (so threads in an epoch never simultaneously access the same memory) and different epochs operate on different copies of the memory. Thus, rather than logging the order of shared-memory accesses, we need only log the order in which threads in an epoch are timesliced on the processor. DoublePlay runs an additional execution of the program on multiple processors to generate checkpoints so that epochs run in parallel. We evaluate DoublePlay on a variety of client, server, and scientific parallel benchmarks; with spare cores, DoublePlay reduces logging overhead to an average of 15\% with two worker threads and 28\% with four threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Casper:2012:HAT, author = "Jared Casper and Tayo Oguntebi and Sungpack Hong and Nathan G. Bronson and Christos Kozyrakis and Kunle Olukotun", title = "Hardware acceleration of transactional memory on commodity systems", journal = j-SIGPLAN, volume = "47", number = "4", pages = "27--38", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The adoption of transactional memory is hindered by the high overhead of software transactional memory and the intrusive design changes required by previously proposed TM hardware. We propose that hardware to accelerate software transactional memory (STM) can reside outside an unmodified commodity processor core, thereby substantially reducing implementation costs. This paper introduces Transactional Memory Acceleration using Commodity Cores (TMACC), a hardware-accelerated TM system that does not modify the processor, caches, or coherence protocol. 
We present a complete hardware implementation of TMACC using a rapid prototyping platform. Using this hardware, we implement two unique conflict detection schemes which are accelerated using Bloom filters on an FPGA. These schemes employ novel techniques for tolerating the latency of fine-grained asynchronous communication with an out-of-core accelerator. We then conduct experiments to explore the feasibility of accelerating TM without modifying existing system hardware. We show that, for all but short transactions, it is not necessary to modify the processor to obtain substantial improvement in TM performance. In these cases, TMACC outperforms an STM by an average of 69\% in applications using moderate-length transactions, showing maximum speedup within 8\% of an upper bound on TM acceleration. Overall, we demonstrate that hardware can substantially accelerate the performance of an STM on unmodified commodity processors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Dalessandro:2012:HNC, author = "Luke Dalessandro and Fran{\c{c}}ois Carouge and Sean White and Yossi Lev and Mark Moir and Michael L. Scott and Michael F. Spear", title = "Hybrid {NOrec}: a case study in the effectiveness of best effort hardware transactional memory", journal = j-SIGPLAN, volume = "47", number = "4", pages = "39--52", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory (TM) is a promising synchronization mechanism for the next generation of multicore processors. Best-effort Hardware Transactional Memory (HTM) designs, such as Sun's prototype Rock processor and AMD's proposed Advanced Synchronization Facility (ASF), can efficiently execute many transactions, but abort in some cases due to various limitations. Hybrid TM systems can use a compatible software TM (STM) in such cases. We introduce a family of hybrid TMs built using the recent NOrec STM algorithm that, unlike existing hybrid approaches, provide both low overhead on hardware transactions and concurrent execution of hardware and software transactions. We evaluate implementations for Rock and ASF, exploring how the differing HTM designs affect optimization choices. 
Our investigation yields valuable input for designers of future best-effort HTMs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Singh:2012:EPS, author = "Abhayendra Singh and Daniel Marino and Satish Narayanasamy and Todd Millstein and Madan Musuvathi", title = "Efficient processor support for {DRFx}, a memory model with exceptions", journal = j-SIGPLAN, volume = "47", number = "4", pages = "53--66", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A longstanding challenge of shared-memory concurrency is to provide a memory model that allows for efficient implementation while providing strong and simple guarantees to programmers. The C++0x and Java memory models admit a wide variety of compiler and hardware optimizations and provide sequentially consistent (SC) semantics for data-race-free programs. However, they either do not provide any semantics (C++0x) or provide a hard-to-understand semantics (Java) for racy programs, compromising the safety and debuggability of such programs. In earlier work we proposed the DRFx memory model, which addresses this problem by dynamically detecting potential violations of SC due to the interaction of compiler or hardware optimizations with data races and halting execution upon detection. In this paper, we present a detailed micro-architecture design for supporting the DRFx memory model, formalize the design and prove its correctness, and evaluate the design using a hardware simulator. We describe a set of DRFx-compliant complexity-effective optimizations which allow us to attain performance close to that of TSO (Total Store Order) and DRF0 while providing strong guarantees for all programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Devietti:2012:RRC, author = "Joseph Devietti and Jacob Nelson and Tom Bergan and Luis Ceze and Dan Grossman", title = "{RCDC}: a relaxed consistency deterministic computer", journal = j-SIGPLAN, volume = "47", number = "4", pages = "67--78", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Providing deterministic execution significantly simplifies the debugging, testing, replication, and deployment of multithreaded programs. Recent work has developed deterministic multiprocessor architectures as well as compiler and runtime systems that enforce determinism in current hardware. Such work has incidentally imposed strong memory-ordering properties. Historically, memory ordering has been relaxed in favor of higher performance in shared memory multiprocessors and, interestingly, determinism exacerbates the cost of strong memory ordering. Consequently, we argue that relaxed memory ordering is vital to achieving faster deterministic execution.
This paper introduces RCDC, a deterministic multiprocessor architecture that takes advantage of relaxed memory orderings to provide high-performance deterministic execution with low hardware complexity. RCDC has two key innovations: a hybrid HW/SW approach to enforcing determinism; and a new deterministic execution strategy that leverages data-race-free-based memory models (e.g., the models for Java and C++) to improve performance and scalability without sacrificing determinism, even in the presence of races. In our hybrid HW/SW approach, the only hardware mechanisms required are software-controlled store buffering and support for precise instruction counting; we do not require speculation. A runtime system uses these mechanisms to enforce determinism for arbitrary programs. We evaluate RCDC using PARSEC benchmarks and show that relaxing memory ordering leads to performance and scalability close to nondeterministic execution without requiring any form of speculation. We also compare our new execution strategy to one based on TSO (total-store-ordering) and show that some applications benefit significantly from the extra relaxation. We also evaluate a software-only implementation of our new deterministic execution strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Burnim:2012:SCS, author = "Jacob Burnim and George Necula and Koushik Sen", title = "Specifying and checking semantic atomicity for multithreaded programs", journal = j-SIGPLAN, volume = "47", number = "4", pages = "79--90", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In practice, it is quite difficult to write correct multithreaded programs due to the potential for unintended and nondeterministic interference between parallel threads. A fundamental correctness property for such programs is atomicity---a block of code in a program is atomic if, for any parallel execution of the program, there is an execution with the same overall program behavior in which the block is executed serially. We propose semantic atomicity, a generalization of atomicity with respect to a programmer-defined notion of equivalent behavior. We propose an assertion framework in which a programmer can use bridge predicates to specify noninterference properties at the level of abstraction of their application. Further, we propose a novel algorithm for systematically testing atomicity specifications on parallel executions with a bounded number of interruptions---i.e. atomic blocks whose execution is interleaved with that of other threads. We further propose a set of sound heuristics and optional user annotations that increase the efficiency of checking atomicity specifications in the common case where the specifications hold. We have implemented our assertion framework for specifying and checking semantic atomicity for parallel Java programs, and we have written semantic atomicity specifications for a number of benchmarks. We found that using bridge predicates allowed us to specify the natural and intended atomic behavior of a wider range of programs than did previous approaches. 
Further, in checking our specifications, we found several previously unknown bugs, including in the widely-used java.util.concurrent library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Volos:2012:MLP, author = "Haris Volos and Andres Jaan Tack and Michael M. Swift", title = "{Mnemosyne}: lightweight persistent memory", journal = j-SIGPLAN, volume = "47", number = "4", pages = "91--104", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "New storage-class memory (SCM) technologies, such as phase-change memory, STT-RAM, and memristors, promise user-level access to non-volatile storage through regular memory instructions. These memory devices enable fast user-mode access to persistence, allowing regular in-memory data structures to survive system crashes. In this paper, we present Mnemosyne, a simple interface for programming with persistent memory. Mnemosyne addresses two challenges: how to create and manage such memory, and how to ensure consistency in the presence of failures. Without additional mechanisms, a system failure may leave data structures in SCM in an invalid state, crashing the program the next time it starts. In Mnemosyne, programmers declare global persistent data with the keyword ``pstatic'' or allocate it dynamically. Mnemosyne provides primitives for directly modifying persistent variables and supports consistent updates through a lightweight transaction mechanism. Compared to past work on disk-based persistent memory, Mnemosyne reduces latency to storage by writing data directly to memory at the granularity of an update rather than writing memory pages back to disk through the file system. In tests emulating the performance characteristics of forthcoming SCMs, we show that Mnemosyne can persist data as fast as 3 microseconds. Furthermore, it provides a 35 percent performance increase when applied in the OpenLDAP directory server. In microbenchmark studies we find that Mnemosyne can be up to 1400\% faster than alternative persistence strategies, such as Berkeley DB or Boost serialization, that are designed for disks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Coburn:2012:NHM, author = "Joel Coburn and Adrian M. Caulfield and Ameen Akel and Laura M. Grupp and Rajesh K. Gupta and Ranjit Jhala and Steven Swanson", title = "{NV-Heaps}: making persistent objects fast and safe with next-generation, non-volatile memories", journal = j-SIGPLAN, volume = "47", number = "4", pages = "105--118", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Persistent, user-defined objects present an attractive abstraction for working with non-volatile program state. However, the slow speed of persistent storage (i.e., disk) has restricted their design and limited their performance. 
Fast, byte-addressable, non-volatile technologies, such as phase change memory, will remove this constraint and allow programmers to build high-performance, persistent data structures in non-volatile storage that is almost as fast as DRAM. Creating these data structures requires a system that is lightweight enough to expose the performance of the underlying memories but also ensures safety in the presence of application and system failures by avoiding familiar bugs such as dangling pointers, multiple free()s, and locking errors. In addition, the system must prevent new types of hard-to-find pointer safety bugs that only arise with persistent objects. These bugs are especially dangerous since any corruption they cause will be permanent. We have implemented a lightweight, high-performance persistent object system called NV-heaps that provides transactional semantics while preventing these errors and providing a model for persistence that is easy to use and reason about. We implement search trees, hash tables, sparse graphs, and arrays using NV-heaps, BerkeleyDB, and Stasis. Our results show that NV-heap performance scales with thread count and that data structures implemented using NV-heaps out-perform BerkeleyDB and Stasis implementations by 32x and 244x, respectively, by avoiding the operating system and minimizing other software overheads. We also quantify the cost of enforcing the safety guarantees that NV-heaps provide and measure the costs of NV-heap primitive operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Schupbach:2012:DLA, author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy Roscoe and Simon Peter", title = "A declarative language approach to device configuration", journal = j-SIGPLAN, volume = "47", number = "4", pages = "119--132", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C remains the language of choice for hardware programming (device drivers, bus configuration, etc.): it is fast, allows low-level access, and is trusted by OS developers. However, the algorithms required to configure and reconfigure hardware devices and interconnects are becoming more complex and diverse, with the added burden of legacy support, quirks, and hardware bugs to work around. Even programming PCI bridges in a modern PC is a surprisingly complex problem, and is getting worse as new functionality such as hotplug appears. Existing approaches use relatively simple algorithms, hard-coded in C and closely coupled with low-level register access code, generally leading to suboptimal configurations. We investigate the merits and drawbacks of a new approach: separating hardware configuration logic (algorithms to determine configuration parameter values) from mechanism (programming device registers). The latter we keep in C, and the former we encode in a declarative programming language with constraint-satisfaction extensions. As a test case, we have implemented full PCI configuration, resource allocation, and interrupt assignment in the Barrelfish research operating system, using a concise expression of efficient algorithms in constraint logic programming. 
We show that the approach is tractable, and can successfully configure a wide range of PCs with competitive runtime cost. Moreover, it requires about half the code of the C-based approach in Linux while offering considerably more functionality. Additionally it easily accommodates adaptations such as hotplug, fixed regions, and quirks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Ryzhyk:2012:IDD, author = "Leonid Ryzhyk and John Keys and Balachandra Mirla and Arun Raghunath and Mona Vij and Gernot Heiser", title = "Improved device driver reliability through hardware verification reuse", journal = j-SIGPLAN, volume = "47", number = "4", pages = "133--144", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Faulty device drivers are a major source of operating system failures. We argue that the underlying cause of many driver faults is the separation of two highly-related tasks: device verification and driver development. These two tasks have a lot in common, and result in software that is conceptually and functionally similar, yet kept totally separate. The result is a particularly bad case of duplication of effort: the verification code is correct, but is discarded after the device has been manufactured; the driver code is inferior, but used in actual device operation. We claim that the two tasks, and the software they produce, can and should be unified, and this will result in drastic improvement of device-driver quality and reduction in the development cost and time to market. In this paper we propose a device driver design and verification workflow that achieves such unification. We apply this workflow to develop and test drivers for four different I/O devices and demonstrate that it improves the driver test coverage and allows detecting driver defects that are extremely hard to find using conventional testing techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hashmi:2012:CNI, author = "Atif Hashmi and Andrew Nere and James Jamal Thomas and Mikko Lipasti", title = "A case for neuromorphic {ISAs}", journal = j-SIGPLAN, volume = "47", number = "4", pages = "145--158", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The desire to create novel computing systems, paired with recent advances in neuroscientific understanding of the brain, has led researchers to develop neuromorphic architectures that emulate the brain. To date, such models are developed, trained, and deployed on the same substrate. However, excessive co-dependence between the substrate and the algorithm prevents portability, or at the very least requires reconstructing and retraining the model whenever the substrate changes. 
This paper proposes a well-defined abstraction layer --- the Neuromorphic instruction set architecture, or NISA --- that separates a neural application's algorithmic specification from the underlying execution substrate, and describes the Aivo framework, which demonstrates the concrete advantages of such an abstraction layer. Aivo consists of a NISA implementation for a rate-encoded neuromorphic system based on the cortical column abstraction, a state-of-the-art integrated development and runtime environment (IDE), and various profile-based optimization tools. Aivo's IDE generates code for emulating cortical networks on the host CPU, multiple GPGPUs, or as boolean functions. Its runtime system can deploy and adaptively optimize cortical networks in a manner similar to conventional just-in-time compilers in managed runtime systems (e.g. Java, C\#). We demonstrate the abilities of the NISA abstraction by constructing a cortical network model of the mammalian visual cortex, deploying on multiple execution substrates, and utilizing the various optimization tools we have created. For this hierarchical configuration, Aivo's profiling based network optimization tools reduce the memory footprint by 50\% and improve the execution time by a factor of 3x on the host CPU. Deploying the same network on a single GPGPU results in a 30x speedup. We further demonstrate that a speedup of 480x can be achieved by deploying a massively scaled cortical network across three GPGPUs. Finally, converting a trained hierarchical network to C/C++ boolean constructs on the host CPU results in 44x speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Ransford:2012:MSS, author = "Benjamin Ransford and Jacob Sorber and Kevin Fu", title = "{Mementos}: system support for long-running computation on {RFID}-scale devices", journal = j-SIGPLAN, volume = "47", number = "4", pages = "159--170", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transiently powered computing devices such as RFID tags, kinetic energy harvesters, and smart cards typically rely on programs that complete a task under tight time constraints before energy starvation leads to complete loss of volatile memory. Mementos is a software system that transforms general-purpose programs into interruptible computations that are protected from frequent power losses by automatic, energy-aware state checkpointing. Mementos comprises a collection of optimization passes for the LLVM compiler infrastructure and a linkable library that exercises hardware support for energy measurement while managing state checkpoints stored in nonvolatile memory. We evaluate Mementos against diverse test cases in a trace-driven simulator of transiently powered RFID-scale devices. Although Mementos's energy checks increase run time when energy is plentiful, they allow Mementos to safely suspend execution when energy dwindles, effectively spreading computation across zero or more power failures. 
This paper's contributions are: a study of the runtime environment for programs on RFID-scale devices; an energy-aware state checkpointing system for these devices that is implemented for the MSP430 family of microcontrollers; and a trace-driven simulator of transiently powered RFID-scale devices.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Koukoumidis:2012:PC, author = "Emmanouil Koukoumidis and Dimitrios Lymberopoulos and Karin Strauss and Jie Liu and Doug Burger", title = "Pocket cloudlets", journal = j-SIGPLAN, volume = "47", number = "4", pages = "171--184", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud services accessed through mobile devices suffer from high network access latencies and are constrained by energy budgets dictated by the devices' batteries. Radio and battery technologies will improve over time, but are still expected to be the bottlenecks in future systems. Non-volatile memories (NVM), however, may continue experiencing significant and steady improvements in density for at least ten more years. In this paper, we propose to leverage the abundance in memory capacity of mobile devices to mitigate latency and energy issues when accessing cloud services. We first analyze NVM technology scaling trends, and then propose a cloud service cache architecture that resides on the mobile device's NVM (pocket cloudlet). This architecture utilizes both individual user and community access models to maximize its hit rate, and subsequently reduce overall service latency and energy consumption. As a showcase we present the design, implementation and evaluation of PocketSearch, a search and advertisement pocket cloudlet. We perform mobile search characterization to guide the design of PocketSearch and evaluate it with 200 million mobile queries from the search logs of m.bing.com. We show that PocketSearch can serve, on average, 66\% of the web search queries submitted by an individual user without having to use the slow 3G link, leading to 16x service access speedup. Finally, based on experience with PocketSearch we provide additional insight and guidelines on how future pocket cloudlets should be organized, from both an architectural and an operating system perspective.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Sharma:2012:BMS, author = "Navin Sharma and Sean Barker and David Irwin and Prashant Shenoy", title = "{Blink}: managing server clusters on intermittent power", journal = j-SIGPLAN, volume = "47", number = "4", pages = "185--198", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reducing the energy footprint of data centers continues to receive significant attention due to both its financial and environmental impact. 
There are numerous methods that limit the impact of both factors, such as expanding the use of renewable energy or participating in automated demand-response programs. To take advantage of these methods, servers and applications must gracefully handle intermittent constraints in their power supply. In this paper, we propose blinking---metered transitions between a high-power active state and a low-power inactive state---as the primary abstraction for conforming to intermittent power constraints. We design Blink, an application-independent hardware-software platform for developing and evaluating blinking applications, and define multiple types of blinking policies. We then use Blink to design BlinkCache, a blinking version of memcached, to demonstrate the effect of blinking on an example application. Our results show that a load-proportional blinking policy combines the advantages of both activation and synchronous blinking for realistic Zipf-like popularity distributions and wind/solar power signals by achieving near optimal hit rates (within 15\% of an activation policy), while also providing fairer access to the cache (within 2\% of a synchronous policy) for equally popular objects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hoffmann:2012:DKR, author = "Henry Hoffmann and Stelios Sidiroglou and Michael Carbin and Sasa Misailovic and Anant Agarwal and Martin Rinard", title = "Dynamic knobs for responsive power-aware computing", journal = j-SIGPLAN, volume = "47", number = "4", pages = "199--212", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present PowerDial, a system for dynamically adapting application behavior to execute successfully in the face of load and power fluctuations. PowerDial transforms static configuration parameters into dynamic knobs that the PowerDial control system can manipulate to dynamically trade off the accuracy of the computation in return for reductions in the computational resources that the application requires to produce its results. These reductions translate directly into performance improvements and power savings. Our experimental results show that PowerDial can enable our benchmark applications to execute responsively in the face of power caps that would otherwise significantly impair responsiveness. They also show that PowerDial can significantly reduce the number of machines required to service intermittent load spikes, enabling reductions in power and capital costs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Liu:2012:FSD, author = "Song Liu and Karthik Pattabiraman and Thomas Moscibroda and Benjamin G. 
Zorn", title = "{Flikker}: saving {DRAM} refresh-power through critical data partitioning", journal = j-SIGPLAN, volume = "47", number = "4", pages = "213--224", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy has become a first-class design constraint in computer systems. Memory is a significant contributor to total system power. This paper introduces Flikker, an application-level technique to reduce refresh power in DRAM memories. Flikker enables developers to specify critical and non-critical data in programs and the runtime system allocates this data in separate parts of memory. The portion of memory containing critical data is refreshed at the regular refresh-rate, while the portion containing non-critical data is refreshed at substantially lower rates. This partitioning saves energy at the cost of a modest increase in data corruption in the non-critical data. Flikker thus exposes and leverages an interesting trade-off between energy consumption and hardware correctness. We show that many applications are naturally tolerant to errors in the non-critical data, and in the vast majority of cases, the errors have little or no impact on the application's final outcome. We also find that Flikker can save between 20-25\% of the power consumed by the memory sub-system in a mobile device, with negligible impact on application performance. Flikker is implemented almost entirely in software, and requires only modest changes to the hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Deng:2012:MAL, author = "Qingyuan Deng and David Meisner and Luiz Ramos and Thomas F. Wenisch and Ricardo Bianchini", title = "{MemScale}: active low-power modes for main memory", journal = j-SIGPLAN, volume = "47", number = "4", pages = "225--238", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950392", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Main memory is responsible for a large and increasing fraction of the energy consumed by servers. Prior work has focused on exploiting DRAM low-power states to conserve energy. However, these states require entire DRAM ranks to be idled, which is difficult to achieve even in lightly loaded servers. In this paper, we propose to conserve memory energy while improving its energy-proportionality by creating active low-power modes for it. Specifically, we propose MemScale, a scheme wherein we apply dynamic voltage and frequency scaling (DVFS) to the memory controller and dynamic frequency scaling (DFS) to the memory channels and DRAM devices. MemScale is guided by an operating system policy that determines the DVFS/DFS mode of the memory subsystem based on the current need for memory bandwidth, the potential energy savings, and the performance degradation that applications are willing to withstand. Our results demonstrate that MemScale reduces energy consumption significantly compared to modern memory energy management approaches. 
We conclude that the potential benefits of the MemScale mechanisms and policy more than compensate for their small hardware cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Gao:2012:TMH, author = "Qi Gao and Wenbin Zhang and Zhezhe Chen and Mai Zheng and Feng Qin", title = "{2ndStrike}: toward manifesting hidden concurrency typestate bugs", journal = j-SIGPLAN, volume = "47", number = "4", pages = "239--250", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency bugs are becoming increasingly prevalent in the multi-core era. Recently, much research has focused on data races and atomicity violation bugs, which are related to low-level memory accesses. However, a large number of concurrency typestate bugs such as ``invalid reads to a closed file from a different thread'' are under-studied. These concurrency typestate bugs are important yet challenging to study since they are mostly relevant to high-level program semantics. This paper presents 2ndStrike, a method to manifest hidden concurrency typestate bugs in software testing. Given a state machine describing correct program behavior on certain object typestates, 2ndStrike profiles runtime events related to the typestates and thread synchronization. Based on the profiling results, 2ndStrike then identifies bug candidates, each of which is a pair of runtime events that would cause typestate violation if the event order is reversed. Finally, 2ndStrike re-executes the program with controlled thread interleaving to manifest bug candidates. We have implemented a prototype of 2ndStrike on Linux and have illustrated our idea using three types of concurrency typestate bugs, including invalid file operation, invalid pointer dereference, and invalid lock operation. We have evaluated 2ndStrike with six real world bugs (including one previously unknown bug) from three open-source server and desktop programs (i.e., MySQL, Mozilla, pbzip2). Our experimental results show that 2ndStrike can effectively and efficiently manifest all six software bugs, most of which are difficult or impossible to manifest using stress testing or active testing techniques that are based on data race/atomicity violation. 
Additionally, 2ndStrike reports no false positives, provides detailed bug reports for each manifested bug, and can consistently reproduce the bug after manifesting it once.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Zhang:2012:CDC, author = "Wei Zhang and Junghee Lim and Ramya Olichandran and Joel Scherpelz and Guoliang Jin and Shan Lu and Thomas Reps", title = "{ConSeq}: detecting concurrency bugs through sequential errors", journal = j-SIGPLAN, volume = "47", number = "4", pages = "251--264", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950395", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency bugs are caused by non-deterministic interleavings between shared memory accesses. Their effects propagate through data and control dependences until they cause software to crash, hang, produce incorrect output, etc. The lifecycle of a bug thus consists of three phases: (1) triggering, (2) propagation, and (3) failure. Traditional techniques for detecting concurrency bugs mostly focus on phase (1)--i.e., on finding certain structural patterns of interleavings that are common triggers of concurrency bugs, such as data races. This paper explores a consequence-oriented approach to improving the accuracy and coverage of state-space search and bug detection. The proposed approach first statically identifies potential failure sites in a program binary (i.e., it first considers a phase (3) issue). It then uses static slicing to identify critical read instructions that are highly likely to affect potential failure sites through control and data dependences (phase (2)). Finally, it monitors a single (correct) execution of a concurrent program and identifies suspicious interleavings that could cause an incorrect state to arise at a critical read and then lead to a software failure (phase (1)). ConSeq's backwards approach, (3) $\rightarrow$ (2) $\rightarrow$ (1), provides advantages in bug-detection coverage and accuracy but is challenging to carry out. ConSeq makes it feasible by exploiting the empirical observation that phases (2) and (3) usually are short and occur within one thread. Our evaluation on large, real-world C/C++ applications shows that ConSeq detects more bugs than traditional approaches and has a much lower false-positive rate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Chipounov:2012:SPV, author = "Vitaly Chipounov and Volodymyr Kuznetsov and George Candea", title = "{S2E}: a platform for in-vivo multi-path analysis of software systems", journal = j-SIGPLAN, volume = "47", number = "4", pages = "265--278", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950396", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents S2E, a platform for analyzing the properties and behavior of software systems.
We demonstrate S2E's use in developing practical tools for comprehensive performance profiling, reverse engineering of proprietary software, and bug finding for both kernel-mode and user-mode binaries. Building these tools on top of S2E took less than 770 LOC and 40 person-hours each. S2E's novelty consists of its ability to scale to large real systems, such as a full Windows stack. S2E is based on two new ideas: selective symbolic execution, a way to automatically minimize the amount of code that has to be executed symbolically given a target analysis, and relaxed execution consistency models, a way to make principled performance/accuracy trade-offs in complex analyses. These techniques give S2E three key abilities: to simultaneously analyze entire families of execution paths, instead of just one execution at a time; to perform the analyses in-vivo within a real software stack--user programs, libraries, kernel, drivers, etc.--instead of using abstract models of these layers; and to operate directly on binaries, thus being able to analyze even proprietary software. Conceptually, S2E is an automated path explorer with modular path analyzers: the explorer drives the target system down all execution paths of interest, while analyzers check properties of each such path (e.g., to look for bugs) or simply collect information (e.g., count page faults). Desired paths can be specified in multiple ways, and S2E users can either combine existing analyzers to build a custom analysis tool, or write new analyzers using the S2E API.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hofmann:2012:EOS, author = "Owen S. Hofmann and Alan M. Dunn and Sangman Kim and Indrajit Roy and Emmett Witchel", title = "Ensuring operating system kernel integrity with {OSck}", journal = j-SIGPLAN, volume = "47", number = "4", pages = "279--290", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950398", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Kernel rootkits that modify operating system state to avoid detection are a dangerous threat to system security. This paper presents OSck, a system that discovers kernel rootkits by detecting malicious modifications to operating system data. OSck integrates and extends existing techniques for detecting rootkits, and verifies safety properties for large portions of the kernel heap with minimal overhead. We deduce type information for verification by analyzing unmodified kernel source code and in-memory kernel data structures. High-performance integrity checks that execute concurrently with a running operating system create data races, and we demonstrate a deterministic solution for ensuring kernel memory is in a consistent state. We introduce two new classes of kernel rootkits that are undetectable by current systems, motivating the need for the OSck API that allows kernel developers to conveniently specify arbitrary integrity properties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Porter:2012:RLT, author = "Donald E. Porter and Silas Boyd-Wickizer and Jon Howell and Reuben Olinsky and Galen C. 
Hunt", title = "Rethinking the library {OS} from the top down", journal = j-SIGPLAN, volume = "47", number = "4", pages = "291--304", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950399", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper revisits an old approach to operating system construction, the library OS, in a new context. The idea of the library OS is that the personality of the OS on which an application depends runs in the address space of the application. A small, fixed set of abstractions connects the library OS to the host OS kernel, offering the promise of better system security and more rapid independent evolution of OS components. We describe a working prototype of a Windows 7 library OS that runs the latest releases of major applications such as Microsoft Excel, PowerPoint, and Internet Explorer. We demonstrate that desktop sharing across independent, securely isolated, library OS instances can be achieved through the pragmatic reuse of net-working protocols. Each instance has significantly lower overhead than a full VM bundled with an application: a typical application adds just 16MB of working set and 64MB of disk footprint. We contribute a new ABI below the library OS that enables application mobility. We also show that our library OS can address many of the current uses of hardware virtual machines at a fraction of the overheads. This paper describes the first working prototype of a full commercial OS redesigned as a library OS capable of running significant applications. Our experience shows that the long-promised benefits of the library OS approach better protection of system integrity and rapid system evolution are readily obtainable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Palix:2012:FLT, author = "Nicolas Palix and Ga{\"e}l Thomas and Suman Saha and Christophe Calv{\`e}s and Julia Lawall and Gilles Muller", title = "Faults in {Linux}: ten years later", journal = j-SIGPLAN, volume = "47", number = "4", pages = "305--318", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950401", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In 2001, Chou et al. published a study of faults found by applying a static analyzer to Linux versions 1.0 through 2.4.1. A major result of their work was that the drivers directory contained up to 7 times more of certain kinds of faults than other directories. This result inspired a number of development and research efforts on improving the reliability of driver code. Today Linux is used in a much wider range of environments, provides a much wider range of services, and has adopted a new development and release model. What has been the impact of these changes on code quality? Are drivers still a major problem? To answer these questions, we have transported the experiments of Chou et al. to Linux versions 2.6.0 to 2.6.33, released between late 2003 and early 2010. 
We find that Linux has more than doubled in size during this period, but that the number of faults per line of code has been decreasing. And, even though drivers still accounts for a large part of the kernel code and contains the most faults, its fault rate is now below that of other directories, such as arch (HAL) and fs (file systems). These results can guide further development and research efforts. To enable others to continually update these results as Linux evolves, we define our experimental protocol and make our checkers and results available in a public archive.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Esmaeilzadeh:2012:LBL, author = "Hadi Esmaeilzadeh and Ting Cao and Yang Xi and Stephen M. Blackburn and Kathryn S. McKinley", title = "Looking back on the language and hardware revolutions: measured power, performance, and scaling", journal = j-SIGPLAN, volume = "47", number = "4", pages = "319--332", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950402", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper reports and analyzes measured chip power and performance on five process technology generations executing 61 diverse benchmarks with a rigorous methodology. We measure representative Intel IA32 processors with technologies ranging from 130nm to 32nm while they execute sequential and parallel benchmarks written in native and managed languages. During this period, hardware and software changed substantially: (1) hardware vendors delivered chip multiprocessors instead of uniprocessors, and independently (2) software developers increasingly chose managed languages instead of native languages. This quantitative data reveals the extent of some known and previously unobserved hardware and software trends. Two themes emerge. (I) Workload: The power, performance, and energy trends of native workloads do not approximate managed workloads. For example, (a) the SPEC CPU2006 native benchmarks on the i7 (45) and i5 (32) draw significantly less power than managed or scalable native benchmarks; and (b) managed runtimes exploit parallelism even when running single-threaded applications. The results recommend architects always include native and managed workloads when designing and evaluating energy efficient hardware. (II) Architecture: Clock scaling, microarchitecture, simultaneous multithreading, and chip multiprocessors each elicit a huge variety of power, performance, and energy responses. This variety and the difficulty of obtaining power measurements recommends exposing on-chip power meters and when possible structure specific power meters for cores, caches, and other structures. 
Just as hardware event counters provide a quantitative grounding for performance innovations, power meters are necessary for optimizing energy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Nguyen:2012:SCS, author = "Donald Nguyen and Keshav Pingali", title = "Synthesizing concurrent schedulers for irregular algorithms", journal = j-SIGPLAN, volume = "47", number = "4", pages = "333--344", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scheduling is the assignment of tasks or activities to processors for execution, and it is an important concern in parallel programming. Most prior work on scheduling has focused either on static scheduling of applications in which the dependence graph is known at compile-time or on dynamic scheduling of independent loop iterations such as in OpenMP. In irregular algorithms, dependences between activities are complex functions of runtime values so these algorithms are not amenable to compile-time analysis nor do they consist of independent activities. Moreover, the amount of work can vary dramatically with the scheduling policy. To handle these complexities, implementations of irregular algorithms employ carefully handcrafted, algorithm-specific schedulers but these schedulers are themselves parallel programs, complicating the parallel programming problem further. In this paper, we present a flexible and efficient approach for specifying and synthesizing scheduling policies for irregular algorithms. We develop a simple compositional specification language and show how it can concisely encode scheduling policies in the literature. Then, we show how to synthesize efficient parallel schedulers from these specifications. We evaluate our approach for five irregular algorithms on three multicore architectures and show that (1) the performance of some algorithms can improve by orders of magnitude with the right scheduling policy, and (2) for the same policy, the overheads of our synthesized schedulers are comparable to those of fixed-function schedulers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hoang:2012:ECT, author = "Giang Hoang and Robby Bruce Findler and Russ Joseph", title = "Exploring circuit timing-aware language and compilation", journal = j-SIGPLAN, volume = "47", number = "4", pages = "345--356", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "By adjusting the design of the ISA and enabling circuit timing-sensitive optimizations in a compiler, we can more effectively exploit timing speculation. While there has been growing interest in systems that leverage circuit-level timing speculation to improve the performance and power-efficiency of processors, most of the innovation has been at the microarchitectural level. 
We make the observation that some code sequences place greater demand on circuit timing deadlines than others. Furthermore, by selectively replacing these codes with instruction sequences which are semantically equivalent but reduce activity on timing critical circuit paths, we can trigger fewer timing errors and hence reduce recovery costs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Farhad:2012:OAM, author = "Sardar M. Farhad and Yousun Ko and Bernd Burgstaller and Bernhard Scholz", title = "Orchestration by approximation: mapping stream programs onto multicore architectures", journal = j-SIGPLAN, volume = "47", number = "4", pages = "357--368", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950406", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel 2-approximation algorithm for deploying stream graphs on multicore computers and a stream graph transformation that eliminates bottlenecks. The key technical insight is a data rate transfer model that enables the computation of a ``closed form'', i.e., the data rate transfer function of an actor depending on the arrival rate of the stream program. A combinatorial optimization problem uses the closed form to maximize the throughput of the stream program. Although the problem is inherently NP-hard, we present an efficient and effective 2-approximation algorithm that provides a lower bound on the quality of the solution. We introduce a transformation that uses the closed form to identify and eliminate bottlenecks. We show experimentally that state-of-the art integer linear programming approaches for orchestrating stream graphs are (1) intractable or at least impractical for larger stream graphs and larger number of processors and (2) our 2-approximation algorithm is highly efficient and its results are close to the optimal solution for a standard set of StreamIt benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Zhang:2012:FED, author = "Eddy Z. Zhang and Yunlian Jiang and Ziyu Guo and Kai Tian and Xipeng Shen", title = "On-the-fly elimination of dynamic irregularities for {GPU} computing", journal = j-SIGPLAN, volume = "47", number = "4", pages = "369--380", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950408", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The power-efficient massively parallel Graphics Processing Units (GPUs) have become increasingly influential for general-purpose computing over the past few years. However, their efficiency is sensitive to dynamic irregular memory references and control flows in an application. Experiments have shown great performance gains when these irregularities are removed. But it remains an open question how to achieve those gains through software approaches on modern GPUs. 
This paper presents a systematic exploration to tackle dynamic irregularities in both control flows and memory references. It reveals some properties of dynamic irregularities in both control flows and memory references, their interactions, and their relations with program data and threads. It describes several heuristics-based algorithms and runtime adaptation techniques for effectively removing dynamic irregularities through data reordering and job swapping. It presents a framework, G-Streamline, as a unified software solution to dynamic irregularities in GPU computing. G-Streamline has several distinctive properties. It is a pure software solution and works on the fly, requiring no hardware extensions or offline profiling. It treats both types of irregularities at the same time in a holistic fashion, maximizing the whole-program performance by resolving conflicts among optimizations. Its optimization overhead is largely transparent to GPU kernel executions, jeopardizing no basic efficiency of the GPU application. Finally, it is robust to the presence of various complexities in GPU applications. Experiments show that G-Streamline is effective in reducing dynamic irregularities in GPU computing, producing speedups between 1.07 and 2.5 for a variety of applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hormati:2012:SPS, author = "Amir H. Hormati and Mehrzad Samadi and Mark Woh and Trevor Mudge and Scott Mahlke", title = "{Sponge}: portable stream programming on graphics engines", journal = j-SIGPLAN, volume = "47", number = "4", pages = "381--392", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics processing units (GPUs) provide a low cost platform for accelerating high performance computations. The introduction of new programming languages, such as CUDA and OpenCL, makes GPU programming attractive to a wide variety of programmers. However, programming GPUs is still a cumbersome task for two primary reasons: tedious performance optimizations and lack of portability. First, optimizing an algorithm for a specific GPU is a time-consuming task that requires a thorough understanding of both the algorithm and the underlying hardware. Unoptimized CUDA programs typically only achieve a small fraction of the peak GPU performance. Second, GPU code lacks efficient portability as code written for one GPU can be inefficient when executed on another. Moving code from one GPU to another while maintaining the desired performance is a non-trivial task often requiring significant modifications to account for the hardware differences. In this work, we propose Sponge, a compilation framework for GPUs using synchronous data flow streaming languages. Sponge is capable of performing a wide variety of optimizations to generate efficient code for graphics engines. Sponge alleviates the problems associated with current GPU programming methods by providing portability across different generations of GPUs and CPUs, and a better abstraction of the hardware details, such as the memory hierarchy and threading model. 
Using streaming, we provide a write-once software paradigm and rely on the compiler to automatically create optimized CUDA code for a wide variety of GPU targets. Sponge's compiler optimizations improve the performance of the baseline CUDA implementations by an average of 3.2x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Kamruzzaman:2012:ICP, author = "Md Kamruzzaman and Steven Swanson and Dean M. Tullsen", title = "Inter-core prefetching for multicore processors using migrating helper threads", journal = j-SIGPLAN, volume = "47", number = "4", pages = "393--404", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multicore processors have become ubiquitous in today's systems, but exploiting the parallelism they offer remains difficult, especially for legacy applications and applications with large serial components. The challenge, then, is to develop techniques that allow multiple cores to work in concert to accelerate a single thread. This paper describes inter-core prefetching, a technique to exploit multiple cores to accelerate a single thread. Inter-core prefetching extends existing work on helper threads for SMT machines to multicore machines. Inter-core prefetching uses one compute thread and one or more prefetching threads. The prefetching threads execute on cores that would otherwise be idle, prefetching the data that the compute thread will need. The compute thread then migrates between cores, following the path of the prefetch threads, and finds the data already waiting for it. Inter-core prefetching works with existing hardware and existing instruction set architectures. Using a range of state-of-the-art multiprocessors, this paper characterizes the potential benefits of the technique with microbenchmarks and then measures its impact on a range of memory intensive applications. The results show that inter-core prefetching improves performance by an average of 31 to 63\%, depending on the architecture, and speeds up some applications by as much as 2.8$ \times $. It also demonstrates that inter-core prefetching reduces energy consumption by between 11 and 26\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Hayashizaki:2012:IPT, author = "Hiroshige Hayashizaki and Peng Wu and Hiroshi Inoue and Mauricio J. Serrano and Toshio Nakatani", title = "Improving the performance of trace-based systems by false loop filtering", journal = j-SIGPLAN, volume = "47", number = "4", pages = "405--418", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Trace-based compilation is a promising technique for language compilers and binary translators. It offers the potential to expand the compilation scopes that have traditionally been limited by method boundaries.
Detecting repeating cyclic execution paths and capturing the detected repetitions into traces is a key requirement for trace selection algorithms to achieve good optimization and performance with small amounts of code. One important class of repetition detection is cyclic-path-based repetition detection, where a cyclic execution path (a path that starts and ends at the same instruction address) is detected as a repeating cyclic execution path. However, we found many cyclic paths that are not repeating cyclic execution paths, which we call false loops. A common class of false loops occurs when a method is invoked from multiple call-sites. A cycle is formed between two invocations of the method from different call-sites, but which does not represent loops or recursion. False loops can result in shorter traces and smaller compilation scopes, and degrade the performance. We propose false loop filtering, an approach to reject false loops in the repetition detection step of trace selection, and a technique called false loop filtering by call-stack-comparison, which rejects a cyclic path as a false loop if the call stacks at the beginning and the end of the cycle are different. We applied false loop filtering to our trace-based Java\TM{} JIT compiler that is based on IBM's J9 JVM. We found that false loop filtering achieved an average improvement of 16\% and 10\% for the DaCapo benchmark when applied to two baseline trace selection algorithms, respectively, with up to 37\% improvement for individual benchmarks. In the end, with false loop filtering, our trace-based JIT achieves a performance comparable to that of the method-based J9 JVM/JIT using the corresponding optimization level.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '12 conference proceedings.", } @Article{Xue:2012:RJC, author = "Jingling Xue", title = "Rethinking {Java} call stack design for tiny embedded devices", journal = j-SIGPLAN, volume = "47", number = "5", pages = "1--10", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248420", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "The ability of tiny embedded devices to run large feature-rich programs is typically constrained by the amount of memory installed on such devices. Furthermore, the useful operation of these devices in wireless sensor applications is limited by their battery life. This paper presents a call stack redesign targeted at an efficient use of RAM storage and CPU cycles by a Java program running on a wireless sensor mote. Without compromising the application programs, our call stack redesign saves 30\% of RAM, on average, evaluated over a large number of benchmarks. On the same set of benchmarks, our design also avoids frequent RAM allocations and deallocations, resulting in average 80\% fewer memory operations and 23\% faster program execution. These may be critical improvements for tiny embedded devices that are equipped with small amount of RAM and limited battery life. However, our call stack redesign is equally effective for any complex multi-threaded object oriented program developed for desktop computers.
We describe the redesign, measure its performance and report the resulting savings in RAM and execution time for a wide variety of programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sallenave:2012:LGE, author = "Olivier Sallenave and Roland Ducournau", title = "Lightweight generics in embedded systems through static analysis", journal = j-SIGPLAN, volume = "47", number = "5", pages = "11--20", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248421", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Low-end embedded systems are still programmed in C and assembly, and adopting high-level languages such as C\# should reduce the length of their development cycles. For these systems, code size is a major concern, but run-time efficiency should also be reasonable --- programmers will not migrate to C\# unless the overhead compared with C is insignificant. In this paper, we propose a static approach based on whole program optimization for implementing {.NET} generics in such systems. Indeed, the implementation of run-time generics involves a tradeoff between size and run-time efficiency. In this proposal, generic instances are detected through a generalization of RTA to parametric polymorphism. Also, we propose an implementation scheme which employs code sharing and more effective coercions than boxing. Unlike existing implementation schemes, it is scalable in the number of generic instances without involving boxing and unboxing in a systematic way.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kyle:2012:EPI, author = "Stephen Kyle and Igor B{\"o}hm and Bj{\"o}rn Franke and Hugh Leather and Nigel Topham", title = "Efficiently parallelizing instruction set simulation of embedded multi-core processors using region-based just-in-time dynamic binary translation", journal = j-SIGPLAN, volume = "47", number = "5", pages = "21--30", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248422", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Embedded systems, as typified by modern mobile phones, are already seeing a drive toward using multi-core processors. The number of cores will likely increase rapidly in the future. Engineers and researchers need to be able to simulate systems, as they are expected to be in a few generations time, running simulations of many-core devices on today's multi-core machines. These requirements place heavy demands on the scalability of simulation engines, the fastest of which have typically evolved from just-in-time (Jit) dynamic binary translators (Dbt). Existing work aimed at parallelizing Dbt simulators has focused exclusively on trace-based Dbt, wherein linear execution traces or perhaps trees thereof are the units of translation. Region-based Dbt simulators have not received the same attention and require different techniques than their trace-based cousins. 
In this paper we develop an innovative approach to scaling multi-core, embedded simulation through region-based Dbt. We initially modify the Jit code generator of such a simulator to emit code that does not depend on a particular thread with its thread-specific context and is, therefore, thread-agnostic. We then demonstrate that this thread-agnostic code generation is comparable to thread-specific code with respect to performance, but also enables the sharing of JIT-compiled regions between different threads. This sharing optimisation, in turn, leads to significant performance improvements for multi-threaded applications. In fact, our results confirm that an average of 76\% of all JIT-compiled regions can be shared between 128 threads in representative, parallel workloads. We demonstrate that this translates into an overall performance improvement by 1.44x on average and up to 2.40x across 12 multi-threaded benchmarks taken from the Splash-2 benchmark suite, targeting our high-performance multi-core Dbt simulator for embedded Arc processors running on a 4-core Intel host machine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Huang:2012:WAR, author = "Yazhi Huang and Mengying Zhao and Chun Jason Xue", title = "{WCET}-aware re-scheduling register allocation for real-time embedded systems with clustered {VLIW} architecture", journal = j-SIGPLAN, volume = "47", number = "5", pages = "31--40", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248424", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Worst-Case Execution Time (WCET) is one of the most important metrics in real-time embedded system design. For embedded systems with clustered VLIW architecture, register allocation, instruction scheduling, and cluster assignment are three key activities to pursue code optimization which have profound impact on WCET. At the same time, these three activities exhibit a phase ordering problem: Independently performing register allocation, scheduling and cluster assignment could have a negative effect on the other phases, thereby generating sub-optimal compiled codes. In this paper, a compiler level optimization, namely WCET-aware Re-scheduling Register Allocation (WRRA), is proposed to achieve WCET minimization for real-time embedded systems with clustered VLIW architecture. The novelty of the proposed approach is that the effects of register allocation, instruction scheduling and cluster assignment on the quality of generated code are taken into account for WCET minimization. These three compilation processes are integrated into a single phase to obtain a balanced result. The proposed technique is implemented in Trimaran 4.0. 
The experimental results show that the proposed technique can reduce WCET effectively, by 33\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wan:2012:WAD, author = "Qing Wan and Hui Wu and Jingling Xue", title = "{WCET}-aware data selection and allocation for scratchpad memory", journal = j-SIGPLAN, volume = "47", number = "5", pages = "41--50", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248425", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "In embedded systems, SPM (scratchpad memory) is an attractive alternative to cache memory due to its lower energy consumption and higher predictability of program execution. This paper studies the problem of placing variables of a program into an SPM such that its WCET (worst-case execution time) is minimized. We propose an efficient dynamic approach that comprises two novel heuristics. The first heuristic iteratively selects a most beneficial variable as an SPM resident candidate based on its impact on the k longest paths of the program. The second heuristic incrementally allocates each SPM resident candidate to the SPM based on graph coloring and acyclic graph orientation. We have evaluated our approach by comparing with an ILP-based approach and a longest-path-based greedy approach using the eight benchmarks selected from Powerstone and M{\"a}lardalen WCET Benchmark suites under three different SPM configurations. Our approach achieves up to 21\% and 43\% improvements in WCET reduction over the ILP-based approach and the greedy approach, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gerard:2012:MMO, author = "L{\'e}onard G{\'e}rard and Adrien Guatto and C{\'e}dric Pasteur and Marc Pouzet", title = "A modular memory optimization for synchronous data-flow languages: application to arrays in a {Lustre} compiler", journal = j-SIGPLAN, volume = "47", number = "5", pages = "51--60", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248426", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "The generation of efficient sequential code for synchronous data-flow languages raises two intertwined issues: control and memory optimization. While the former has been extensively studied, for instance in the compilation of Lustre and Signal, the latter has only been addressed in a restricted manner. Yet, memory optimization becomes a pressing issue when arrays are added to such languages. This article presents a two-level solution to the memory optimization problem. It combines a compile-time optimization algorithm, reminiscent of register allocation, paired with language annotations on the source given by the designer. Annotations express in-place modifications and control where allocation is performed. Moreover, they allow external functions performing in-place modifications to be safely imported. 
Soundness of annotations is guaranteed by a semilinear type system and additional scheduling constraints. A key feature is that annotations for well-typed programs do not change the semantics of the language: removing them may lead to less efficient code but will not alter the semantics. The method has been implemented in a new compiler for a LUSTRE-like synchronous language extended with hierarchical automata and arrays. Experiments show that the proposed approach removes most of the unnecessary array copies, resulting in faster code that uses less memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sbirlea:2012:MDF, author = "Alina Sb{\^\i}rlea and Yi Zou and Zoran Budiml{\'\i}c and Jason Cong and Vivek Sarkar", title = "Mapping a data-flow programming model onto heterogeneous platforms", journal = j-SIGPLAN, volume = "47", number = "5", pages = "61--70", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248428", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "In this paper we explore mapping of a high-level macro data-flow programming model called Concurrent Collections (CnC) onto heterogeneous platforms in order to achieve high performance and low energy consumption while preserving the ease of use of data-flow programming. Modern computing platforms are becoming increasingly heterogeneous in order to improve energy efficiency. This trend is clearly seen across a diverse spectrum of platforms, from small-scale embedded SOCs to large-scale super-computers. However, programming these heterogeneous platforms poses a serious challenge for application developers. We have designed a software flow for converting high-level CnC programs to the Habanero-C language. CnC programs have a clear separation between the application description, the implementation of each of the application components and the abstraction of hardware platform, making it an excellent programming model for domain experts. Domain experts can later employ the help of a tuning expert (either a compiler or a person) to tune their applications with minimal effort. We also extend the Habanero-C runtime system to support work-stealing across heterogeneous computing devices and introduce task affinity for these heterogeneous components to allow users to fine tune the runtime scheduling decisions. We demonstrate a working example that maps a pipeline of medical image-processing algorithms onto a prototype heterogeneous platform that includes CPUs, GPUs and FPGAs. For the medical imaging domain, where obtaining fast and accurate results is a critical step in diagnosis and treatment of patients, we show that our model offers up to 17.72X speedup and an estimated usage of 0.52X of the power used by CPUs alone, when using accelerators (GPUs and FPGAs) and CPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hashemi:2012:FSU, author = "Matin Hashemi and Mohammad H. 
Foroozannejad and Soheil Ghiasi and Christoph Etzel", title = "{FORMLESS}: scalable utilization of embedded manycores in streaming applications", journal = j-SIGPLAN, volume = "47", number = "5", pages = "71--78", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248429", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Variants of dataflow specification models are widely used to synthesize streaming applications for distributed-memory parallel processors. We argue that current practice of specifying streaming applications using rigid dataflow models implicitly prohibits a number of platform oriented optimizations and hence limits portability and scalability with respect to number of processors. We motivate Functionally-cOnsistent stRucturally-MalLEable Streaming Specification, dubbed FORMLESS, which refers to raising the abstraction level beyond fixed-structure dataflow to address its portability and scalability limitations. To demonstrate the potential of the idea, we develop a design space exploration scheme to customize the application specification to better fit the target platform. Experiments with several common streaming case studies demonstrate improved portability and scalability over conventional dataflow specification models, and confirm the effectiveness of our approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Farhad:2012:PGD, author = "S. M. Farhad and Yousun Ko and Bernd Burgstaller and Bernhard Scholz", title = "Profile-guided deployment of stream programs on multicores", journal = j-SIGPLAN, volume = "47", number = "5", pages = "79--88", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248430", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Because multicore architectures have become the industry standard, programming abstractions for concurrent programming are of key importance. Stream programming languages facilitate application domains characterized by regular sequences of data, such as multimedia, graphics, signal processing and networking. With stream programs, computations are expressed through independent actors that interact through FIFO data channels. A major challenge with stream programs is to load-balance actors among available processing cores. The workload of a stream program is determined by actor execution times and the communication overhead induced by data channels. Estimating communication costs on cache-coherent shared-memory multiprocessors is difficult, because data movements are abstracted away by the cache coherence protocol. Standard execution time profiling techniques cannot separate actor execution times from communication costs, because communication costs manifest in terms of execution time overhead. In this work we present a unified Integer Linear Programming (ILP) formulation that balances the workload of stream programs on cache-coherent multicore architectures.
For estimating the communication costs of data channels, we devise a novel profiling scheme that minimizes the number of profiling steps. We conduct experiments across a range of StreamIt benchmarks and show that our method achieves a speedup of up to 4.02x on 6 processors. The number of profiling steps is on average only 17\% of an exhaustive profiling run over all data channels of a stream program.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fang:2012:IDP, author = "Zhenman Fang and Jiaxin Li and Weihua Zhang and Yi Li and Haibo Chen and Binyu Zang", title = "Improving dynamic prediction accuracy through multi-level phase analysis", journal = j-SIGPLAN, volume = "47", number = "5", pages = "89--98", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248432", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Phase analysis, which classifies the set of execution intervals with similar execution behavior and resource requirements, has been widely used in a variety of dynamic systems, including dynamic cache reconfiguration, prefetching and race detection. While phase granularity has been a major factor to the accuracy of phase prediction, it has not been well investigated yet and most dynamic systems usually adopt a fine-grained prediction scheme. However, such a scheme can only take account of recent local phase information and could be frequently interfered by temporary noises due to instant phase changes, which might notably limit the prediction accuracy. In this paper, we make the first investigation on the potential of multi-level phase analysis (MLPA), where different granularity phase analysis are combined together to improve the overall accuracy. The key observation is that a coarse-grained interval, which usually consists of stably-distributed fine-grained intervals, can be accurately identified based on the fine-grained intervals at the beginning of its execution. Based on the observation, we design and implement a MLPA scheme. In such a scheme, a coarse-grained phase is first identified based on the fine-grained intervals at the beginning of its execution. The following fine-grained phases in it are then predicted based on the sequence of fine-grained phases in the coarse-grained phase. Experimental results show such a scheme can notably improve the prediction accuracy. Using Markov fine-grained phase predictor as the baseline, MLPA can improve prediction accuracy by 20\%, 39\% and 29\% for next phase, phase change and phase length prediction for SPEC2000 accordingly, yet incur only about 2\% time overhead and 40\% space overhead (about 360 bytes in total). To demonstrate the effectiveness of MLPA, we apply it to a dynamic cache reconfiguration system which dynamically adjusts the cache size to reduce the power consumption and access time of data cache. 
Experimental results show that MLPA can further reduce the average cache size by 15\% compared to the fine-grained scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Khudia:2012:ESE, author = "Daya Shanker Khudia and Griffin Wright and Scott Mahlke", title = "Efficient soft error protection for commodity embedded microprocessors using profile information", journal = j-SIGPLAN, volume = "47", number = "5", pages = "99--108", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248433", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Successive generations of processors use smaller transistors in the quest to make more powerful computing systems. It has been previously studied that smaller transistors make processors more susceptible to soft errors (transient faults caused by high energy particle strikes). Such errors can result in unexpected behavior and incorrect results. With smaller and cheaper transistors becoming pervasive in mainstream computing, it is necessary to protect these devices against soft errors; an increasing rate of faults necessitates the protection of applications running on commodity processors against soft errors. The existing methods of protecting against such faults generally have high area or performance overheads and thus are not directly applicable in the embedded design space. In order to protect against soft errors, the detection of these errors is a necessary first step so that a recovery can be triggered. To solve the problem of detecting soft errors cheaply, we propose a profiling-based software-only application analysis and transformation solution. The goal is to develop a low cost solution which can be deployed for off-the-shelf embedded processors. The solution works by intelligently duplicating instructions that are likely to affect the program output, and comparing results between original and duplicated instructions. The intelligence of our solution is garnered through the use of control flow, memory dependence, and value profiling to understand and exploit the common-case behavior of applications. Our solution is able to achieve 92\% fault coverage with a 20\% instruction overhead. This represents a 41\% lower performance overhead than the best prior approaches with approximately the same fault coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2012:CAP, author = "Qingan Li and Mengying Zhao and Chun Jason Xue and Yanxiang He", title = "Compiler-assisted preferred caching for embedded systems with {STT--RAM} based hybrid cache", journal = j-SIGPLAN, volume = "47", number = "5", pages = "109--118", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248434", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "As technology scales down, energy consumption is becoming a big problem for traditional SRAM-based cache hierarchies. 
The emerging Spin-Torque Transfer RAM (STT-RAM) is a promising replacement for large on-chip cache due to its ultra low leakage power and high storage density. However, write operations on STT-RAM suffer from considerably higher energy consumption and longer latency than SRAM. Hybrid cache consisting of both SRAM and STT-RAM has been proposed recently for both performance and energy efficiency. Most management strategies for hybrid caches employ migration-based techniques to dynamically move write-intensive data from STT-RAM to SRAM. These techniques lead to extra overheads. In this paper, we propose a compiler-assisted approach, preferred caching, to significantly reduce the migration overhead by giving migration-intensive memory blocks the preference for the SRAM part of the hybrid cache. Furthermore, a data assignment technique is proposed to improve the efficiency of preferred caching. The reduction of migration overhead can in turn improve the performance and energy efficiency of STT-RAM based hybrid cache. The experimental results show that, with the proposed techniques, on average, the number of migrations is reduced by 21.3\%, the total latency is reduced by 8.0\% and the total dynamic energy is reduced by 10.8\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zuluaga:2012:SDS, author = "Marcela Zuluaga and Andreas Krause and Peter Milder and Markus P{\"u}schel", title = "``Smart'' design space sampling to predict {Pareto}-optimal solutions", journal = j-SIGPLAN, volume = "47", number = "5", pages = "119--128", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248436", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "Many high-level synthesis tools offer degrees of freedom in mapping high-level specifications to Register-Transfer Level descriptions. These choices do not affect the functional behavior but span a design space of different cost-performance tradeoffs. In this paper we present a novel machine learning-based approach that efficiently determines the Pareto-optimal designs while only sampling and synthesizing a fraction of the design space. The approach combines three key components: (1) A regression model based on Gaussian processes to predict area and throughput based on synthesis training data. (2) A ``smart'' sampling strategy, GP-PUCB, to iteratively refine the model by carefully selecting the next design to synthesize to maximize progress. (3) A stopping criterion based on assessing the accuracy of the model without access to complete synthesis data. We demonstrate the effectiveness of our approach using IP generators for discrete Fourier transforms and sorting networks. 
However, our algorithm is not specific to this application and can be applied to a wide range of Pareto front prediction problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bouissou:2012:OSS, author = "Olivier Bouissou and Alexandre Chapoutot", title = "An operational semantics for {Simulink}'s simulation engine", journal = j-SIGPLAN, volume = "47", number = "5", pages = "129--138", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248437", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "The industrial tool Matlab/Simulink is widely used in the design of embedded systems. The main feature of this tool is its ability to model in a common formalism the software and its physical environment. This makes it very useful for validating the design of embedded software using numerical simulation. However, the formal verification of such models is still problematic as Simulink is a programming language for which no formal semantics exists. In this article, we present an operational semantics of a representative subset of Simulink which includes both continuous-time and discrete-time blocks. We believe that this work gives a better understanding of Simulink and it defines the foundations of a general framework to apply formal methods on Simulink's high level descriptions of embedded systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yu:2012:SCC, author = "Fang Yu and Shun-Ching Yang and Farn Wang and Guan-Cheng Chen and Che-Chang Chan", title = "Symbolic consistency checking of {OpenMP} parallel programs", journal = j-SIGPLAN, volume = "47", number = "5", pages = "139--148", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248438", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "We present a symbolic approach for checking consistency of OpenMP parallel programs. A parallel program is consistent if it yields the same result as its sequential version despite the execution order among threads. We find race conditions of an OpenMP parallel program, construct the formal model of its raced segments under relaxed memory models, and perform guided symbolic simulation to search consistency violations. The simulation terminates when (1) a witness has been found (the program is inconsistent), or (2) all reachable states have been explored (the program is consistent). We have developed the tool Pathg by incorporating Omega library to solve race constraints and Red symbolic simulator to perform guided search. 
We show that Pathg can prove consistency of programs, identify races that modern OpenMP checkers failed to report, and find inconsistency witnesses effectively against benchmarks from the OpenMP Source Code Repository and the NAS Parallel benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gal-On:2012:CPR, author = "Shay Gal-On and Markus Levy", title = "Creating portable, repeatable, realistic benchmarks for embedded systems and the challenges thereof", journal = j-SIGPLAN, volume = "47", number = "5", pages = "149--152", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248440", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "LCTES '12 proceedings.", abstract = "To appreciate the challenges of analysing embedded processor behaviour, step back in time to understand the evolution of embedded processors. Only a few decades ago, embedded processors were relatively simple devices (compared to today), represented by a host of 8- and 16-bit microcontrollers, and 32-bit microprocessors, with minimal integration. Today, these processors (even the so-called low-end microcontrollers) have evolved into highly-integrated SoCs with a wide variety of architectures capable of tackling both specific and general-purpose tasks. Associated with these transformations, the benchmarks used to quantify the capabilities have also grown in complexity and range. At the simplest level, benchmarks such as CoreMark analyse the fundamental processor cores. At the other end of the spectrum, system benchmarks, such as BrowsingBench, analyse the entire SoC as well as the system software stack and even the physical interfaces. This paper examines some of the challenges of applying such benchmarks, and explains the methodologies used at EEMBC to manage portability, repeatability, and realism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hosking:2012:CHL, author = "Tony Hosking", title = "Compiling a high-level language for {GPUs}: (via language support for architectures and compilers)", journal = j-SIGPLAN, volume = "47", number = "6", pages = "1--12", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254066", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Languages such as OpenCL and CUDA offer a standard interface for general-purpose programming of GPUs. However, with these languages, programmers must explicitly manage numerous low-level details involving communication and synchronization. This burden makes programming GPUs difficult and error-prone, rendering these powerful devices inaccessible to most programmers. We desire a higher-level programming model that makes GPUs more accessible while also effectively exploiting their computational power. This paper presents features of Lime, a new Java-compatible language targeting heterogeneous systems, that allow an optimizing compiler to generate high quality GPU code.
The key insight is that the language type system enforces isolation and immutability invariants that allow the compiler to optimize for a GPU without heroic compiler analysis. Our compiler attains GPU speedups between 75\% and 140\% of the performance of native OpenCL code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Samadi:2012:AIA, author = "Mehrzad Samadi and Amir Hormati and Mojtaba Mehrara and Janghaeng Lee and Scott Mahlke", title = "Adaptive input-aware compilation for graphics engines", journal = j-SIGPLAN, volume = "47", number = "6", pages = "13--22", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254067", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "While graphics processing units (GPUs) provide low-cost and efficient platforms for accelerating high performance computations, the tedious process of performance tuning required to optimize applications is an obstacle to wider adoption of GPUs. In addition to the programmability challenges posed by GPU's complex memory hierarchy and parallelism model, a well-known application design problem is target portability across different GPUs. However, even for a single GPU target, changing a program's input characteristics can make an already-optimized implementation of a program perform poorly. In this work, we propose Adaptic, an adaptive input-aware compilation system to tackle this important, yet overlooked, input portability problem. Using this system, programmers develop their applications in a high-level streaming language and let Adaptic undertake the difficult task of input portable optimizations and code generation. Several input-aware optimizations are introduced to make efficient use of the memory hierarchy and customize thread composition. At runtime, a properly optimized version of the application is executed based on the actual program input. We perform a head-to-head comparison between the Adaptic generated and hand-optimized CUDA programs. The results show that Adaptic is capable of generating codes that can perform on par with their hand-optimized counterparts over certain input ranges and outperform them when the input falls out of the hand-optimized programs' ``comfort zone''. Furthermore, we show that input-aware results are sustainable across different GPU targets making it possible to write and optimize applications once and run them anywhere.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bacon:2012:TTW, author = "David F. Bacon and Perry Cheng and Sunil Shukla", title = "And then there were none: a stall-free real-time garbage collector for reconfigurable hardware", journal = j-SIGPLAN, volume = "47", number = "6", pages = "23--34", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254068", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Programmers are turning to radical architectures such as reconfigurable hardware (FPGAs) to achieve performance. 
But such systems, programmed at a very low level in languages with impoverished abstractions, are orders of magnitude more complex to use than conventional CPUs. The continued exponential increase in transistors, combined with the desire to implement ever more sophisticated algorithms, makes it imperative that such systems be programmed at much higher levels of abstraction. One of the fundamental high-level language features is automatic memory management in the form of garbage collection. We present the first implementation of a complete garbage collector in hardware (as opposed to previous ``hardware-assist'' techniques), using an FPGA and its on-chip memory. Using a completely concurrent snapshot algorithm, it provides single-cycle access to the heap, and never stalls the mutator for even a single cycle, achieving a deterministic mutator utilization (MMU) of 100\%. We have synthesized the collector to hardware and show that it never consumes more than 1\% of the logic resources of a high-end FPGA. For comparison we also implemented explicit (malloc/free) memory management, and show that real-time collection is about 4\% to 17\% slower than malloc, with comparable energy consumption. Surprisingly, in hardware real-time collection is superior to stop-the-world collection on every performance axis, and even for stressful micro-benchmarks can achieve 100\% MMU with heaps as small as 1.01 to 1.4 times the absolute minimum.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Oliveira:2012:ICN, author = "Bruno C. d. S. Oliveira and Tom Schrijvers and Wontae Choi and Wonchan Lee and Kwangkeun Yi", title = "The implicit calculus: a new foundation for generic programming", journal = j-SIGPLAN, volume = "47", number = "6", pages = "35--44", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254070", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Generic programming (GP) is an increasingly important trend in programming languages. Well-known GP mechanisms, such as type classes and the C++0x concepts proposal, usually combine two features: (1) a special type of interfaces; and (2) implicit instantiation of implementations of those interfaces. Scala implicits are a GP language mechanism, inspired by type classes, that break with the tradition of coupling implicit instantiation with a special type of interface. Instead, implicits provide only implicit instantiation, which is generalized to work for any types. This turns out to be quite powerful and useful to address many limitations that show up in other GP mechanisms. This paper synthesizes the key ideas of implicits formally in a minimal and general core calculus called the implicit calculus $ (\lambda \implies) $, and it shows how to build source languages supporting implicit instantiation on top of it. A novelty of the calculus is its support for partial resolution and higher-order rules (a feature that has been proposed before, but was never formalized or implemented). 
Ultimately, the implicit calculus provides a formal model of implicits, which can be used by language designers to study and inform implementations of similar mechanisms in their own languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kawaguchi:2012:DPL, author = "Ming Kawaguchi and Patrick Rondon and Alexander Bakst and Ranjit Jhala", title = "Deterministic parallelism via liquid effects", journal = j-SIGPLAN, volume = "47", number = "6", pages = "45--54", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254071", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Shared memory multithreading is a popular approach to parallel programming, but also fiendishly hard to get right. We present Liquid Effects, a type-and-effect system based on refinement types which allows for fine-grained, low-level, shared memory multi-threading while statically guaranteeing that a program is deterministic. Liquid Effects records the effect of an expression as a formula in first-order logic, making our type-and-effect system highly expressive. Further, effects like Read and Write are recorded in Liquid Effects as ordinary uninterpreted predicates, leaving the effect system open to extension by the user. By building our system as an extension to an existing dependent refinement type system, our system gains precise value- and branch-sensitive reasoning about effects. Finally, our system exploits the Liquid Types refinement type inference technique to automatically infer refinement types and effects. We have implemented our type-and-effect checking techniques in CSOLVE, a refinement type inference system for C programs. We demonstrate how CSOLVE uses Liquid Effects to prove the determinism of a variety of benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Guerraoui:2012:SL, author = "Rachid Guerraoui and Viktor Kuncak and Giuliano Losa", title = "Speculative linearizability", journal = j-SIGPLAN, volume = "47", number = "6", pages = "55--66", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254072", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Linearizability is a key design methodology for reasoning about implementations of concurrent abstract data types in both shared memory and message passing systems. It provides the illusion that operations execute sequentially and fault-free, despite the asynchrony and faults inherent to a concurrent system, especially a distributed one. A key property of linearizability is inter-object composability: a system composed of linearizable objects is itself linearizable. However, devising linearizable objects is very difficult, requiring complex algorithms to work correctly under general circumstances, and often resulting in bad average-case behavior. Concurrent algorithm designers therefore resort to speculation: optimizing algorithms to handle common scenarios more efficiently.
The outcome is even more complex protocols, for which it is no longer tractable to prove their correctness. To simplify the design of efficient yet robust linearizable protocols, we propose a new notion: speculative linearizability. This property is as general as linearizability, yet it allows intra-object composability: the correctness of independent protocol phases implies the correctness of their composition. In particular, it allows the designer to focus solely on the proof of an optimization and derive the correctness of the overall protocol from the correctness of the existing, non-optimized one. Our notion of protocol phases allows processes to independently switch from one phase to another, without requiring them to reach agreement to determine the change of a phase. To illustrate the applicability of our methodology, we show how examples of speculative algorithms for shared memory and asynchronous message passing naturally fit into our framework. We rigorously define speculative linearizability and prove our intra-object composition theorem in a trace-based as well as an automaton-based model. To obtain a further degree of confidence, we also formalize and mechanically check the theorem in the automaton-based model, using the I/O automata framework within the Isabelle interactive proof assistant. We expect our framework to enable, for the first time, scalable specifications and mechanical proofs of speculative implementations of linearizable objects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zaparanuks:2012:AP, author = "Dmitrijs Zaparanuks and Matthias Hauswirth", title = "Algorithmic profiling", journal = j-SIGPLAN, volume = "47", number = "6", pages = "67--76", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254074", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Traditional profilers identify where a program spends most of its resources. They do not provide information about why the program spends those resources or about how resource consumption would change for different program inputs. In this paper we introduce the idea of algorithmic profiling. While a traditional profiler determines a set of measured cost values, an algorithmic profiler determines a cost function.
It does that by automatically determining the ``inputs'' of a program, by measuring the program's ``cost'' for any given input, and by inferring an empirical cost function.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jin:2012:UDR, author = "Guoliang Jin and Linhai Song and Xiaoming Shi and Joel Scherpelz and Shan Lu", title = "Understanding and detecting real-world performance bugs", journal = j-SIGPLAN, volume = "47", number = "6", pages = "77--88", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254075", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Developers frequently use inefficient code sequences that could be fixed by simple patches. These inefficient code sequences can cause significant performance degradation and resource waste, referred to as performance bugs. Meager increases in single threaded performance in the multi-core era and increasing emphasis on energy efficiency call for more effort in tackling performance bugs. This paper conducts a comprehensive study of 110 real-world performance bugs that are randomly sampled from five representative software suites (Apache, Chrome, GCC, Mozilla, and MySQL). The findings of this study provide guidance for future work to avoid, expose, detect, and fix performance bugs. Guided by our characteristics study, efficiency rules are extracted from 25 patches and are used to detect performance bugs. 332 previously unknown performance problems are found in the latest versions of MySQL, Apache, and Mozilla applications, including 219 performance problems found by applying rules across applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Coppa:2012:ISP, author = "Emilio Coppa and Camil Demetrescu and Irene Finocchi", title = "Input-sensitive profiling", journal = j-SIGPLAN, volume = "47", number = "6", pages = "89--98", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254076", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "In this paper we present a profiling methodology and toolkit for helping developers discover hidden asymptotic inefficiencies in the code. From one or more runs of a program, our profiler automatically measures how the performance of individual routines scales as a function of the input size, yielding clues to their growth rate. The output of the profiler is, for each executed routine of the program, a set of tuples that aggregate performance costs by input size. The collected profiles can be used to produce performance plots and derive trend functions by statistical curve fitting or bounding techniques. A key feature of our method is the ability to automatically measure the size of the input given to a generic code fragment: to this aim, we propose an effective metric for estimating the input size of a routine and show how to compute it efficiently. 
We discuss several case studies, showing that our approach can reveal asymptotic bottlenecks that other profilers may fail to detect and characterize the workload and behavior of individual routines in the context of real applications. To prove the feasibility of our techniques, we implemented a Valgrind tool called aprof and performed an extensive experimental evaluation on the SPEC CPU2006 benchmarks. Our experiments show that aprof delivers comparable performance to other prominent Valgrind tools, and can generate informative plots even from single runs on typical workloads for most algorithmically-critical routines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2012:LBC, author = "Danfeng Zhang and Aslan Askarov and Andrew C. Myers", title = "Language-based control and mitigation of timing channels", journal = j-SIGPLAN, volume = "47", number = "6", pages = "99--110", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254078", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "We propose a new language-based approach to mitigating timing channels. In this language, well-typed programs provably leak only a bounded amount of information over time through external timing channels. By incorporating mechanisms for predictive mitigation of timing channels, this approach also permits a more expressive programming model. Timing channels arising from interaction with underlying hardware features such as instruction caches are controlled. Assumptions about the underlying hardware are explicitly formalized, supporting the design of hardware that efficiently controls timing channels. One such hardware design is modeled and used to show that timing channels can be controlled in some simple programs of real-world significance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chiw:2012:DPD, author = "Charisee Chiw and Gordon Kindlmann and John Reppy and Lamont Samuels and Nick Seltzer", title = "{Diderot}: a parallel {DSL} for image analysis and visualization", journal = j-SIGPLAN, volume = "47", number = "6", pages = "111--120", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254079", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Research scientists and medical professionals use imaging technology, such as computed tomography (CT) and magnetic resonance imaging (MRI) to measure a wide variety of biological and physical objects. The increasing sophistication of imaging technology creates demand for equally sophisticated computational techniques to analyze and visualize the image data. Analysis and visualization codes are often crafted for a specific experiment or set of images, thus imaging scientists need support for quickly developing codes that are reliable, robust, and efficient. 
In this paper, we present the design and implementation of Diderot, which is a parallel domain-specific language for biomedical image analysis and visualization. Diderot supports a high-level model of computation that is based on continuous tensor fields. These tensor fields are reconstructed from discrete image data using separable convolution kernels, but may also be defined by applying higher-order operations, such as differentiation ({$ \Delta $}). Early experiments demonstrate that Diderot provides both a high-level concise notation for image analysis and visualization algorithms, as well as high sequential and parallel performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cartey:2012:SGC, author = "Luke Cartey and Rune Lyngs{\o} and Oege de Moor", title = "Synthesising graphics card programs from {DSLs}", journal = j-SIGPLAN, volume = "47", number = "6", pages = "121--132", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254080", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Over the last five years, graphics cards have become a tempting target for scientific computing, thanks to unrivaled peak performance, often producing a runtime speed-up of x10 to x25 over comparable CPU solutions. However, this increase can be difficult to achieve, and doing so often requires a fundamental rethink. This is especially problematic in scientific computing, where experts do not want to learn yet another architecture. In this paper we develop a method for automatically parallelising recursive functions of the sort found in scientific papers. Using a static analysis of the function dependencies we identify sets --- partitions --- of independent elements, which we use to synthesise an efficient GPU implementation using polyhedral code generation techniques. We then augment our language with DSL extensions to support a wider variety of applications, and demonstrate the effectiveness of this with three case studies, showing significant performance improvement over equivalent CPU methods, and similar efficiency to hand-tuned GPU implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raman:2012:PSF, author = "Arun Raman and Ayal Zaks and Jae W. Lee and David I. August", title = "{Parcae}: a system for flexible parallel execution", journal = j-SIGPLAN, volume = "47", number = "6", pages = "133--144", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254082", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Workload, platform, and available resources constitute a parallel program's execution environment. Most parallelization efforts statically target an anticipated range of environments, but performance generally degrades outside that range. Existing approaches address this problem with dynamic tuning but do not optimize a multiprogrammed system holistically. 
Further, they either require manual programming effort or are limited to array-based data-parallel programs. This paper presents Parcae, a generally applicable automatic system for platform-wide dynamic tuning. Parcae includes (i) the Nona compiler, which creates flexible parallel programs whose tasks can be efficiently reconfigured during execution; (ii) the Decima monitor, which measures resource availability and system performance to detect change in the environment; and (iii) the Morta executor, which cuts short the life of executing tasks, replacing them with other functionally equivalent tasks better suited to the current environment. Parallel programs made flexible by Parcae outperform original parallel implementations in many interesting scenarios.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tripp:2012:JEP, author = "Omer Tripp and Roman Manevich and John Field and Mooly Sagiv", title = "{JANUS}: exploiting parallelism via hindsight", journal = j-SIGPLAN, volume = "47", number = "6", pages = "145--156", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254083", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "This paper addresses the problem of reducing unnecessary conflicts in optimistic synchronization. Optimistic synchronization must ensure that any two concurrently executing transactions that commit are properly synchronized. Conflict detection is an approximate check for this condition. For efficiency, the traditional approach to conflict detection conservatively checks that the memory locations mutually accessed by two concurrent transactions are accessed only for reading. We present JANUS, a parallelization system that performs conflict detection by considering sequences of operations and their composite effect on the system's state. This is done efficiently, such that the runtime overhead due to conflict detection is on a par with that of write-conflict-based detection. In certain common scenarios, this mode of refinement dramatically improves the precision of conflict detection, thereby reducing the number of false conflicts. Our empirical evaluation of JANUS shows that this precision gain reduces the abort rate by an order of magnitude (22x on average), and achieves a speedup of up to 2.5x, on a suite of real-world benchmarks where no parallelism is exploited by the standard approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Turon:2012:REC, author = "Aaron Turon", title = "{Reagents}: expressing and composing fine-grained concurrency", journal = j-SIGPLAN, volume = "47", number = "6", pages = "157--168", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254084", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Efficient communication and synchronization is crucial for fine grained parallelism. 
Libraries providing such features, while indispensable, are difficult to write, and often cannot be tailored or composed to meet the needs of specific users. We introduce reagents, a set of combinators for concisely expressing concurrency algorithms. Reagents scale as well as their hand-coded counterparts, while providing the composability existing libraries lack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Carbin:2012:PAP, author = "Michael Carbin and Deokhwan Kim and Sasa Misailovic and Martin C. Rinard", title = "Proving acceptability properties of relaxed nondeterministic approximate programs", journal = j-SIGPLAN, volume = "47", number = "6", pages = "169--180", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254086", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Approximate program transformations such as skipping tasks [29, 30], loop perforation [21, 22, 35], reduction sampling [38], multiple selectable implementations [3, 4, 16, 38], dynamic knobs [16], synchronization elimination [20, 32], approximate function memoization [11],and approximate data types [34] produce programs that can execute at a variety of points in an underlying performance versus accuracy tradeoff space. These transformed programs have the ability to trade accuracy of their results for increased performance by dynamically and nondeterministically modifying variables that control their execution. We call such transformed programs relaxed programs because they have been extended with additional nondeterminism to relax their semantics and enable greater flexibility in their execution. We present language constructs for developing and specifying relaxed programs. We also present proof rules for reasoning about properties [28] which the program must satisfy to be acceptable. Our proof rules work with two kinds of acceptability properties: acceptability properties [28], which characterize desired relationships between the values of variables in the original and relaxed programs, and unary acceptability properties, which involve values only from a single (original or relaxed) program. The proof rules support a staged reasoning approach in which the majority of the reasoning effort works with the original program. Exploiting the common structure that the original and relaxed programs share, relational reasoning transfers reasoning effort from the original program to prove properties of the relaxed program. We have formalized the dynamic semantics of our target programming language and the proof rules in Coq and verified that the proof rules are sound with respect to the dynamic semantics. 
Our Coq implementation enables developers to obtain fully machine-checked verifications of their relaxed programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dillig:2012:AED, author = "Isil Dillig and Thomas Dillig and Alex Aiken", title = "Automated error diagnosis using abductive inference", journal = j-SIGPLAN, volume = "47", number = "6", pages = "181--192", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254087", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "When program verification tools fail to verify a program, either the program is buggy or the report is a false alarm. In this situation, the burden is on the user to manually classify the report, but this task is time-consuming, error-prone, and does not utilize facts already proven by the analysis. We present a new technique for assisting users in classifying error reports. Our technique computes small, relevant queries presented to a user that capture exactly the information the analysis is missing to either discharge or validate the error. Our insight is that identifying these missing facts is an instance of the abductive inference problem in logic, and we present a new algorithm for computing the smallest and most general abductions in this setting. We perform the first user study to rigorously evaluate the accuracy and effort involved in manual classification of error reports. Our study demonstrates that our new technique is very useful for improving both the speed and accuracy of error report classification. Specifically, our approach improves classification accuracy from 33\% to 90\% and reduces the time programmers take to classify error reports from approximately 5 minutes to under 1 minute.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kuznetsov:2012:ESM, author = "Volodymyr Kuznetsov and Johannes Kinder and Stefan Bucur and George Candea", title = "Efficient state merging in symbolic execution", journal = j-SIGPLAN, volume = "47", number = "6", pages = "193--204", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254088", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Symbolic execution has proven to be a practical technique for building automated test case generation and bug finding tools. Nevertheless, due to state explosion, these tools still struggle to achieve scalability. Given a program, one way to reduce the number of states that the tools need to explore is to merge states obtained on different paths. Alas, doing so increases the size of symbolic path conditions (thereby stressing the underlying constraint solver) and interferes with optimizations of the exploration process (also referred to as search strategies). The net effect is that state merging may actually lower performance rather than increase it. 
We present a way to automatically choose when and how to merge states such that the performance of symbolic execution is significantly increased. First, we present query count estimation, a method for statically estimating the impact that each symbolic variable has on solver queries that follow a potential merge point; states are then merged only when doing so promises to be advantageous. Second, we present dynamic state merging, a technique for merging states that interacts favorably with search strategies in automated test case generation and bug finding tools. Experiments on the 96 GNU Coreutils show that our approach consistently achieves several orders of magnitude speedup over previously published results. Our code and experimental data are publicly available at http://cloud9.epfl.ch.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wu:2012:SPA, author = "Jingyue Wu and Yang Tang and Gang Hu and Heming Cui and Junfeng Yang", title = "Sound and precise analysis of parallel programs through schedule specialization", journal = j-SIGPLAN, volume = "47", number = "6", pages = "205--216", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254090", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Parallel programs are known to be difficult to analyze. A key reason is that they typically have an enormous number of execution interleavings, or schedules. Static analysis over all schedules requires over-approximations, resulting in poor precision; dynamic analysis rarely covers more than a tiny fraction of all schedules. We propose an approach called schedule specialization to analyze a parallel program over only a small set of schedules for precision, and then enforce these schedules at runtime for soundness of the static analysis results. We build a schedule specialization framework for C/C++ multithreaded programs that use Pthreads. Our framework avoids the need to modify every analysis to be schedule-aware by specializing a program into a simpler program based on a schedule, so that the resultant program can be analyzed with stock analyses for improved precision. Moreover, our framework provides a precise schedule-aware def-use analysis on memory locations, enabling us to build three highly precise analyses: an alias analyzer, a data-race detector, and a path slicer. Evaluation on 17 programs, including 2 real-world programs and 15 popular benchmarks, shows that analyses using our framework reduced may-aliases by 61.9\%, false race reports by 69\%, and path slices by 48.7\%; and detected 7 unknown bugs in well-checked programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Albarghouthi:2012:PTI, author = "Aws Albarghouthi and Rahul Kumar and Aditya V. Nori and Sriram K. 
Rajamani", title = "Parallelizing top-down interprocedural analyses", journal = j-SIGPLAN, volume = "47", number = "6", pages = "217--228", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254091", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Modularity is a central theme in any scalable program analysis. The core idea in a modular analysis is to build summaries at procedure boundaries, and use the summary of a procedure to analyze the effect of calling it at its calling context. There are two ways to perform a modular program analysis: (1) top-down and (2) bottomup. A bottom-up analysis proceeds upwards from the leaves of the call graph, and analyzes each procedure in the most general calling context and builds its summary. In contrast, a top-down analysis starts from the root of the call graph, and proceeds downward, analyzing each procedure in its calling context. Top-down analyses have several applications in verification and software model checking. However, traditionally, bottom-up analyses have been easier to scale and parallelize than top-down analyses. In this paper, we propose a generic framework, BOLT, which uses MapReduce style parallelism to scale top-down analyses. In particular, we consider top-down analyses that are demand driven, such as the ones used for software model checking. In such analyses, each intraprocedural analysis happens in the context of a reachability query. A query Q over a procedure P results in query tree that consists of sub-queries over the procedures called by P. The key insight in BOLT is that the query tree can be explored in parallel using MapReduce style parallelism --- the map stage can be used to run a set of enabled queries in parallel, and the reduce stage can be used to manage inter-dependencies between queries. Iterating the map and reduce stages alternately, we can exploit the parallelism inherent in top-down analyses. Another unique feature of BOLT is that it is parameterized by the algorithm used for intraprocedural analysis. Several kinds of analyses, including may analyses, must analyses, and may-must-analyses can be parallelized using BOLT. We have implemented the BOLT framework and instantiated the intraprocedural parameter with a may-must-analysis. We have run BOLT on a test suite consisting of 45 Microsoft Windows device drivers and 150 safety properties. Our results demonstrate an average speedup of 3.71x and a maximum speedup of 7.4x (with 8 cores) over a sequential analysis. 
Moreover, in several checks where a sequential analysis fails, BOLT is able to successfully complete its analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Oh:2012:DIS, author = "Hakjoo Oh and Kihong Heo and Wonchan Lee and Woosuk Lee and Kwangkeun Yi", title = "Design and implementation of sparse global analyses for {C}-like languages", journal = j-SIGPLAN, volume = "47", number = "6", pages = "229--238", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254092", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "In this article we present a general method for achieving global static analyzers that are precise, sound, yet also scalable. Our method generalizes the sparse analysis techniques on top of the abstract interpretation framework to support relational as well as non-relational semantics properties for C-like languages. We first use the abstract interpretation framework to have a global static analyzer whose scalability is unattended. Upon this underlying sound static analyzer, we add our generalized sparse analysis techniques to improve its scalability while preserving the precision of the underlying analysis. Our framework determines what to prove to guarantee that the resulting sparse version should preserve the precision of the underlying analyzer. We formally present our framework; we present that existing sparse analyses are all restricted instances of our framework; we show more semantically elaborate design examples of sparse non-relational and relational static analyses; we present their implementation results that scale to analyze up to one million lines of C programs. We also show a set of implementation techniques that turn out to be critical to economically support the sparse analysis process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hackett:2012:FPH, author = "Brian Hackett and Shu-yu Guo", title = "Fast and precise hybrid type inference for {JavaScript}", journal = j-SIGPLAN, volume = "47", number = "6", pages = "239--250", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "JavaScript performance is often bound by its dynamically typed nature. Compilers do not have access to static type information, making generation of efficient, type-specialized machine code difficult. We seek to solve this problem by inferring types. In this paper we present a hybrid type inference algorithm for JavaScript based on points-to analysis. Our algorithm is fast, in that it pays for itself in the optimizations it enables. Our algorithm is also precise, generating information that closely reflects the program's actual behavior even when analyzing polymorphic code, by augmenting static analysis with run-time type barriers. We showcase an implementation for Mozilla Firefox's JavaScript engine, demonstrating both performance gains and viability. 
Through integration with the just-in-time (JIT) compiler in Firefox, we have improved performance on major benchmarks and JavaScript-heavy websites by up to 50\%. Inference-enabled compilation is the default compilation mode as of Firefox 9.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Petrov:2012:RDW, author = "Boris Petrov and Martin Vechev and Manu Sridharan and Julian Dolby", title = "Race detection for {Web} applications", journal = j-SIGPLAN, volume = "47", number = "6", pages = "251--262", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254095", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Modern web pages are becoming increasingly full-featured, and this additional functionality often requires greater use of asynchrony. Unfortunately, this asynchrony can trigger unexpected concurrency errors, even though web page scripts are executed sequentially. We present the first formulation of a happens-before relation for common web platform features. Developing this relation was a non-trivial task, due to complex feature interactions and browser differences. We also present a logical memory access model for web applications that abstracts away browser implementation details. Based on the above, we implemented WebRacer, the first dynamic race detector for web applications. WebRacer is implemented atop the production-quality WebKit engine, enabling testing of full-featured web sites. WebRacer can also simulate certain user actions, exposing more races. We evaluated WebRacer by testing a large set of Fortune 100 company web sites. We discovered many harmful races, and also gained insights into how developers handle asynchrony in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fischer:2012:EDM, author = "Jeffrey Fischer and Rupak Majumdar and Shahram Esmaeilsabzali", title = "{Engage}: a deployment management system", journal = j-SIGPLAN, volume = "47", number = "6", pages = "263--274", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254096", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Many modern applications are built by combining independently developed packages and services that are distributed over many machines with complex inter-dependencies. The assembly, installation, and management of such applications is hard, and usually performed either manually or by writing customized scripts. We present Engage, a system for configuring, installing, and managing complex application stacks. Engage consists of three components: a domain-specific model to describe component metadata and inter-component dependencies; a constraint-based algorithm that takes a partial installation specification and computes a full installation plan; and a runtime system that co-ordinates the deployment of the application across multiple machines and manages the deployed system. 
By explicitly modeling configuration metadata and inter-component dependencies, Engage enables static checking of application configurations and automated, constraint-driven, generation of installation plans across multiple machines. This reduces the tedious manual process of application configuration, installation, and management. We have implemented Engage and we have used it to successfully host a number of applications. We describe our experiences in using Engage to manage a generic platform that hosts Django applications in the cloud or on premises.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Perelman:2012:TDC, author = "Daniel Perelman and Sumit Gulwani and Thomas Ball and Dan Grossman", title = "Type-directed completion of partial expressions", journal = j-SIGPLAN, volume = "47", number = "6", pages = "275--286", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254098", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Modern programming frameworks provide enormous libraries arranged in complex structures, so much so that a large part of modern programming is searching for APIs that surely exist ``somewhere in an unfamiliar part of the framework.'' We present a novel way of phrasing a search for an unknown API: the programmer simply writes an expression leaving holes for the parts they do not know. We call these expressions partial expressions. We present an efficient algorithm that produces likely completions ordered by a ranking scheme based primarily on the similarity of the types of the APIs suggested to the types of the known expressions. This gives a powerful language for both API discovery and code completion with a small impedance mismatch from writing code. In an automated experiment on mature C\# projects, we show our algorithm can place the intended expression in the top 10 choices over 80\% of the time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{hunEom:2012:SSJ, author = "Yong hun Eom and Brian Demsky", title = "Self-stabilizing {Java}", journal = j-SIGPLAN, volume = "47", number = "6", pages = "287--298", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254099", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Self-stabilizing programs automatically recover from state corruption caused by software bugs and other sources to reach the correct state. A number of applications are inherently self-stabilizing---such programs typically overwrite all non-constant data with new input data. We present a type system and static analyses that together check whether a program is self-stabilizing. We combine this with a code generation strategy that ensures that a program continues executing long enough to self-stabilize. 
Our experience using SJava indicates that (1) SJava annotations are easy to write once one understands a program and (2) SJava successfully checked that several benchmarks were self-stabilizing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2012:TDA, author = "Yan Chen and Joshua Dunfield and Umut A. Acar", title = "Type-directed automatic incrementalization", journal = j-SIGPLAN, volume = "47", number = "6", pages = "299--310", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254100", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Application data often changes slowly or incrementally over time. Since incremental changes to input often result in only small changes in output, it is often feasible to respond to such changes asymptotically more efficiently than by re-running the whole computation. Traditionally, realizing such asymptotic efficiency improvements requires designing problem-specific algorithms known as dynamic or incremental algorithms, which are often significantly more complicated than conventional algorithms to design, analyze, implement, and use. A long-standing open problem is to develop techniques that automatically transform conventional programs so that they correctly and efficiently respond to incremental changes. In this paper, we describe a significant step towards solving the problem of automatic incrementalization: a programming language and a compiler that can, given a few type annotations describing what can change over time, compile a conventional program that assumes its data to be static (unchanging over time) to an incremental program. Based on recent advances in self-adjusting computation, including a theoretical proposal for translating purely functional programs to self-adjusting programs, we develop techniques for translating conventional Standard ML programs to self-adjusting programs. By extending the Standard ML language, we design a fully featured programming language with higher-order features, a module system, and a powerful type system, and implement a compiler for this language. The resulting programming language, LML, enables translating conventional programs decorated with simple type annotations into incremental programs that can respond to changes in their data correctly and efficiently. We evaluate the effectiveness of our approach by considering a range of benchmarks involving lists, vectors, and matrices, as well as a ray tracer. For these benchmarks, our compiler incrementalizes existing code with only trivial amounts of annotation. 
The resulting programs are often asymptotically more efficient, leading to orders of magnitude speedups in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sarkar:2012:SCC, author = "Susmit Sarkar and Kayvan Memarian and Scott Owens and Mark Batty and Peter Sewell and Luc Maranget and Jade Alglave and Derek Williams", title = "Synchronising {C\slash C++} and {POWER}", journal = j-SIGPLAN, volume = "47", number = "6", pages = "311--322", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254102", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Shared memory concurrency relies on synchronisation primitives: compare-and-swap, load-reserve/store-conditional (aka LL/SC), language-level mutexes, and so on. In a sequentially consistent setting, or even in the TSO setting of x86 and Sparc, these have well-understood semantics. But in the very relaxed settings of IBM\reg{}, POWER\reg{}, ARM, or C/C++, it remains surprisingly unclear exactly what the programmer can depend on. This paper studies relaxed-memory synchronisation. On the hardware side, we give a clear semantic characterisation of the load-reserve/store-conditional primitives as provided by POWER multiprocessors, for the first time since they were introduced 20 years ago; we cover their interaction with relaxed loads, stores, barriers, and dependencies. Our model, while not officially sanctioned by the vendor, is validated by extensive testing, comparing actual implementation behaviour against an oracle generated from the model, and by detailed discussion with IBM staff. We believe the ARM semantics to be similar. On the software side, we prove sound a proposed compilation scheme of the C/C++ synchronisation constructs to POWER, including C/C++ spinlock mutexes, fences, and read-modify-write operations, together with the simpler atomic operations for which soundness is already known from our previous work; this is a first step in verifying concurrent algorithms that use load-reserve/store-conditional with respect to a realistic semantics. We also build confidence in the C/C++ model in its own terms, fixing some omissions and contributing to the C standards committee adoption of the C++11 concurrency model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gazzillo:2012:SPA, author = "Paul Gazzillo and Robert Grimm", title = "{SuperC}: parsing all of {C} by taming the preprocessor", journal = j-SIGPLAN, volume = "47", number = "6", pages = "323--334", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254103", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "C tools, such as source browsers, bug finders, and automated refactorings, need to process two languages: C itself and the preprocessor. The latter improves expressivity through file includes, macros, and static conditionals. But it operates only on tokens, making it hard to even parse both languages. 
This paper presents a complete, performant solution to this problem. First, a configuration-preserving preprocessor resolves includes and macros yet leaves static conditionals intact, thus preserving a program's variability. To ensure completeness, we analyze all interactions between preprocessor features and identify techniques for correctly handling them. Second, a configuration-preserving parser generates a well-formed AST with static choice nodes for conditionals. It forks new subparsers when encountering static conditionals and merges them again after the conditionals. To ensure performance, we present a simple algorithm for table-driven Fork-Merge LR parsing and four novel optimizations. We demonstrate the effectiveness of our approach on the x86 Linux kernel.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Regehr:2012:TCR, author = "John Regehr and Yang Chen and Pascal Cuoq and Eric Eide and Chucky Ellison and Xuejun Yang", title = "Test-case reduction for {C} compiler bugs", journal = j-SIGPLAN, volume = "47", number = "6", pages = "335--346", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254104", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "To report a compiler bug, one must often find a small test case that triggers the bug. The existing approach to automated test-case reduction, delta debugging, works by removing substrings of the original input; the result is a concatenation of substrings that delta cannot remove. We have found this approach less than ideal for reducing C programs because it typically yields test cases that are too large or even invalid (relying on undefined behavior). To obtain small and valid test cases consistently, we designed and implemented three new, domain-specific test-case reducers. The best of these is based on a novel framework in which a generic fixpoint computation invokes modular transformations that perform reduction operations. This reducer produces outputs that are, on average, more than 25 times smaller than those produced by our other reducers or by the existing reducer that is most commonly used by compiler developers. We conclude that effective program reduction requires more than straightforward delta debugging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2012:CFE, author = "Jun Liu and Yuanrui Zhang and Ohyoung Jang and Wei Ding and Mahmut Kandemir", title = "A compiler framework for extracting superword level parallelism", journal = j-SIGPLAN, volume = "47", number = "6", pages = "347--358", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254106", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "SIMD (single-instruction multiple-data) instruction set extensions are quite common today in both high performance and embedded microprocessors, and enable the exploitation of a specific type of data parallelism called SLP (Superword Level Parallelism). 
While prior research shows that significant performance savings are possible when SLP is exploited, placing SIMD instructions in an application code manually can be very difficult and error prone. In this paper, we propose a novel automated compiler framework for improving superword level parallelism exploitation. The key part of our framework consists of two stages: superword statement generation and data layout optimization. The first stage is our main contribution and has two phases, statement grouping and statement scheduling, of which the primary goals are to increase SIMD parallelism and, more importantly, capture more superword reuses among the superword statements through global data access and reuse pattern analysis. Further, as a complementary optimization, our data layout optimization organizes data in memory space such that the price of memory operations for SLP is minimized. The results from our compiler implementation and tests on two systems indicate performance improvements as high as 15.2\% over a state-of-the-art SLP optimization algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Johnson:2012:SSP, author = "Nick P. Johnson and Hanjun Kim and Prakash Prabhu and Ayal Zaks and David I. August", title = "Speculative separation for privatization and reductions", journal = j-SIGPLAN, volume = "47", number = "6", pages = "359--370", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254107", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Automatic parallelization is a promising strategy to improve application performance in the multicore era. However, common programming practices such as the reuse of data structures introduce artificial constraints that obstruct automatic parallelization. Privatization relieves these constraints by replicating data structures, thus enabling scalable parallelization. Prior privatization schemes are limited to arrays and scalar variables because they are sensitive to the layout of dynamic data structures. This work presents Privateer, the first fully automatic privatization system to handle dynamic and recursive data structures, even in languages with unrestricted pointers. To reduce sensitivity to memory layout, Privateer speculatively separates memory objects. Privateer's lightweight runtime system validates speculative separation and speculative privatization to ensure correct parallel execution. Privateer enables automatic parallelization of general-purpose C/C++ applications, yielding a geomean whole-program speedup of 11.4x over best sequential execution on 24 cores, while non-speculative parallelization yields only 0.93x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Holewinski:2012:DTB, author = "Justin Holewinski and Ragavendar Ramamurthi and Mahesh Ravishankar and Naznin Fauzia and Louis-No{\"e}l Pouchet and Atanas Rountev and P. 
Sadayappan", title = "Dynamic trace-based analysis of vectorization potential of applications", journal = j-SIGPLAN, volume = "47", number = "6", pages = "371--382", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254108", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Recent hardware trends with GPUs and the increasing vector lengths of SSE-like ISA extensions for multicore CPUs imply that effective exploitation of SIMD parallelism is critical for achieving high performance on emerging and future architectures. A vast majority of existing applications were developed without any attention by their developers towards effective vectorizability of the codes. While developers of production compilers such as GNU gcc, Intel icc, PGI pgcc, and IBM xlc have invested considerable effort and made significant advances in enhancing automatic vectorization capabilities, these compilers still cannot effectively vectorize many existing scientific and engineering codes. It is therefore of considerable interest to analyze existing applications to assess the inherent latent potential for SIMD parallelism, exploitable through further compiler advances and/or via manual code changes. In this paper we develop an approach to infer a program's SIMD parallelization potential by analyzing the dynamic data-dependence graph derived from a sequential execution trace. By considering only the observed run-time data dependences for the trace, and by relaxing the execution order of operations to allow any dependence-preserving reordering, we can detect potential SIMD parallelism that may otherwise be missed by more conservative compile-time analyses. We show that for several benchmarks our tool discovers regions of code within computationally-intensive loops that exhibit high potential for SIMD parallelism but are not vectorized by state-of-the-art compilers. We present several case studies of the use of the tool, both in identifying opportunities to enhance the transformation capabilities of vectorizing compilers, as well as in pointing to code regions to manually modify in order to enable auto-vectorization and performance improvement by existing compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leung:2012:VGK, author = "Alan Leung and Manish Gupta and Yuvraj Agarwal and Rajesh Gupta and Ranjit Jhala and Sorin Lerner", title = "Verifying {GPU} kernels by test amplification", journal = j-SIGPLAN, volume = "47", number = "6", pages = "383--394", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254110", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "We present a novel technique for verifying properties of data parallel GPU programs via test amplification. The key insight behind our work is that we can use the technique of static information flow to amplify the result of a single test execution over the set of all inputs and interleavings that affect the property being verified. 
We empirically demonstrate the effectiveness of test amplification for verifying race-freedom and determinism over a large number of standard GPU kernels, by showing that the result of verifying a single dynamic execution can be amplified over the massive space of possible data inputs and thread interleavings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morrisett:2012:RBF, author = "Greg Morrisett and Gang Tan and Joseph Tassarotti and Jean-Baptiste Tristan and Edward Gan", title = "{RockSalt}: better, faster, stronger {SFI} for the x86", journal = j-SIGPLAN, volume = "47", number = "6", pages = "395--404", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254111", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Software-based fault isolation (SFI), as used in Google's Native Client (NaCl), relies upon a conceptually simple machine-code analysis to enforce a security policy. But for complicated architectures such as the x86, it is all too easy to get the details of the analysis wrong. We have built a new checker that is smaller, faster, and has a much reduced trusted computing base when compared to Google's original analysis. The key to our approach is automatically generating the bulk of the analysis from a declarative description which we relate to a formal model of a subset of the x86 instruction set architecture. The x86 model, developed in Coq, is of independent interest and should be usable for a wide range of machine-level verification tasks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Grebenshchikov:2012:SSV, author = "Sergey Grebenshchikov and Nuno P. Lopes and Corneliu Popeea and Andrey Rybalchenko", title = "Synthesizing software verifiers from proof rules", journal = j-SIGPLAN, volume = "47", number = "6", pages = "405--416", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254112", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Automatically generated tools can significantly improve programmer productivity. For example, parsers and dataflow analyzers can be automatically generated from declarative specifications in the form of grammars, which tremendously simplifies the task of implementing a compiler. In this paper, we present a method for the automatic synthesis of software verification tools. Our synthesis procedure takes as input a description of the employed proof rule, e.g., program safety checking via inductive invariants, and produces a tool that automatically discovers the auxiliary assertions required by the proof rule, e.g., inductive loop invariants and procedure summaries. We rely on a (standard) representation of proof rules using recursive equations over the auxiliary assertions. The discovery of auxiliary assertions, i.e., solving the equations, is based on an iterative process that extrapolates solutions obtained for finitary unrollings of equations. 
We show how our method synthesizes automatic safety and liveness verifiers for programs with procedures, multi-threaded programs, and functional programs. Our experimental comparison of the resulting verifiers with existing state-of-the-art verification tools confirms the practicality of the approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hawkins:2012:CDR, author = "Peter Hawkins and Alex Aiken and Kathleen Fisher and Martin Rinard and Mooly Sagiv", title = "Concurrent data representation synthesis", journal = j-SIGPLAN, volume = "47", number = "6", pages = "417--428", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254114", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "We describe an approach for synthesizing data representations for concurrent programs. Our compiler takes as input a program written using concurrent relations and synthesizes a representation of the relations as sets of cooperating data structures as well as the placement and acquisition of locks to synchronize concurrent access to those data structures. The resulting code is correct by construction: individual relational operations are implemented correctly and the aggregate set of operations is serializable and deadlock free. The relational specification also permits a high-level optimizer to choose the best performing of many possible legal data representations and locking strategies, which we demonstrate with an experiment autotuning a graph benchmark.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2012:DSR, author = "Feng Liu and Nayden Nedev and Nedyalko Prisadnikov and Martin Vechev and Eran Yahav", title = "Dynamic synthesis for relaxed memory models", journal = j-SIGPLAN, volume = "47", number = "6", pages = "429--440", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254115", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Modern architectures implement relaxed memory models which may reorder memory operations or execute them non-atomically. Special instructions called memory fences are provided, allowing control of this behavior. To implement a concurrent algorithm for a modern architecture, the programmer is forced to manually reason about subtle relaxed behaviors and figure out ways to control these behaviors by adding fences to the program. Not only is this process time consuming and error-prone, but it has to be repeated every time the implementation is ported to a different architecture. In this paper, we present the first scalable framework for handling real-world concurrent algorithms running on relaxed architectures. Given a concurrent C program, a safety specification, and a description of the memory model, our framework tests the program on the memory model to expose violations of the specification, and synthesizes a set of necessary ordering constraints that prevent these violations. 
The ordering constraints are then realized as additional fences in the program. We implemented our approach in a tool called DFence based on LLVM and used it to infer fences in a number of concurrent algorithms. Using DFence, we perform the first in-depth study of the interaction between fences in real-world concurrent C programs, correctness criteria such as sequential consistency and linearizability, and memory models such as TSO and PSO, yielding many interesting observations. We believe that this is the first tool that can handle programs at the scale and complexity of a lock-free memory allocator.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Godefroid:2012:ASS, author = "Patrice Godefroid and Ankur Taly", title = "Automated synthesis of symbolic instruction encodings from {I/O} samples", journal = j-SIGPLAN, volume = "47", number = "6", pages = "441--452", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254116", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Symbolic execution is a key component of precise binary program analysis tools. We discuss how to automatically boot-strap the construction of a symbolic execution engine for a processor instruction set such as x86, x64 or ARM. We show how to automatically synthesize symbolic representations of individual processor instructions from input/output examples and express them as bit-vector constraints. We present and compare various synthesis algorithms and instruction sampling strategies. We introduce a new synthesis algorithm based on smart sampling which we show is one to two orders of magnitude faster than previous synthesis algorithms in our context. With this new algorithm, we can automatically synthesize bit-vector circuits for over 500 x86 instructions (8/16/32-bits, outputs, EFLAGS) using only 6 synthesis templates and in less than two hours using the Z3 SMT solver on a regular machine. During this work, we also discovered several inconsistencies across x86 processors, errors in the x86 Intel spec, and several bugs in previous manually-written x86 instruction handlers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Benz:2012:DPA, author = "Florian Benz and Andreas Hildebrandt and Sebastian Hack", title = "A dynamic program analysis to find floating-point accuracy problems", journal = j-SIGPLAN, volume = "47", number = "6", pages = "453--462", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254118", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Programs using floating-point arithmetic are prone to accuracy problems caused by rounding and catastrophic cancellation. These phenomena provoke bugs that are notoriously hard to track down: the program does not necessarily crash and the results are not necessarily obviously wrong, but often subtly inaccurate. Further use of these values can lead to catastrophic errors. 
In this paper, we present a dynamic program analysis that supports the programmer in finding accuracy problems. Our analysis uses binary translation to perform every floating-point computation side by side in higher precision. Furthermore, we use a lightweight slicing approach to track the evolution of errors. We evaluate our analysis by demonstrating that it catches well-known floating-point accuracy problems and by analyzing the Spec CFP2006 floating-point benchmark. In the latter, we show how our tool tracks down a catastrophic cancellation that causes a complete loss of accuracy leading to a meaningless program result. Finally, we apply our program to a complex, real-world bioinformatics application in which our program detected a serious cancellation. Correcting the instability led not only to improved quality of the result, but also to an improvement of the program's run time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lee:2012:CHP, author = "Dongyoon Lee and Peter M. Chen and Jason Flinn and Satish Narayanasamy", title = "{Chimera}: hybrid program analysis for determinism", journal = j-SIGPLAN, volume = "47", number = "6", pages = "463--474", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254119", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Chimera uses a new hybrid program analysis to provide deterministic replay for commodity multiprocessor systems. Chimera leverages the insight that it is easy to provide deterministic multiprocessor replay for data-race-free programs (one can just record non-deterministic inputs and the order of synchronization operations), so if we can somehow transform an arbitrary program to be data-race-free, then we can provide deterministic replay cheaply for that program. To perform this transformation, Chimera uses a sound static data-race detector to find all potential data-races. It then instruments pairs of potentially racing instructions with a weak-lock, which provides sufficient guarantees to allow deterministic replay but does not guarantee mutual exclusion. Unsurprisingly, a large fraction of data-races found by the static tool are false data-races, and instrumenting each of them with a weak-lock results in prohibitively high overhead.
Chimera drastically reduces this cost from 53x to 1.39x by increasing the granularity of weak-locks without significantly compromising on parallelism. This is achieved by employing a combination of profiling and symbolic analysis techniques that target the sources of imprecision in the static data-race detector. We find that performance overhead for deterministic recording is 2.4\% on average for Apache and desktop applications and about 86\% for scientific applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{deKruijf:2012:SAC, author = "Marc A. de Kruijf and Karthikeyan Sankaralingam and Somesh Jha", title = "Static analysis and compiler design for idempotent processing", journal = j-SIGPLAN, volume = "47", number = "6", pages = "475--486", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254120", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Recovery functionality has many applications in computing systems, from speculation recovery in modern microprocessors to fault recovery in high-reliability systems. Modern systems commonly recover using checkpoints. However, checkpoints introduce overheads, add complexity, and often save more state than necessary. This paper develops a novel compiler technique to recover program state without the overheads of explicit checkpoints. The technique breaks programs into idempotent regions ---regions that can be freely re-executed---which allows recovery without checkpointed state. Leveraging the property of idempotence, recovery can be obtained by simple re-execution. We develop static analysis techniques to construct these regions and demonstrate low overheads and large region sizes for an LLVM-based implementation. Across a set of diverse benchmark suites, we construct idempotent regions close in size to those that could be obtained with perfect runtime information. Although the resulting code runs more slowly, typical performance overheads are in the range of just 2-12\%. The paradigm of executing entire programs as a series of idempotent regions we call idempotent processing, and it has many applications in computer systems. As a concrete example, we demonstrate it applied to the problem of compiler-automated hardware fault recovery. 
In comparison to two other state-of-the-art techniques, redundant execution and checkpoint-logging, our idempotent processing technique outperforms both by over 15\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feng:2012:EPL, author = "Min Feng and Rajiv Gupta and Iulian Neamtiu", title = "Effective parallelization of loops in the presence of {I/O} operations", journal = j-SIGPLAN, volume = "47", number = "6", pages = "487--498", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254122", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Software-based thread-level parallelization has been widely studied for exploiting data parallelism in purely computational loops to improve program performance on multiprocessors. However, none of the previous efforts deal with efficient parallelization of hybrid loops, i.e., loops that contain a mix of computation and I/O operations. In this paper, we propose a set of techniques for efficiently parallelizing hybrid loops. Our techniques apply DOALL parallelism to hybrid loops by breaking the cross-iteration dependences caused by I/O operations. We also support speculative execution of I/O operations to enable speculative parallelization of hybrid loops. Helper threading is used to reduce the I/O bus contention caused by the improved parallelism. We provide an easy-to-use programming model for exploiting parallelism in loops with I/O operations. Parallelizing hybrid loops using our model requires few modifications to the code. We have developed a prototype implementation of our programming model. We have evaluated our implementation on a 24-core machine using eight applications, including a widely-used genomic sequence assembler and a multi-player game server, and others from PARSEC and SPEC CPU2000 benchmark suites. The hybrid loops in these applications take 23\%-99\% of the total execution time on our 24-core machine. The parallelized applications achieve speedups of 3.0x-12.8x with hybrid loop parallelization over the sequential versions of the same applications. Compared to the versions of applications where only computation loops are parallelized, hybrid loop parallelization improves the application performance by 68\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2012:PSR, author = "Chun Chen", title = "Polyhedra scanning revisited", journal = j-SIGPLAN, volume = "47", number = "6", pages = "499--508", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "This paper presents a new polyhedra scanning system called CodeGen+ to address the challenge of generating high-performance code for complex iteration spaces resulting from compiler optimization and autotuning systems. The strength of our approach lies in two new algorithms. 
First, a loop overhead removal algorithm provides precise control of trade-offs between loop overhead and code size based on actual loop nesting depth. Second, an if-statement simplification algorithm further reduces the number of comparisons in the code. These algorithms combined with the expressive power of Presburger arithmetic enable CodeGen+ to support complex optimization strategies expressed in iteration spaces. We compare with the state-of-the-art polyhedra scanning tool CLooG on five loop nest computations, demonstrating that CodeGen+ generates code that is simpler and up to 1.15x faster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Oancea:2012:LIT, author = "Cosmin E. Oancea and Lawrence Rauchwerger", title = "Logical inference techniques for loop parallelization", journal = j-SIGPLAN, volume = "47", number = "6", pages = "509--520", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "This paper presents a fully automatic approach to loop parallelization that integrates the use of static and run-time analysis and thus overcomes many known difficulties such as nonlinear and indirect array indexing and complex control flow. Our hybrid analysis framework validates the parallelization transformation by verifying the independence of the loop's memory references. To this end it represents array references using the USR (uniform set representation) language and expresses the independence condition as an equation, $S = 0$, where S is a set expression representing array indexes. Using a language instead of an array-abstraction representation for S results in a smaller number of conservative approximations but exhibits a potentially-high runtime cost. To alleviate this cost we introduce a language translation F from the USR set-expression language to an equally rich language of predicates ($F(S) \implies S = 0$). Loop parallelization is then validated using a novel logic inference algorithm that factorizes the obtained complex predicates ($F(S)$) into a sequence of sufficient independence conditions that are evaluated first statically and, when needed, dynamically, in increasing order of their estimated complexities. We evaluate our automated solution on 26 benchmarks from PERFECT-Club and SPEC suites and show that our approach is effective in parallelizing large, complex loops and obtains much better full program speedups than the Intel and IBM Fortran compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pradel:2012:FAP, author = "Michael Pradel and Thomas R.
Gross", title = "Fully automatic and precise detection of thread safety violations", journal = j-SIGPLAN, volume = "47", number = "6", pages = "521--530", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254126", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Concurrent, object-oriented programs often use thread-safe library classes. Existing techniques for testing a thread-safe class either rely on tests using the class, on formal specifications, or on both. Unfortunately, these techniques often are not fully automatic as they involve the user in analyzing the output. This paper presents an automatic testing technique that reveals concurrency bugs in supposedly thread-safe classes. The analysis requires as input only the class under test and reports only true positives. The key idea is to generate tests in which multiple threads call methods on a shared instance of the tested class. If a concurrent test exhibits an exception or a deadlock that cannot be triggered in any linearized execution of the test, the analysis reports a thread safety violation. The approach is easily applicable, because it is independent of hand-written tests and explicit specifications. The analysis finds 15 concurrency bugs in popular Java libraries, including two previously unknown bugs in the Java standard library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raman:2012:SPD, author = "Raghavan Raman and Jisheng Zhao and Vivek Sarkar and Martin Vechev and Eran Yahav", title = "Scalable and precise dynamic datarace detection for structured parallelism", journal = j-SIGPLAN, volume = "47", number = "6", pages = "531--542", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254127", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Existing dynamic race detectors suffer from at least one of the following three limitations: (i) space overhead per memory location grows linearly with the number of parallel threads [13], severely limiting the parallelism that the algorithm can handle; (ii) sequentialization: the parallel program must be processed in a sequential order, usually depth-first [12, 24]. This prevents the analysis from scaling with available hardware parallelism, inherently limiting its performance; (iii) inefficiency: even though race detectors with good theoretical complexity exist, they do not admit efficient implementations and are unsuitable for practical use [4, 18]. We present a new precise dynamic race detector that leverages structured parallelism in order to address these limitations. Our algorithm requires constant space per memory location, works in parallel, and is efficient in practice. We implemented and evaluated our algorithm on a set of 15 benchmarks. 
Our experimental results indicate an average (geometric mean) slowdown of 2.78x on a 16-core SMP system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nagarakatte:2012:MAP, author = "Santosh Nagarakatte and Sebastian Burckhardt and Milo M. K. Martin and Madanlal Musuvathi", title = "Multicore acceleration of priority-based schedulers for concurrency bug detection", journal = j-SIGPLAN, volume = "47", number = "6", pages = "543--554", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Testing multithreaded programs is difficult as threads can interleave in a nondeterministic fashion. Untested interleavings can cause failures, but testing all interleavings is infeasible. Many interleaving exploration strategies for bug detection have been proposed, but their relative effectiveness and performance remains unclear as they often lack publicly available implementations and have not been evaluated using common benchmarks. We describe NeedlePoint, an open-source framework that allows selection and comparison of a wide range of interleaving exploration policies for bug detection proposed by prior work. Our experience with NeedlePoint indicates that priority-based probabilistic concurrency testing (the PCT algorithm) finds bugs quickly, but it runs only one thread at a time, which destroys parallelism by serializing executions. To address this problem we propose a parallel version of the PCT algorithm (PPCT). We show that the new algorithm outperforms the original by a factor of 5x when testing parallel programs on an eight-core machine. We formally prove that parallel PCT provides the same probabilistic coverage guarantees as PCT. Moreover, PPCT is the first algorithm that runs multiple threads while providing coverage guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nieh:2012:CBR, author = "Jason Nieh", title = "Challenges in building a real, large private cloud", journal = j-SIGPLAN, volume = "47", number = "7", pages = "1--2", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151026", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Virtualization and internal cloud are often touted as the solution to many challenging problems, from resource underutilization to data-center optimization and carbon emission reduction. However, the hidden costs of cloud-scale virtualization, largely stemming from the complex and difficult system administration challenges it poses, are often overlooked. 
Reaping the fruits of an internal Infrastructure-as-a-Service cloud requires the enterprise to navigate scalability limitations, revamp traditional operational practices, manage performance, and achieve unprecedented cross-silo collaboration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kundu:2012:MVA, author = "Sajib Kundu and Raju Rangaswami and Ajay Gulati and Ming Zhao and Kaushik Dutta", title = "Modeling virtualized applications using machine learning techniques", journal = j-SIGPLAN, volume = "47", number = "7", pages = "3--14", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151028", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "With the growing adoption of virtualized datacenters and cloud hosting services, the allocation and sizing of resources such as CPU, memory, and I/O bandwidth for virtual machines (VMs) is becoming increasingly important. Accurate performance modeling of an application would help users in better VM sizing, thus reducing costs. It can also benefit cloud service providers who can offer a new charging model based on the VMs' performance instead of their configured sizes. In this paper, we present techniques to model the performance of a VM-hosted application as a function of the resources allocated to the VM and the resource contention it experiences. To address this multi-dimensional modeling problem, we propose and refine the use of two machine learning techniques: artificial neural network (ANN) and support vector machine (SVM). We evaluate these modeling techniques using five virtualized applications from the RUBiS and Filebench suite of benchmarks and demonstrate that their median and 90th percentile prediction errors are within 4.36\% and 29.17\%, respectively. These results are substantially better than regression-based approaches as well as direct applications of machine learning techniques without our refinements. We also present a simple and effective approach to VM sizing and empirically demonstrate that it can deliver optimal results for 65\% of the sizing problems that we studied and produces close-to-optimal sizes for the remaining 35\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lv:2012:VCV, author = "Hui Lv and Yaozu Dong and Jiangang Duan and Kevin Tian", title = "Virtualization challenges: a view from server consolidation perspective", journal = j-SIGPLAN, volume = "47", number = "7", pages = "15--26", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151030", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Server consolidation, by running multiple virtual machines on top of a single platform with virtualization, provides an efficient solution for the parallelism and utilization of modern multi-core processor systems. However, the performance and scalability of server consolidation solutions on modern large-scale servers are not well addressed.
In this paper, we conduct a comprehensive characterization of Xen performance and scalability running SPECvirt\_sc2010, and identify that a large memory and cache footprint, caused by unnecessarily frequent context switches, introduces additional challenges to system performance and scalability. We propose two optimizations (dynamically-allocable tasklets and a context-switch rate controller) to improve performance. The results show improved memory and cache efficiency with a reduction in overall CPI, improving server consolidation capability by 15\% in SPECvirt\_sc2010. At the same time, our optimizations accelerate service response by up to 50\%, which greatly improves the QoS of the Xen virtualization solution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wang:2012:RCV, author = "Wei Wang and Tanima Dey and Ryan W. Moore and Mahmut Aktasoglu and Bruce R. Childers and Jack W. Davidson and Mary Jane Irwin and Mahmut Kandemir and Mary Lou Soffa", title = "{REEact}: a customizable virtual execution manager for multicore platforms", journal = j-SIGPLAN, volume = "47", number = "7", pages = "27--38", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151031", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "With the shift to many-core chip multiprocessors (CMPs), a critical issue is how to effectively coordinate and manage the execution of applications and hardware resources to overcome performance, power consumption, and reliability challenges stemming from hardware and application variations inherent in this new computing environment. Effective resource and application management on CMPs requires consideration of user/application/hardware-specific requirements and dynamic adaptation of management decisions based on the actual run-time environment. However, designing an algorithm to manage resources and applications that can dynamically adapt based on the run-time environment is difficult because most resource and application management and monitoring facilities are only available at the operating system level. This paper presents REEact, an infrastructure that provides the capability to specify user-level management policies with dynamic adaptation. REEact is a virtual execution environment that provides a framework and core services to quickly enable the design of custom management policies for dynamically managing resources and applications. To demonstrate the capabilities and usefulness of REEact, this paper describes three case studies--each illustrating the use of REEact to apply a specific dynamic management policy on a real CMP.
Through these case studies, we demonstrate that REEact can effectively and efficiently implement policies to dynamically manage resources and adapt application execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ma:2012:DTD, author = "Zhiqiang Ma and Zhonghua Sheng and Lin Gu and Liufei Wen and Gong Zhang", title = "{DVM}: towards a datacenter-scale virtual machine", journal = j-SIGPLAN, volume = "47", number = "7", pages = "39--50", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151032", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "As cloud-based computation becomes increasingly important, providing a general computational interface to support datacenter-scale programming has become an imperative research agenda. Many cloud systems use existing virtual machine monitor (VMM) technologies, such as Xen, VMware, and Windows Hypervisor, to multiplex a physical host into multiple virtual hosts and isolate computation on the shared cluster platform. However, traditional multiplexing VMMs do not scale beyond one single physical host, and it alone cannot provide the programming interface and cluster-wide computation that a datacenter system requires. We design a new instruction set architecture, DISA, to unify myriads of compute nodes to form a big virtual machine called DVM, and present programmers the view of a single computer where thousands of tasks run concurrently in a large, unified, and snapshotted memory space. The DVM provides a simple yet scalable programming model and mitigates the scalability bottleneck of traditional distributed shared memory systems. Along with an efficient execution engine, the capacity of a DVM can scale up to support large clusters. We have implemented and tested DVM on three platforms, and our evaluation shows that DVM has excellent performance in terms of execution time and speedup. On one physical host, the system overhead of DVM is comparable to that of traditional VMMs. On 16 physical hosts, the DVM runs 10 times faster than MapReduce/Hadoop and X10. On 256 EC2 instances, DVM shows linear speedup on a parallelizable workload.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yu:2012:SCO, author = "Tingting Yu and Witawas Srisa-an and Gregg Rothermel", title = "{SimTester}: a controllable and observable testing framework for embedded systems", journal = j-SIGPLAN, volume = "47", number = "7", pages = "51--62", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151034", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "In software for embedded systems, the frequent use of interrupts for timing, sensing, and I/O processing can cause concurrency faults to occur due to interactions between applications, device drivers, and interrupt handlers. 
This type of fault is considered by many practitioners to be among the most difficult to detect, isolate, and correct, in part because it can be sensitive to execution interleavings and often occurs without leaving any observable incorrect output. As such, commonly used testing techniques that inspect program outputs to detect failures are often ineffective at detecting them. To test for these concurrency faults, test engineers need to be able to control interleavings so that they are deterministic. Furthermore, they also need to be able to observe faults as they occur instead of relying on observable incorrect outputs. In this paper, we introduce SimTester, a framework that allows engineers to effectively test for subtle and non-deterministic concurrency faults by providing them with greater controllability and observability. We implemented our framework on a commercial virtual platform that is widely used to support hardware/software co-designs to promote ease of adoption. We then evaluated its effectiveness by using it to test for data races and deadlocks. The results show that our framework can be effective and efficient at detecting these faults.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2012:SRB, author = "Yuan Zhang and Min Yang and Bo Zhou and Zhemin Yang and Weihua Zhang and Binyu Zang", title = "{Swift}: a register-based {JIT} compiler for embedded {JVMs}", journal = j-SIGPLAN, volume = "47", number = "7", pages = "63--74", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151035", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Code quality and compilation speed are two challenges for JIT compilers, and selective compilation is commonly used to trade off these two issues. Meanwhile, with more and more Java applications running on mobile devices, selective compilation encounters many problems. Since these applications typically have flat execution profiles and short lifetimes, a lightweight JIT technique that does not sacrifice code quality is sorely needed. However, the overhead of compiling stack-based Java bytecode to heterogeneous register-based machine code is significant on embedded devices. This paper presents a fast and effective JIT technique for mobile devices, building on a register-based Java bytecode format which is more similar to the underlying machine architecture. Through a comprehensive study of the characteristics of Java applications, we observe that the virtual registers used by more than 90\% of Java methods can be directly fulfilled by 11 physical registers. Based on this observation, this paper proposes Swift, a novel JIT compiler on register-based bytecode, which generates native code for RISC machines. After mapping virtual registers to physical registers, the code is generated efficiently by looking up a translation table, and the code quality is guaranteed by the static compiler that generates the register-based bytecode. In addition, we design two lightweight optimizations and an efficient code unloader to make Swift more suitable for embedded environments. Given the prevalence of Android, a prototype of Swift is implemented upon DEX bytecode, which is the official distribution format of Android applications.
Swift is evaluated with three benchmarks (SPECjvm98, EmbeddedCaffeineMark3 and JemBench2) on two different ARM SOCs: S3C6410 (armv6) and OMAP3530 (armv7). The results show that Swift achieves a speedup of 3.13 over the best-performing interpreter on the selected benchmarks. Compared with the state-of-the-art JIT compiler in Android, JITC-Droid, Swift achieves a speedup of 1.42.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shan:2012:FIA, author = "Zhiyong Shan and Xin Wang and Tzi-cker Chiueh and Xiaofeng Meng", title = "Facilitating inter-application interactions for {OS}-level virtualization", journal = j-SIGPLAN, volume = "47", number = "7", pages = "75--86", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151036", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "OS-level virtualization generates a minimal start-up and run-time overhead on the host OS and thus suits applications that require both good isolation and high efficiency. However, multiple-member applications required for forming a system may need to occasionally communicate across this isolation barrier to cooperate with each other while they are separated in different VMs to isolate intrusion or fault. Such application scenarios are often critical to enterprise-class servers, HPC clusters and intrusion/fault-tolerant systems, etc. We make the first effort to support the inter-application interactions in an OS-level virtualization system without causing a significant compromise on VM isolation. We identify all interactive operations that impact inter-application interactions, including inter-process communications, application invocations, resource name transfers and application dependencies. We propose Shuttle, a novel approach for facilitating inter-application interactions within and across OS-level virtual machines. Our results demonstrate that Shuttle can correctly address all necessary inter-application interactions while providing good isolation capability to all sample applications on different versions of Windows OS.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gerofi:2012:ETT, author = "Balazs Gerofi and Yutaka Ishikawa", title = "Enhancing {TCP} throughput of highly available virtual machines via speculative communication", journal = j-SIGPLAN, volume = "47", number = "7", pages = "87--96", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151038", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Checkpoint-recovery based virtual machine (VM) replication is an attractive technique for accommodating VM installations with high-availability. 
It provides seamless failover for the entire software stack executed in the VM regardless of the application or the underlying operating system (OS), it runs on commodity hardware, and it is inherently capable of dealing with shared memory non-determinism of symmetric multiprocessing (SMP) configurations. There have been several studies aiming at alleviating the overhead of replication; however, due to consistency requirements, the network performance of the basic replication mechanism remains extremely poor. In this paper we revisit the replication protocol and extend it with speculative communication. Speculative communication silently acknowledges TCP packets of the VM, enabling the guest's TCP stack to progress with transmission without exposing the messages to the clients before the corresponding execution state is checkpointed to the backup host. Furthermore, we propose replication aware congestion control, an extension to the guest's TCP stack that aggressively fills up the VMM's replication buffer so that speculative packets can be backed up and released earlier to the clients. We observe up to an order of magnitude improvement in bulk data transfer with speculative communication, and close to native VM network performance when replication awareness is enabled in the guest OS. We provide results of micro-, as well as application-level benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Rajagopalan:2012:SDT, author = "Shriram Rajagopalan and Brendan Cully and Ryan O'Connor and Andrew Warfield", title = "{SecondSite}: disaster tolerance as a service", journal = j-SIGPLAN, volume = "47", number = "7", pages = "97--108", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151039", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "This paper describes the design and implementation of SecondSite, a cloud-based service for disaster tolerance. SecondSite extends the Remus virtualization-based high availability system by allowing groups of virtual machines to be replicated across data centers over wide-area Internet links. The goal of the system is to commodify the property of availability, exposing it as a simple tick box when configuring a new virtual machine.
To achieve this in the wide area, we have had to tackle the related issues of replication traffic bandwidth, reliable failure detection across geographic regions and traffic redirection over a wide-area network without compromising on transparency and consistency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pan:2012:CLM, author = "Zhenhao Pan and Yaozu Dong and Yu Chen and Lei Zhang and Zhijiao Zhang", title = "{CompSC}: live migration with pass-through devices", journal = j-SIGPLAN, volume = "47", number = "7", pages = "109--120", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Live migration is one of the most important features of virtualization technology. With regard to recent virtualization techniques, performance of network I/O is critical. Current network I/O virtualization (e.g. Para-virtualized I/O, VMDq) has a significant performance gap with native network I/O. Pass-through network devices have near native performance, however, they have thus far prevented live migration. No existing methods solve the problem of live migration with pass-through devices perfectly. In this paper, we propose CompSC: a solution of hardware state migration that will enable the live migration support of pass-through devices. We go on to apply CompSC to SR-IOV network interface controllers. We discuss the attributes of different hardware states in pass-through devices and migrate them with corresponding techniques. Our experiments show that CompSC enables live migration on an Intel 82599 VF with a throughput 282.66\% higher than para-virtualized devices. In addition, service downtime during live migration is 42.9\% less than para-virtualized devices.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kemerlis:2012:LPD, author = "Vasileios P. Kemerlis and Georgios Portokalidis and Kangkook Jee and Angelos D. Keromytis", title = "{{\tt libdft}}: practical dynamic data flow tracking for commodity systems", journal = j-SIGPLAN, volume = "47", number = "7", pages = "121--132", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151042", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Dynamic data flow tracking (DFT) deals with tagging and tracking data of interest as they propagate during program execution. DFT has been repeatedly implemented by a variety of tools for numerous purposes, including protection from zero-day and cross-site scripting attacks, detection and prevention of information leaks, and for the analysis of legitimate and malicious software. We present {\tt libdft}, a dynamic DFT framework that unlike previous work is at once fast, reusable, and works with commodity software and hardware. 
{\tt libdft} provides an API for building DFT-enabled tools that work on unmodified binaries, running on common operating systems and hardware, thus facilitating research and rapid prototyping. We explore different approaches for implementing the low-level aspects of instruction-level data tracking, introduce a more efficient and 64-bit capable shadow memory, and identify (and avoid) the common pitfalls responsible for the excessive performance overhead of previous studies. We evaluate {\tt libdft} using real applications with large codebases like the Apache and MySQL servers, and the Firefox web browser. We also use a series of benchmarks and utilities to compare {\tt libdft} with similar systems. Our results indicate that it performs at least as fast, if not faster, than previous solutions, and to the best of our knowledge, we are the first to evaluate the performance overhead of a fast dynamic DFT implementation in such depth. Finally, {\tt libdft} is freely available as open source software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bruening:2012:TDI, author = "Derek Bruening and Qin Zhao and Saman Amarasinghe", title = "Transparent dynamic instrumentation", journal = j-SIGPLAN, volume = "47", number = "7", pages = "133--144", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151043", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Process virtualization provides a virtual execution environment within which an unmodified application can be monitored and controlled while it executes. The provided layer of control can be used for purposes ranging from sandboxing to compatibility to profiling. The additional operations required for this layer are performed clandestinely alongside regular program execution. Software dynamic instrumentation is one method for implementing process virtualization which dynamically instruments an application such that the application's code and the inserted code are interleaved together. DynamoRIO is a process virtualization system implemented using software code cache techniques that allows users to build customized dynamic instrumentation tools. There are many challenges to building such a runtime system. One major obstacle is transparency. In order to support executing arbitrary applications, DynamoRIO must be fully transparent so that an application cannot distinguish between running inside the virtual environment and native execution. In addition, any desired extra operations for a particular tool must avoid interfering with the behavior of the application. Transparency has historically been provided on an ad-hoc basis, as a reaction to observed problems in target applications. This paper identifies a necessary set of transparency requirements for running mainstream Windows and Linux applications. We discuss possible solutions to each transparency issue, evaluate tradeoffs between different choices, and identify cases where maintaining transparency is not practically solvable. 
We believe this will provide a guideline for better design and implementation of transparent dynamic instrumentation, as well as other similar process virtualization systems using software code caches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lefebvre:2012:EM, author = "Geoffrey Lefebvre and Brendan Cully and Christopher Head and Mark Spear and Norm Hutchinson and Mike Feeley and Andrew Warfield", title = "Execution mining", journal = j-SIGPLAN, volume = "47", number = "7", pages = "145--158", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151044", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Operating systems represent large pieces of complex software that are carefully tested and broadly deployed. Despite this, developers frequently have little more than their source code to understand how they behave. This static representation of a system results in limited insight into execution dynamics, such as what code is important, how data flows through a system, or how threads interact with one another. We describe Tralfamadore, a system that preserves complete traces of machine execution as an artifact that can be queried and analyzed with a library of simple, reusable operators, making it easy to develop and run new dynamic analyses. We demonstrate the benefits of this approach with several example applications, including a novel unified source and execution browser.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pavlou:2012:DBD, author = "Demos Pavlou and Enric Gibert and Fernando Latorre and Antonio Gonzalez", title = "{DDGacc}: boosting dynamic {DDG}-based binary optimizations through specialized hardware support", journal = j-SIGPLAN, volume = "47", number = "7", pages = "159--168", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151046", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Dynamic Binary Translators (DBT) and Dynamic Binary Optimization (DBO) by software are used widely for several reasons including performance, design simplification and virtualization. However, the software layer in such systems introduces non-negligible overheads which affect performance and user experience. Hence, reducing DBT/DBO overheads is of paramount importance. In addition, reduced overheads have interesting collateral effects in the rest of the software layer, such as allowing optimizations to be applied earlier. A cost-effective solution to this problem is to provide hardware support to speed up the primitives of the software layer, paying special attention to automate DBT/DBO mechanisms and leave the heuristics to the software, which is more flexible. In this work, we have characterized the overheads of a DBO system using DynamoRIO implementing several basic optimizations. We have seen that the computation of the Data Dependence Graph (DDG) accounts for 5\%-10\% of the execution time. 
For this reason, we propose to add hardware support for this task in the form of a new functional unit, called DDGacc, which is integrated in a conventional pipeline processor and is operated through new ISA instructions. Our evaluation shows that DDGacc reduces the cost of computing the DDG by 32x, which reduces overall execution time by 5\%-10\% on average and up to 18\% for applications where the DBO optimizes large code footprints.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ishizaki:2012:ADT, author = "Kazuaki Ishizaki and Takeshi Ogasawara and Jose Castanos and Priya Nagpurkar and David Edelsohn and Toshio Nakatani", title = "Adding dynamically-typed language support to a statically-typed language compiler: performance evaluation, analysis, and tradeoffs", journal = j-SIGPLAN, volume = "47", number = "7", pages = "169--180", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151047", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Applications written in dynamically typed scripting languages are increasingly popular for Web software development. Even on the server side, programmers are using dynamically typed scripting languages such as Ruby and Python to build complex applications quickly. As the number and complexity of dynamically typed scripting language applications grows, optimizing their performance is becoming important. Some of the best performing compilers and optimizers for dynamically typed scripting languages are developed entirely from scratch and target a specific language. This approach is not scalable, given the variety of dynamically typed scripting languages, and the effort involved in developing and maintaining separate infrastructures for each. In this paper, we evaluate the feasibility of adapting and extending an existing production-quality method-based Just-In-Time (JIT) compiler for a language with dynamic types. Our goal is to identify the challenges and shortcomings with the current infrastructure, and to propose and evaluate runtime techniques and optimizations that can be incorporated into a common optimization infrastructure for static and dynamic languages. We discuss three extensions to the compiler to support dynamically typed languages: (1) simplification of control flow graphs, (2) mapping of memory locations to stack-allocated variables, and (3) reduction of runtime overhead using language semantics. We also propose four new optimizations for Python in (2) and (3). These extensions are effective in reduction of compiler working memory and improvement of runtime performance. We present a detailed performance evaluation of our approach for Python, finding an overall improvement of 1.69x on average (up to 2.74x) over our JIT compiler without any optimization for dynamically typed languages and Python.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lin:2012:UKT, author = "Yi Lin and Stephen M. 
Blackburn and Daniel Frampton", title = "Unpicking the knot: teasing apart {VM}\slash application interdependencies", journal = j-SIGPLAN, volume = "47", number = "7", pages = "181--190", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151048", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Flexible and efficient runtime design requires an understanding of the dependencies among the components internal to the runtime and those between the application and the runtime. These dependencies are frequently unclear. This problem exists in all runtime design, and is most vivid in a metacircular runtime --- one that is implemented in terms of itself. Metacircularity blurs boundaries between application and runtime implementation, making it harder to understand and make guarantees about overall system behavior, affecting isolation, security, and resource management, as well as reducing opportunities for optimization. Our goal is to shed new light on VM interdependencies, helping all VM designers understand these dependencies and thereby engineer better runtimes. We explore these issues in the context of a high-performance Java-in-Java virtual machine. Our approach is to identify and instrument transition points into and within the runtime, which allows us to establish a dynamic execution context. Our contributions are: (1) implementing and measuring a system that dynamically maintains execution context with very low overhead, (2) demonstrating that such a framework can be used to improve the software engineering of an existing runtime, and (3) analyzing the behavior and runtime characteristics of our runtime across a wide range of benchmarks. Our solution provides clarity about execution state and allowable transitions, making it easier to develop, debug, and understand managed runtimes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tuch:2012:BSV, author = "Harvey Tuch and Cyprien Laplace and Kenneth C. Barr and Bi Wu", title = "Block storage virtualization with commodity secure digital cards", journal = j-SIGPLAN, volume = "47", number = "7", pages = "191--202", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151050", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Smartphones, tablets and other mobile platforms typically accommodate bulk data storage with low-cost, FAT-formatted Secure Digital cards. When one uses a mobile device to run a full-system virtual machine (VM), there can be a mismatch between (1) the VM's I/O mixture, security and reliability requirements and (2) the properties of the storage media available for VM block storage and checkpoint images. To resolve this mismatch, this paper presents a new VM disk image format called the Logging Block Store (LBS). After motivating the need for a new format, LBS is described in detail with experimental results demonstrating its efficacy. 
As a result of this work, recommendations are made for future optimizations throughout the stack that may simplify and improve the performance of storage virtualization systems on mobile platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ghosh:2012:RAA, author = "Sudeep Ghosh and Jason Hiser and Jack W. Davidson", title = "Replacement attacks against {VM}-protected applications", journal = j-SIGPLAN, volume = "47", number = "7", pages = "203--214", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151051", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Process-level virtualization is increasingly being used to enhance the security of software applications from reverse engineering and unauthorized modification (called software protection). Process-level virtual machines (PVMs) can safeguard the application code at run time and hamper the adversary's ability to launch dynamic attacks on the application. This dynamic protection, combined with its flexibility, ease in handling legacy systems and low performance overhead, has made process-level virtualization a popular approach for providing software protection. While there has been much research on using process-level virtualization to provide such protection, there has been less research on attacks against PVM-protected software. In this paper, we describe an attack on applications protected using process-level virtualization, called a replacement attack. In a replacement attack, the adversary replaces the protecting PVM with an attack VM thereby rendering the application vulnerable to analysis and modification. We present a general description of the replacement attack methodology and two attack implementations against a protected application using freely available tools. The generality and simplicity of replacement attacks demonstrates that there is a strong need to develop techniques that meld applications more tightly to the protecting PVM to prevent such attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Payer:2012:PAA, author = "Mathias Payer and Thomas R. Gross", title = "Protecting applications against {TOCTTOU} races by user-space caching of file metadata", journal = j-SIGPLAN, volume = "47", number = "7", pages = "215--226", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151052", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Time Of Check To Time Of Use (TOCTTOU) race conditions for file accesses in user-space applications are a common problem in Unix-like systems. The mapping between filename and inode and device is volatile and can provide the necessary preconditions for an exploit. Applications use filenames as the primary attribute to identify files but the mapping between filenames and inode and device can be changed by an attacker. DynaRace is an approach that protects unmodified applications from file-based TOCTTOU race conditions. 
DynaRace uses a transparent mapping cache that keeps additional state and metadata for each accessed file in the application. The combination of file state and the current system call type is used to decide whether (i) the metadata is updated or (ii) the correctness of the metadata is enforced between consecutive system calls. DynaRace uses user-mode path resolution internally to resolve individual file atoms. Each file atom is verified or updated according to the associated state in the mapping cache. More specifically, DynaRace protects against race conditions for all file-based system calls by replacing the unsafe system calls with a set of safe system calls that utilize the mapping cache. The system call is executed only if the state transition is allowed and the information in the mapping cache matches. DynaRace deterministically solves the problem of file-based race conditions for unmodified applications and removes an attacker's ability to exploit the TOCTTOU race condition. DynaRace detects injected alternate inode and device pairs and terminates the application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yan:2012:VCH, author = "Lok-Kwong Yan and Manjukumar Jayachandra and Mu Zhang and Heng Yin", title = "{V2E}: combining hardware virtualization and software emulation for transparent and extensible malware analysis", journal = j-SIGPLAN, volume = "47", number = "7", pages = "227--238", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151053", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "A transparent and extensible malware analysis platform is essential for defeating malware. This platform should be transparent so malware cannot easily detect and bypass it. It should also be extensible to provide strong support for heavyweight instrumentation and analysis efficiency. However, no existing platform can meet both requirements. Leveraging hardware virtualization technology, analysis platforms like Ether can achieve good transparency, but their instrumentation support and analysis efficiency are poor. In contrast, software emulation provides strong support for code instrumentation and good analysis efficiency by using dynamic binary translation. However, analysis platforms based on software emulation can be easily detected by malware and thus are poor in transparency. To achieve both transparency and extensibility, we propose a new analysis platform that combines hardware virtualization and software emulation. The essence is precise heterogeneous replay: the malware execution is recorded via hardware virtualization and then replayed in software. Our design ensures the execution replay is precise. Moreover, with page-level recording granularity, the platform can easily adjust to analyze various forms of malware (a process, a kernel module, or a shared library).
We implemented a prototype called V2E and demonstrated its capability and efficiency by conducting an extensive evaluation with both synthetic samples and 14 real-world emulation-resistant malware samples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Huynh:2012:SFM, author = "Huynh Phung Huynh and Andrei Hagiescu and Weng-Fai Wong and Rick Siow Mong Goh", title = "Scalable framework for mapping streaming applications onto multi-{GPU} systems", journal = j-SIGPLAN, volume = "47", number = "8", pages = "1--10", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145818", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Graphics processing units leverage a large array of parallel processing cores to boost the performance of a specific streaming computation pattern frequently found in graphics applications. Unfortunately, while many other general-purpose applications do exhibit the required streaming behavior, they also possess unfavorable data layouts and poor computation-to-communication ratios that penalize any straightforward execution on the GPU. In this paper we describe an efficient and scalable code generation framework that can map general-purpose streaming applications onto a multi-GPU system. This framework spans the entire core and memory hierarchy exposed by the multi-GPU system. Several key features in our framework ensure the scalability required by complex streaming applications. First, we propose an efficient stream graph partitioning algorithm that partitions the complex application to achieve the best performance under a given shared memory constraint. Next, the resulting partitions are mapped to multiple GPUs using an efficient architecture-driven strategy. The mapping balances the workload while considering the communication overhead. Finally, a highly effective pipelined execution scheme is employed to execute the partitions on the multi-GPU system. The framework has been implemented as a back-end of the StreamIt programming language compiler. Our comprehensive experiments show its scalability and significant performance speedup compared with a previous state-of-the-art solution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sim:2012:PAF, author = "Jaewoong Sim and Aniruddha Dasgupta and Hyesoon Kim and Richard Vuduc", title = "A performance analysis framework for identifying potential benefits in {GPGPU} applications", journal = j-SIGPLAN, volume = "47", number = "8", pages = "11--22", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145819", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Tuning code for GPGPU and other emerging many-core platforms is a challenge because few models or tools can precisely pinpoint the root cause of performance bottlenecks. In this paper, we present a performance analysis framework that can help shed light on such bottlenecks for GPGPU applications.
Although a handful of GPGPU profiling tools exist, most of the traditional tools, unfortunately, simply provide programmers with a variety of measurements and metrics obtained by running applications, and it is often difficult to map these metrics to understand the root causes of slowdowns, much less decide what next optimization step to take to alleviate the bottleneck. In our approach, we first develop an analytical performance model that can precisely predict performance and aims to provide programmer-interpretable metrics. Then, we apply static and dynamic profiling to instantiate our performance model for a particular input code and show how the model can predict the potential performance benefits. We demonstrate our framework on a suite of micro-benchmarks as well as a variety of computations extracted from real codes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Baghsorkhi:2012:EPE, author = "Sara S. Baghsorkhi and Isaac Gelado and Matthieu Delahaye and Wen-mei W. Hwu", title = "Efficient performance evaluation of memory hierarchy for highly multithreaded graphics processors", journal = j-SIGPLAN, volume = "47", number = "8", pages = "23--34", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145820", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "With the emergence of highly multithreaded architectures, performance monitoring techniques face new challenges in efficiently locating sources of performance discrepancies in the program source code. For example, the state-of-the-art performance counters in highly multithreaded graphics processing units (GPUs) report only the overall occurrences of microarchitecture events at the end of program execution. Furthermore, even if supported, any fine-grained sampling of performance counters will distort the actual program behavior and will make the sampled values inaccurate. On the other hand, it is difficult to achieve high resolution performance information at low sampling rates in the presence of thousands of concurrently running threads. In this paper, we present a novel software-based approach for monitoring the memory hierarchy performance in highly multithreaded general-purpose graphics processors. The proposed analysis is based on memory traces collected for snapshots of an application execution. A trace-based memory hierarchy model with a Monte Carlo experimental methodology generates statistical bounds of performance measures without being concerned about the exact inter-thread ordering of individual events but rather studying the behavior of the overall system. The statistical approach overcomes the classical problem of disturbed execution timing due to fine-grained instrumentation. The approach scales well as we deploy an efficient parallel trace collection technique to reduce the trace generation overhead and a simple memory hierarchy model to reduce the simulation time. The proposed scheme also keeps track of individual memory operations in the source code and can quantify their efficiency with respect to the memory system. A cross-validation of our results shows close agreement with the values read from the hardware performance counters on an NVIDIA Tesla C2050 GPU. 
Based on the high resolution profile data produced by our model we optimized memory accesses in the sparse matrix vector multiply kernel and achieved speedups ranging from 2.4 to 14.8 depending on the characteristics of the input matrices.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ballard:2012:CAS, author = "Grey Ballard and James Demmel and Nicholas Knight", title = "Communication avoiding successive band reduction", journal = j-SIGPLAN, volume = "47", number = "8", pages = "35--44", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145822", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The running time of an algorithm depends on both arithmetic and communication (i.e., data movement) costs, and the relative costs of communication are growing over time. In this work, we present both theoretical and practical results for tridiagonalizing a symmetric band matrix: we present an algorithm that asymptotically reduces communication, and we show that it indeed performs well in practice. The tridiagonalization of a symmetric band matrix is a key kernel in solving the symmetric eigenvalue problem for both full and band matrices. In order to preserve sparsity, tridiagonalization routines use annihilate-and-chase procedures that previously have suffered from poor data locality. We improve data locality by reorganizing the computation, asymptotically reducing communication costs compared to existing algorithms. Our sequential implementation demonstrates that avoiding communication improves runtime even at the expense of extra arithmetic: we observe a 2x speedup over Intel MKL while doing 43\% more floating point operations. Our parallel implementation targets shared-memory multicore platforms. It uses pipelined parallelism and a static scheduler while retaining the locality properties of the sequential algorithm. Due to lightweight synchronization and effective data reuse, we see 9.5x scaling over our serial code and up to 6x speedup over the PLASMA library, comparing parallel performance on a ten-core processor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sack:2012:FTA, author = "Paul Sack and William Gropp", title = "Faster topology-aware collective algorithms through non-minimal communication", journal = j-SIGPLAN, volume = "47", number = "8", pages = "45--54", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145823", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Known algorithms for two important collective communication operations, allgather and reduce-scatter, are minimal-communication algorithms; no process sends or receives more than the minimum amount of data. This, combined with the data-ordering semantics of the operations, limits the flexibility and performance of these algorithms. 
Our novel non-minimal, topology-aware algorithms deliver far better performance with the addition of a very small amount of redundant communication. We develop novel algorithms for Clos networks and single or multi-ported torus networks. Tests on a 32k-node BlueGene/P result in allgather speedups of up to 6x and reduce-scatter speedups of over 11x compared to the native IBM algorithm. Broadcast, reduce, and allreduce can be composed of allgather or reduce-scatter and other collective operations; our techniques also improve the performance of these algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kim:2012:ESC, author = "Seonggun Kim and Hwansoo Han", title = "Efficient {SIMD} code generation for irregular kernels", journal = j-SIGPLAN, volume = "47", number = "8", pages = "55--64", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145824", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Array indirection causes several challenges for compilers to utilize single instruction, multiple data (SIMD) instructions. Disjoint memory references, arbitrarily misaligned memory references, and dependence cycles in loops are main challenges to handle for SIMD compilers. Due to those challenges, existing SIMD compilers have excluded loops with array indirection from their candidate loops for SIMD vectorization. However, addressing those challenges is inevitable, since many important compute-intensive applications extensively use array indirection to reduce memory and computation requirements. In this work, we propose a method to generate efficient SIMD code for loops containing indirected memory references. We extract both inter- and intra-iteration parallelism, taking data reorganization overhead into consideration. We also optimally place data reorganization code in order to amortize the reorganization overhead through the performance gain of SIMD vectorization. Experiments on four array indirection kernels, which are extracted from real-world scientific applications, show that our proposed method effectively generates SIMD code for irregular kernels with array indirection. Compared to the existing SIMD vectorization methods, our proposed method significantly improves the performance of irregular kernels by 91\%, on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leissa:2012:ECL, author = "Roland Lei{\ss}a and Sebastian Hack and Ingo Wald", title = "Extending a {C}-like language for portable {SIMD} programming", journal = j-SIGPLAN, volume = "47", number = "8", pages = "65--74", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145825", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "SIMD instructions are common in CPUs for years now. Using these instructions effectively requires not only vectorization of code, but also modifications to the data layout. 
However, automatic vectorization techniques are often not powerful enough and suffer from restricted scope of applicability; hence, programmers often vectorize their programs manually by using intrinsics: compiler-known functions that directly expand to machine instructions. They significantly decrease programmer productivity by enforcing a very error-prone and hard-to-read assembly-like programming style. Furthermore, intrinsics are not portable because they are tied to a specific instruction set. In this paper, we show how a C-like language can be extended to allow for portable and efficient SIMD programming. Our extension puts the programmer in total control over where and how control-flow vectorization is triggered. We present a type system and a formal semantics of our extension and prove the soundness of the type system. Using our prototype implementation IVL that targets Intel's MIC architecture and SSE instruction set, we show that the generated code is roughly on par with handwritten intrinsic code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kwon:2012:HAO, author = "Okwan Kwon and Fahed Jubair and Rudolf Eigenmann and Samuel Midkiff", title = "A hybrid approach of {OpenMP} for clusters", journal = j-SIGPLAN, volume = "47", number = "8", pages = "75--84", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145827", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present the first fully automated compiler-runtime system that successfully translates and executes OpenMP shared-address-space programs on laboratory-size clusters, for the complete set of regular, repetitive applications in the NAS Parallel Benchmarks. We introduce a hybrid compiler-runtime translation scheme. Compared to previous work, this scheme features a new runtime data flow analysis and new compiler techniques for improving data affinity and reducing communication costs. We present and discuss the performance of our translated programs, and compare them with the performance of the MPI, HPF and UPC versions of the benchmarks. The results show that our translated programs achieve 75\% of the hand-coded MPI programs, on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{hunEom:2012:DDP, author = "Yong hun Eom and Stephen Yang and James C. Jenista and Brian Demsky", title = "{DOJ}: dynamically parallelizing object-oriented programs", journal = j-SIGPLAN, volume = "47", number = "8", pages = "85--96", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145828", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present Dynamic Out-of-Order Java (DOJ), a dynamic parallelization approach. In DOJ, a developer annotates code blocks as tasks to decouple these blocks from the parent execution thread. 
The DOJ compiler then analyzes the code to generate heap examiners that ensure the parallel execution preserves the behavior of the original sequential program. Heap examiners dynamically extract heap dependences between code blocks and determine when it is safe to execute a code block. We have implemented DOJ and evaluated it on twelve benchmarks. We achieved an average compilation speedup of 31.15 times over OoOJava and an average execution speedup of 12.73 times over sequential versions of the benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bonetta:2012:SLH, author = "Daniele Bonetta and Achille Peternier and Cesare Pautasso and Walter Binder", title = "{S}: a scripting language for high-performance {RESTful} {Web} services", journal = j-SIGPLAN, volume = "47", number = "8", pages = "97--106", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145829", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "There is an urgent need for novel programming abstractions to leverage the parallelism in modern multicore machines. We introduce S, a new domain-specific language targeting the server-side scripting of high-performance RESTful Web services. S promotes an innovative programming model based on explicit (control-flow) and implicit (process-level) parallelism control, allowing the service developer to specify which portions of the control-flow should be executed in parallel. For each service, the choice of the best level of parallelism is left to the runtime system. We assess performance and scalability by implementing two non-trivial composite Web services in S. Experiments show that S-based Web services can handle thousands of concurrent client requests on a modern multicore machine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mendez-Lojo:2012:GII, author = "Mario Mendez-Lojo and Martin Burtscher and Keshav Pingali", title = "A {GPU} implementation of inclusion-based points-to analysis", journal = j-SIGPLAN, volume = "47", number = "8", pages = "107--116", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145831", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Graphics Processing Units (GPUs) have emerged as powerful accelerators for many regular algorithms that operate on dense arrays and matrices. In contrast, we know relatively little about using GPUs to accelerate highly irregular algorithms that operate on pointer-based data structures such as graphs. For the most part, research has focused on GPU implementations of graph analysis algorithms that do not modify the structure of the graph, such as algorithms for breadth-first search and strongly-connected components. In this paper, we describe a high-performance GPU implementation of an important graph algorithm used in compilers such as gcc and LLVM: Andersen-style inclusion-based points-to analysis. 
This algorithm is challenging to parallelize effectively on GPUs because it makes extensive modifications to the structure of the underlying graph and performs relatively little computation. In spite of this, our program, when executed on a 14 Streaming Multiprocessor GPU, achieves an average speedup of 7x compared to a sequential CPU implementation and outperforms a parallel implementation of the same algorithm running on 16 CPU cores. Our implementation provides general insights into how to produce high-performance GPU implementations of graph algorithms, and it highlights key differences between optimizing parallel programs for multicore CPUs and for GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Merrill:2012:SGG, author = "Duane Merrill and Michael Garland and Andrew Grimshaw", title = "Scalable {GPU} graph traversal", journal = j-SIGPLAN, volume = "47", number = "8", pages = "117--128", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145832", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Breadth-first search (BFS) is a core primitive for graph traversal and a basis for many higher-level graph analysis algorithms. It is also representative of a class of parallel computations whose memory accesses and work distribution are both irregular and data-dependent. Recent work has demonstrated the plausibility of GPU sparse graph traversal, but has tended to focus on asymptotically inefficient algorithms that perform poorly on graphs with non-trivial diameter. We present a BFS parallelization focused on fine-grained task management constructed from efficient prefix sum that achieves an asymptotically optimal O(|V| + |E|) work complexity. Our implementation delivers excellent performance on diverse graphs, achieving traversal rates in excess of 3.3 billion and 8.3 billion traversed edges per second using single and quad-GPU configurations, respectively. This level of performance is several times faster than state-of-the-art implementations on both CPU and GPU platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zu:2012:GBN, author = "Yuan Zu and Ming Yang and Zhonghu Xu and Lin Wang and Xin Tian and Kunyang Peng and Qunfeng Dong", title = "{GPU}-based {NFA} implementation for memory efficient high speed regular expression matching", journal = j-SIGPLAN, volume = "47", number = "8", pages = "129--140", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145833", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Regular expression pattern matching is the foundation and core engine of many network functions, such as network intrusion detection, worm detection, traffic analysis, web applications and so on. DFA-based solutions suffer exponentially exploding state space and cannot be remedied without sacrificing matching speed.
Given this scalability problem of DFA-based methods, there has been increasing interest in NFA-based methods for memory efficient regular expression matching. Achieving high matching speed using NFAs requires potentially massive parallel processing, and hence represents an ideal programming task on a Graphics Processing Unit (GPU). Based on in-depth understanding of NFA properties as well as GPU architecture, we propose effective methods for fitting NFAs into GPU architecture through proper data structure and parallel programming design, so that GPU's parallel processing power can be better utilized to achieve high speed regular expression matching. Experimental results demonstrate that, compared with the existing GPU-based NFA implementation method [9], our proposed methods can boost matching speed by 29 to 46 times, consistently yielding above 10 Gbps matching speed on an NVIDIA GTX-460 GPU. Meanwhile, our design only needs a small amount of memory space, growing exponentially more slowly than DFA size. These results make our design an effective solution for memory efficient high speed regular expression matching, and clearly demonstrate the power and potential of the GPU as a platform for memory efficient high speed regular expression matching.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kogan:2012:MCF, author = "Alex Kogan and Erez Petrank", title = "A methodology for creating fast wait-free data structures", journal = j-SIGPLAN, volume = "47", number = "8", pages = "141--150", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145835", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Lock-freedom is a progress guarantee that ensures overall program progress. Wait-freedom is a stronger progress guarantee that ensures the progress of each thread in the program. While many practical lock-free algorithms exist, wait-free algorithms are typically inefficient and hardly used in practice. In this paper, we propose a methodology called fast-path-slow-path for creating efficient wait-free algorithms. The idea is to execute the efficient lock-free version most of the time and revert to the wait-free version only when things go wrong. The generality and effectiveness of this methodology are demonstrated by two examples. In this paper, we apply this idea to a recent construction of a wait-free queue, bringing the wait-free implementation to perform in practice as efficiently as the lock-free implementation.
In another work, the fast-path-slow-path methodology has been used for (dramatically) improving the performance of a wait-free linked-list.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prokopec:2012:CTE, author = "Aleksandar Prokopec and Nathan Grasso Bronson and Phil Bagwell and Martin Odersky", title = "Concurrent tries with efficient non-blocking snapshots", journal = j-SIGPLAN, volume = "47", number = "8", pages = "151--160", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145836", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We describe a non-blocking concurrent hash trie based on shared-memory single-word compare-and-swap instructions. The hash trie supports standard mutable lock-free operations such as insertion, removal, lookup and their conditional variants. To ensure space-efficiency, removal operations compress the trie when necessary. We show how to implement an efficient lock-free snapshot operation for concurrent hash tries. The snapshot operation uses a single-word compare-and-swap and avoids copying the data structure eagerly. Snapshots are used to implement consistent iterators and a linearizable size retrieval. We compare concurrent hash trie performance with other concurrent data structures and evaluate the performance of the snapshot operation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Crain:2012:SFB, author = "Tyler Crain and Vincent Gramoli and Michel Raynal", title = "A speculation-friendly binary search tree", journal = j-SIGPLAN, volume = "47", number = "8", pages = "161--170", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145837", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We introduce the first binary search tree algorithm designed for speculative executions. Prior to this work, tree structures were mainly designed for their pessimistic (non-speculative) accesses to have a bounded complexity. Researchers tried to evaluate transactional memory using such tree structures whose prominent example is the red-black tree library developed by Oracle Labs that is part of multiple benchmark distributions. Although well-engineered, such structures remain badly suited for speculative accesses, whose step complexity might rise dramatically with contention. We show that our speculation-friendly tree outperforms the existing transaction-based version of the AVL and the red-black trees. Its key novelty stems from the decoupling of update operations: they are split into one transaction that modifies the abstraction state and multiple ones that restructure its tree implementation in the background.
In particular, the speculation-friendly tree is shown correct, reusable and it speeds up a transaction-based travel reservation application by up to 3.5x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2012:PUA, author = "Yifeng Chen and Xiang Cui and Hong Mei", title = "{PARRAY}: a unifying array representation for heterogeneous parallelism", journal = j-SIGPLAN, volume = "47", number = "8", pages = "171--180", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145838", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "This paper introduces a programming interface called PARRAY (or Parallelizing ARRAYs) that supports system-level succinct programming for heterogeneous parallel systems like GPU clusters. The current practice of software development requires combining several low-level libraries like Pthread, OpenMP, CUDA and MPI. Achieving productivity and portability is hard with different numbers and models of GPUs. PARRAY extends mainstream C programming with novel array types of distinct features: (1) the dimensions of an array type are nested in a tree, conceptually reflecting the memory hierarchy; (2) the definition of an array type may contain references to other array types, allowing sophisticated array types to be created for parallelization; (3) threads also form arrays that allow programming in a Single-Program-Multiple-Codeblock (SPMC) style to unify various sophisticated communication patterns. This leads to shorter, more portable and maintainable parallel codes, while the programmer still has control over performance-related features necessary for deep manual optimization. Although the source-to-source code generator only faithfully generates low-level library calls according to the type information, higher-level programming and automatic performance optimization are still possible through building libraries of sub-programs on top of PARRAY. The case study on cluster FFT illustrates a simple 30-line code that 2x outperforms Intel Cluster MKL on the Tianhe-1A system with 7168 Fermi GPUs and 14336 CPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Blelloch:2012:IDP, author = "Guy E. Blelloch and Jeremy T. Fineman and Phillip B. Gibbons and Julian Shun", title = "Internally deterministic parallel algorithms can be fast", journal = j-SIGPLAN, volume = "47", number = "8", pages = "181--192", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145840", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The virtues of deterministic parallelism have been argued for decades and many forms of deterministic parallelism have been described and analyzed. Here we are concerned with one of the strongest forms, requiring that for any input there is a unique dependence graph representing a trace of the computation annotated with every operation and value. 
This has been referred to as internal determinism, and implies a sequential semantics--- i.e., considering any sequential traversal of the dependence graph is sufficient for analyzing the correctness of the code. In addition to returning deterministic results, internal determinism has many advantages including ease of reasoning about the code, ease of verifying correctness, ease of debugging, ease of defining invariants, ease of defining good coverage for testing, and ease of formally, informally and experimentally reasoning about performance. On the other hand one needs to consider the possible downsides of determinism, which might include making algorithms (i) more complicated, unnatural or special purpose and/or (ii) slower or less scalable. In this paper we study the effectiveness of this strong form of determinism through a broad set of benchmark problems. Our main contribution is to demonstrate that for this wide body of problems, there exist efficient internally deterministic algorithms, and moreover that these algorithms are natural to reason about and not complicated to code. We leverage an approach to determinism suggested by Steele (1990), which is to use nested parallelism with commutative operations. Our algorithms apply several diverse programming paradigms that fit within the model including (i) a strict functional style (no shared state among concurrent operations), (ii) an approach we refer to as deterministic reservations, and (iii) the use of commutative, linearizable operations on data structures. We describe algorithms for the benchmark problems that use these deterministic approaches and present performance results on a 32-core machine. Perhaps surprisingly, for all problems, our internally deterministic algorithms achieve good speedup and good performance even relative to prior nondeterministic solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leiserson:2012:DPR, author = "Charles E. Leiserson and Tao B. Schardl and Jim Sukha", title = "Deterministic parallel random-number generation for dynamic-multithreading platforms", journal = j-SIGPLAN, volume = "47", number = "8", pages = "193--204", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145841", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Existing concurrency platforms for dynamic multithreading do not provide repeatable parallel random-number generators. This paper proposes that a mechanism called pedigrees be built into the runtime system to enable efficient deterministic parallel random-number generation. Experiments with the open-source MIT Cilk runtime system show that the overhead for maintaining pedigrees is negligible. Specifically, on a suite of 10 benchmarks, the relative overhead of Cilk with pedigrees to the original Cilk has a geometric mean of less than 1\%. We persuaded Intel to modify its commercial C/C++ compiler, which provides the Cilk Plus concurrency platform, to include pedigrees, and we built a library implementation of a deterministic parallel random-number generator called DotMix that compresses the pedigree and then ``RC6-mixes'' the result. 
The statistical quality of DotMix is comparable to that of the popular Mersenne twister, but somewhat slower than a nondeterministic parallel version of this efficient and high-quality serial random-number generator. The cost of calling DotMix depends on the ``spawn depth'' of the invocation. For a naive Fibonacci calculation with n=40 that calls DotMix in every node of the computation, this ``price of determinism'' is a factor of 2.65 in running time, but for more realistic applications with less intense use of random numbers --- such as a maximal-independent-set algorithm, a practical samplesort program, and a Monte Carlo discrete-hedging application from QuantLib --- the observed ``price'' was less than 5\%. Moreover, even if overheads were several times greater, applications using DotMix should be amply fast for debugging purposes, which is a major reason for desiring repeatability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nobari:2012:SPM, author = "Sadegh Nobari and Thanh-Tung Cao and Panagiotis Karras and St{\'e}phane Bressan", title = "Scalable parallel minimum spanning forest computation", journal = j-SIGPLAN, volume = "47", number = "8", pages = "205--214", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145842", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The proliferation of data in graph form calls for the development of scalable graph algorithms that exploit parallel processing environments. One such problem is the computation of a graph's minimum spanning forest (MSF). Past research has proposed several parallel algorithms for this problem, yet none of them scales to large, high-density graphs. In this paper we propose a novel, scalable, parallel MSF algorithm for undirected weighted graphs. Our algorithm leverages Prim's algorithm in a parallel fashion, concurrently expanding several subsets of the computed MSF. Our effort focuses on minimizing the communication among different processors without constraining the local growth of a processor's computed subtree. In effect, we achieve a scalability that previous approaches lacked. We implement our algorithm in CUDA, running on a GPU and study its performance using real and synthetic, sparse as well as dense, structured and unstructured graph data. Our experimental study demonstrates that our algorithm outperforms the previous state-of-the-art GPU-based MSF algorithm, while being several orders of magnitude faster than sequential CPU-based algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2012:GCV, author = "Guodong Li and Peng Li and Geof Sawaya and Ganesh Gopalakrishnan and Indradeep Ghosh and Sreeranga P. 
Rajan", title = "{GKLEE}: concolic verification and test generation for {GPUs}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "215--224", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145844", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Programs written for GPUs often contain correctness errors such as races and deadlocks, or may compute the wrong result. Existing debugging tools often miss these errors because of their limited input-space and execution-space exploration. Existing tools based on conservative static analysis or conservative modeling of SIMD concurrency generate false alarms resulting in wasted bug-hunting. They also often do not target performance bugs (non-coalesced memory accesses, memory bank conflicts, and divergent warps). We provide a new framework called GKLEE that can analyze C++ GPU programs, locating the aforesaid correctness and performance bugs. For these programs, GKLEE can also automatically generate tests that provide high coverage. These tests serve as concrete witnesses for every reported bug. They can also be used for downstream debugging, for example to test the kernel on the actual hardware. We describe the architecture of GKLEE, its symbolic virtual machine model, and describe previously unknown bugs and performance issues that it detected on commercial SDK kernels. We describe GKLEE's test-case reduction heuristics, and the resulting scalability improvement for a given coverage target.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Du:2012:ABF, author = "Peng Du and Aurelien Bouteiller and George Bosilca and Thomas Herault and Jack Dongarra", title = "Algorithm-based fault tolerance for dense matrix factorizations", journal = j-SIGPLAN, volume = "47", number = "8", pages = "225--234", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145845", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Dense matrix factorizations, such as LU, Cholesky and QR, are widely used for scientific applications that require solving systems of linear equations, eigenvalue problems and linear least squares problems. Such computations are normally carried out on supercomputers, whose ever-growing scale induces a fast decline of the Mean Time To Failure (MTTF). This paper proposes a new hybrid approach, based on Algorithm-Based Fault Tolerance (ABFT), to help matrix factorization algorithms survive fail-stop failures. We consider extreme conditions, such as the absence of any reliable component and the possibility of losing both data and checksum from a single failure. We will present a generic solution for protecting the right factor, where the updates are applied, of all the above-mentioned factorizations. For the left factor, where the panel has been applied, we propose a scalable checkpointing algorithm. This algorithm features a high degree of checkpointing parallelism and cooperatively utilizes the checksum storage leftover from the right factor protection.
The fault-tolerant algorithms derived from this hybrid solution are applicable to a wide range of dense matrix factorizations, with minor modifications. Theoretical analysis shows that the fault tolerance overhead sharply decreases with the scaling in the number of computing units and the problem size. Experimental results of LU and QR factorization on the Kraken (Cray XT5) supercomputer validate the theoretical evaluation and confirm negligible overhead, with and without errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Buhler:2012:EDA, author = "Jeremy D. Buhler and Kunal Agrawal and Peng Li and Roger D. Chamberlain", title = "Efficient deadlock avoidance for streaming computation with filtering", journal = j-SIGPLAN, volume = "47", number = "8", pages = "235--246", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145846", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Parallel streaming computations have been studied extensively, and many languages, libraries, and systems have been designed to support this model of computation. In particular, we consider acyclic streaming computations in which individual nodes can choose to filter, or discard, some of their inputs in a data-dependent manner. In these applications, if the channels between nodes have finite buffers, the computation can deadlock. One method of deadlock avoidance is to augment the data streams between nodes with occasional dummy messages; however, for general DAG topologies, no polynomial time algorithm is known to compute the intervals at which dummy messages must be sent to avoid deadlock. In this paper, we show that deadlock avoidance for streaming computations with filtering can be performed efficiently for a large class of DAG topologies. We first present a new method where each dummy message is tagged with a destination, so as to reduce the number of dummy messages sent over the network. We then give efficient algorithms for dummy interval computation in series-parallel DAGs. We finally generalize our results to a larger graph family, which we call the CS4 DAGs, in which every undirected Cycle is Single-Source and Single-Sink (CS$^4$). Our results show that, for a large set of application topologies that are both intuitively useful and formalizable, the streaming model with filtering can be implemented safely with reasonable overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dice:2012:LCG, author = "David Dice and Virendra J.
Marathe and Nir Shavit", title = "Lock cohorting: a general technique for designing {NUMA} locks", journal = j-SIGPLAN, volume = "47", number = "8", pages = "247--256", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145848", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Multicore machines are quickly shifting to NUMA and CC-NUMA architectures, making scalable NUMA-aware locking algorithms, ones that take into account the machines' non-uniform memory and caching hierarchy, ever more important. This paper presents lock cohorting, a general new technique for designing NUMA-aware locks that is as simple as it is powerful. Lock cohorting allows one to transform any spin-lock algorithm, with minimal non-intrusive changes, into scalable NUMA-aware spin-locks. Our new cohorting technique allows us to easily create NUMA-aware versions of the TATAS-Backoff, CLH, MCS, and ticket locks, to name a few. Moreover, it allows us to derive a CLH-based cohort abortable lock, the first NUMA-aware queue lock to support abortability. We empirically compared the performance of cohort locks with prior NUMA-aware and classic NUMA-oblivious locks on a synthetic micro-benchmark, a real world key-value store application memcached, as well as the libc memory allocator. Our results demonstrate that cohort locks perform as well or better than known locks when the load is low and significantly out-perform them as the load increases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fatourou:2012:RCS, author = "Panagiota Fatourou and Nikolaos D. Kallimanis", title = "Revisiting the combining synchronization technique", journal = j-SIGPLAN, volume = "47", number = "8", pages = "257--266", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145849", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Fine-grain thread synchronization has been proved, in several cases, to be outperformed by efficient implementations of the combining technique where a single thread, called the combiner, holding a coarse-grain lock, serves, in addition to its own synchronization request, active requests announced by other threads while they are waiting by performing some form of spinning. Efficient implementations of this technique significantly reduce the cost of synchronization, so in many cases they exhibit much better performance than the most efficient finely synchronized algorithms. In this paper, we revisit the combining technique with the goal to discover where its real performance power resides and whether or how ensuring some desired properties (e.g., fairness in serving requests) would impact performance. We do so by presenting two new implementations of this technique; the first (CC-Synch) addresses systems that support coherent caches, whereas the second (DSM-Synch) works better in cacheless NUMA machines. 
In comparison to previous such implementations, the new implementations (1) provide bounds on the number of remote memory references (RMRs) that they perform, (2) support a stronger notion of fairness, and (3) use simpler and less basic primitives than previous approaches. In all our experiments, the new implementations outperform by far all previous state-of-the-art combining-based and fine-grain synchronization algorithms. Our experimental analysis sheds light on the questions we aimed to answer. Several modern multi-core systems organize the cores into clusters and provide fast communication within the same cluster and much slower communication across clusters. We present a hierarchical version of CC-Synch, called H-Synch, which exploits the hierarchical communication nature of such systems to achieve better performance. Experiments show that H-Synch significantly outperforms previous state-of-the-art hierarchical approaches. We provide new implementations of common shared data structures (like stacks and queues) based on CC-Synch, DSM-Synch and H-Synch. Our experiments show that these implementations outperform by far all previous (fine-grain or combining-based) implementations of shared stacks and queues.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tardieu:2012:WSS, author = "Olivier Tardieu and Haichuan Wang and Haibo Lin", title = "A work-stealing scheduler for {X10}'s task parallelism with suspension", journal = j-SIGPLAN, volume = "47", number = "8", pages = "267--276", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145850", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The X10 programming language is intended to ease the programming of scalable concurrent and distributed applications. X10 augments a familiar imperative object-oriented programming model with constructs to support light-weight asynchronous tasks as well as execution across multiple address spaces. A crucial aspect of X10's runtime system is the scheduling of concurrent tasks. Work-stealing schedulers have been shown to efficiently load balance fine-grain divide-and-conquer task-parallel programs on SMPs and multicores. But X10 is not limited to shared-memory fork-join parallelism. X10 permits tasks to suspend and synchronize by means of conditional atomic blocks and remote task invocations. In this paper, we demonstrate that work-stealing scheduling principles are applicable to a rich programming language such as X10, achieving performance at scale without compromising expressivity, ease of use, or portability. We design and implement a portable work-stealing execution engine for X10. While this engine is biased toward the efficient execution of fork-join parallelism in shared memory, it handles the full X10 language, especially conditional atomic blocks and distribution. We show that this engine improves the run time of a series of benchmark programs by several orders of magnitude when used in combination with the C++ backend compiler and runtime for X10.
It achieves scaling comparable to state-of-the-art work-stealing scheduler implementations---the Cilk++ compiler and the Java fork/join framework---despite the dramatic increase in generality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Baskaran:2012:ACO, author = "Muthu Manikandan Baskaran and Nicolas Vasilache and Benoit Meister and Richard Lethin", title = "Automatic communication optimizations through memory reuse strategies", journal = j-SIGPLAN, volume = "47", number = "8", pages = "277--278", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145852", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Modern parallel architectures are emerging with sophisticated hardware consisting of hierarchically placed parallel processors and memories. The properties of memories in a system vary wildly, not only quantitatively (size, latency, bandwidth, number of banks) but also qualitatively (scratchpad, cache). Along with the emergence of such architectures comes the need for effectively utilizing the parallel processors and properly managing data movement across memories to improve memory bandwidth and hide data transfer latency. In this paper, we describe some of the high-level optimizations that are targeted at the improvement of memory performance in the R-Stream compiler, a high-level source-to-source automatic parallelizing compiler. We direct our focus in this paper on optimizing communications (data transfers) by improving memory reuse at various levels of an explicit memory hierarchy. This general concept is well-suited to the hardware properties of GPGPUs, which is the architecture that we concentrate on for this paper. We apply our techniques and obtain performance improvement on various stencil kernels including an important iterative stencil kernel in seismic processing applications where the performance is comparable to that of the state-of-the-art implementation of the kernel by a CUDA expert.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2012:FPA, author = "Gu Liu and Hong An and Wenting Han and Xiaoqiang Li and Tao Sun and Wei Zhou and Xuechao Wei and Xulong Tang", title = "{FlexBFS}: a parallelism-aware implementation of breadth-first search on {GPU}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "279--280", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145853", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In this paper, we present FlexBFS, a parallelism-aware implementation for breadth-first search on GPU. Our implementation can adjust the computation resources according to the feedback of available parallelism dynamically. We also optimized our program in three ways: (1) a simplified two-level queue management, (2) a combined kernel strategy and (3) a high-degree vertices specialization approach.
Our experimental results show that it can achieve 3 to 20 times speedup against the fastest serial version, and can outperform the TBB based multi-threading CPU version and the previous most effective GPU version on all types of input graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Andersch:2012:PPE, author = "Michael Andersch and Chi Ching Chi and Ben Juurlink", title = "Programming parallel embedded and consumer applications in {OpenMP} superscalar", journal = j-SIGPLAN, volume = "47", number = "8", pages = "281--282", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145854", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In this paper, we evaluate the performance and usability of the parallel programming model OpenMP Superscalar (OmpSs), apply it to 10 different benchmarks and compare its performance with corresponding POSIX threads implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhong:2012:OMS, author = "Jianlong Zhong and Bingsheng He", title = "An overview of {Medusa}: simplified graph processing on {GPUs}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "283--284", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145855", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Graphs are the de facto data structures for many applications, and efficient graph processing is a must for the application performance. GPUs have an order of magnitude higher computational power and memory bandwidth compared to CPUs and have been adopted to accelerate several common graph algorithms. However, it is difficult to write correct and efficient GPU programs and even more difficult for graph processing due to the irregularities of graph structures. To address those difficulties, we propose a programming framework named Medusa to simplify graph processing on GPUs. Medusa offers a small set of APIs, based on which developers can define their application logics by writing sequential code without awareness of GPU architectures. The Medusa runtime system automatically executes the developer defined APIs in parallel on the GPU, with a series of graph-centric optimizations. 
This poster gives an overview of Medusa, and presents some preliminary results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Alias:2012:ORA, author = "Christophe Alias and Alain Darte and Alexandru Plesco", title = "Optimizing remote accesses for offloaded kernels: application to high-level synthesis for {FPGA}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "285--286", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145856", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In the context of the high-level synthesis (HLS) of regular kernels offloaded to FPGA and communicating with an external DDR memory, we show how to automatically generate adequate communicating processes for optimizing the transfer of remote data. This requires a generalized form of communication coalescing where data can be transferred from the external memory even when this memory is not fully up-to-date. Experiments with Altera HLS tools demonstrate that this automatization, based on advanced polyhedral code analysis and code generation techniques, can be used to efficiently map C kernels to FPGA, by generating, entirely at C level, all the necessary glue (the communication processes), which is compiled with the same HLS tool as for the computation kernel.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tao:2012:UGA, author = "Jian Tao and Marek Blazewicz and Steven R. Brandt", title = "Using {GPU}'s to accelerate stencil-based computation kernels for the development of large scale scientific applications on heterogeneous systems", journal = j-SIGPLAN, volume = "47", number = "8", pages = "287--288", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145857", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present CaCUDA --- a GPGPU kernel abstraction and a parallel programming framework for developing highly efficient large scale scientific applications using stencil computations on hybrid CPU/GPU architectures. CaCUDA is built upon the Cactus computational toolkit, an open source problem solving environment designed for scientists and engineers. Due to the flexibility and extensibility of the Cactus toolkit, the addition of a GPGPU programming framework required no changes to the Cactus infrastructure, guaranteeing that existing features and modules will continue to work without modification. 
CaCUDA was tested and benchmarked using a 3D CFD code based on a finite difference discretization of Navier--Stokes equations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Marker:2012:MED, author = "Bryan Marker and Andy Terrel and Jack Poulson and Don Batory and Robert van de Geijn", title = "Mechanizing the expert dense linear algebra developer", journal = j-SIGPLAN, volume = "47", number = "8", pages = "289--290", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145858", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The efforts of an expert to parallelize and optimize a dense linear algebra algorithm for distributed-memory targets are largely mechanical and repetitive. We demonstrate that these efforts can be encoded and automatically applied to obviate the manual implementation of many algorithms in high-performance code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nugteren:2012:BHM, author = "Cedric Nugteren and Henk Corporaal", title = "The boat hull model: adapting the roofline model to enable performance prediction for parallel computing", journal = j-SIGPLAN, volume = "47", number = "8", pages = "291--292", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145859", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Multi-core and many-core were already major trends for the past six years, and are expected to continue for the next decades. With these trends of parallel computing, it becomes increasingly difficult to decide on which architecture to run a given application. In this work, we use an algorithm classification to predict performance prior to algorithm implementation. For this purpose, we modify the roofline model to include class information. In this way, we enable architectural choice through performance prediction prior to the development of architecture specific code. The new model, the boat hull model, is demonstrated using a GPU as a target architecture. We show for 6 example algorithms that performance is predicted accurately without requiring code to be available.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feng:2012:SPG, author = "Min Feng and Rajiv Gupta and Laxmi N. 
Bhuyan", title = "Speculative parallelization on {GPGPUs}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "293--294", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145860", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "This paper overviews the first speculative parallelization technique for GPUs that can exploit parallelism in loops even in the presence of dynamic irregularities that may give rise to cross-iteration dependences. The execution of a speculatively parallelized loop consists of five phases: scheduling, computation, misspeculation check, result committing, and misspeculation recovery. We perform the misspeculation check on the GPU to minimize its cost. We optimize the procedures of result committing and misspeculation recovery to reduce the result copying and recovery overhead. Finally, the scheduling policies are designed according to the types of cross-iteration dependences to reduce the misspeculation rate. Our preliminary evaluation was conducted on an nVidia Tesla C1060 hosted in an Intel(R) Xeon(R) E5540 machine. We use three benchmarks, of which two contain irregular memory accesses and one contains irregular control flows that can give rise to cross-iteration dependences. Our implementation achieves 3.6x--13.8x speedups for loops in these benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jimborean:2012:APM, author = "Alexandra Jimborean and Philippe Clauss and Beno{\^\i}t Pradelle and Luis Mastrangelo and Vincent Loechner", title = "Adapting the polyhedral model as a framework for efficient speculative parallelization", journal = j-SIGPLAN, volume = "47", number = "8", pages = "295--296", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145861", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In this paper, we present a Thread-Level Speculation (TLS) framework whose main feature is to be able to speculatively parallelize a sequential loop nest in various ways, by re-scheduling its iterations. The transformation to be applied is selected at runtime with the goal of minimizing the number of rollbacks and maximizing performance. We perform code transformations by applying the polyhedral model that we adapted for speculative and runtime code parallelization. For this purpose, we design a parallel code pattern which is patched by our runtime system according to the profiling information collected on some execution samples. Adaptability is ensured by considering chunks of code of various sizes that are launched successively, each of which is parallelized in a different manner, or run sequentially, depending on the currently observed behavior for accessing memory.
We show on several benchmarks that our framework yields good performance on codes which could not be handled efficiently by previously proposed TLS systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gong:2012:OCN, author = "Yifan Gong and Bingsheng He and Jianlong Zhong", title = "An overview of {CMPI}: network performance aware {MPI} in the cloud", journal = j-SIGPLAN, volume = "47", number = "8", pages = "297--298", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145862", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Cloud computing enables users to perform distributed computing tasks on many virtual machines, without owning a physical cluster. Recently, various distributed computing tasks such as scientific applications are being moved from supercomputers and private clusters to public clouds. Message passing interface (MPI) is a key and common component in distributed computing tasks. The virtualized computing environment of the public cloud hides the network topology information from the users, and existing topology-aware optimizations for MPI are no longer feasible in the cloud environment. We propose a network performance aware MPI library named CMPI. CMPI embraces a new model for capturing the network performance among different virtual machines in the cloud. Based on the network performance model, we develop novel network performance aware algorithms for communication operations. This poster gives an overview of CMPI design, and presents some preliminary results on collective operations such as broadcast. We demonstrate the effectiveness of our network performance aware optimizations on Amazon EC2.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kim:2012:OUP, author = "Jungwon Kim and Sangmin Seo and Jun Lee and Jeongho Nah and Gangwon Jo and Jaejin Lee", title = "{OpenCL} as a unified programming model for heterogeneous {CPU\slash GPU} clusters", journal = j-SIGPLAN, volume = "47", number = "8", pages = "299--300", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145863", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In this paper, we propose an OpenCL framework for heterogeneous CPU/GPU clusters, and show that the framework achieves both high performance and ease of programming. The framework provides an illusion of a single system for the user. It allows the application to utilize multiple heterogeneous compute devices, such as multicore CPUs and GPUs, in a remote node as if they were in a local node. No communication API, such as the MPI library, is required in the application source.
We implement the OpenCL framework and evaluate its performance on a heterogeneous CPU/GPU cluster that consists of one host node and nine compute nodes using eleven OpenCL benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tzenakis:2012:BBL, author = "George Tzenakis and Angelos Papatriantafyllou and John Kesapides and Polyvios Pratikakis and Hans Vandierendonck and Dimitrios S. Nikolopoulos", title = "{BDDT}: block-level dynamic dependence analysis for deterministic task-based parallelism", journal = j-SIGPLAN, volume = "47", number = "8", pages = "301--302", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145864", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kamil:2012:PPP, author = "Shoaib Kamil and Derrick Coetzee and Scott Beamer and Henry Cook and Ekaterina Gonina and Jonathan Harper and Jeffrey Morlan and Armando Fox", title = "Portable parallel performance from sequential, productive, embedded domain-specific languages", journal = j-SIGPLAN, volume = "47", number = "8", pages = "303--304", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145865", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Domain-expert productivity programmers desire scalable application performance, but usually must rely on efficiency programmers who are experts in explicit parallel programming to achieve it. Since such programmers are rare, to maximize reuse of their work we propose encapsulating their strategies in mini-compilers for domain-specific embedded languages (DSELs) glued together by a common high-level host language familiar to productivity programmers. The nontrivial applications that use these DSELs perform up to 98\% of peak attainable performance, and comparable to or better than existing hand-coded implementations. Our approach is unique in that each mini-compiler not only performs conventional compiler transformations and optimizations, but includes imperative procedural code that captures an efficiency expert's strategy for mapping a narrow domain onto a specific type of hardware. The result is source- and performance-portability for productivity programmers and parallel performance that rivals that of hand-coded efficiency-language implementations of the same applications. We describe a framework that supports our methodology and five implemented DSELs supporting common computation kernels. 
Our results demonstrate that for several interesting classes of problems, efficiency-level parallel performance can be achieved by packaging efficiency programmers' expertise in a reusable framework that is easy to use for both productivity programmers and efficiency programmers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hoefler:2012:CCO, author = "Torsten Hoefler and Timo Schneider", title = "Communication-centric optimizations by dynamically detecting collective operations", journal = j-SIGPLAN, volume = "47", number = "8", pages = "305--306", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145866", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The steady increase of parallelism in high-performance computing platforms implies that communication will be most important in large-scale applications. In this work, we tackle the problem of transparent optimization of large-scale communication patterns using online compilation techniques. We utilize the Group Operation Assembly Language (GOAL), an abstract parallel dataflow definition language, to specify our transformations in a device-independent manner. We develop fast schemes that analyze dataflow and synchronization semantics in GOAL and detect if parts of the (or the whole) communication pattern express a known collective communication operation. The detection of collective operations allows us to replace the detected patterns with highly optimized algorithms or low-level hardware calls and thus improve performance significantly. Benchmark results suggest that our technique can lead to a performance improvement of orders of magnitude compared with various optimized algorithms written in Co-Array Fortran. Detecting collective operations also improves the programmability of parallel languages in that the user does not have to understand the detailed semantics of high-level communication operations in order to generate efficient and scalable code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2012:LLF, author = "Donghui Zhang and Per-{\AA}ke Larson", title = "{LHlf}: lock-free linear hashing (poster paper)", journal = j-SIGPLAN, volume = "47", number = "8", pages = "307--308", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145868", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "LHlf is a new hash table designed to allow very high levels of concurrency. The table is lock free and grows and shrinks automatically according to the number of items in the table. Insertions, lookups and deletions are never blocked. LHlf is based on linear hashing but adopts recursive split-ordering of the items within a bucket to be able to split and merge lists in a lock free manner.
LHlf is as fast as the best previous lock-free design and in addition it offers stable performance, uses less space, and supports both expansions and contractions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Timnat:2012:WFL, author = "Shahar Timnat and Anastasia Braginsky and Alex Kogan and Erez Petrank", title = "Wait-free linked-lists", journal = j-SIGPLAN, volume = "47", number = "8", pages = "309--310", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145869", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The linked-list data structure is fundamental and ubiquitous. Lock-free versions of the linked-list are well known. However, the existence of a practical wait-free linked-list has been open. In this work we designed such a linked-list. To achieve better performance, we have also extended this design using the fast-path-slow-path methodology. The resulting implementation achieves performance which is competitive with that of Harris's lock-free list, while still guaranteeing non-starvation via wait-freedom. We have also developed a proof for the correctness and the wait-freedom of our design.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dinh:2012:SPD, author = "Minh Ngoc Dinh and David Abramson and Chao Jin and Andrew Gontarek and Bob Moench and Luiz DeRose", title = "Scalable parallel debugging with statistical assertions", journal = j-SIGPLAN, volume = "47", number = "8", pages = "311--312", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145870", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Traditional debuggers are of limited value for modern scientific codes that manipulate large complex data structures. This paper discusses a novel debug-time assertion, called a ``Statistical Assertion'', that allows a user to reason about large data structures, and the primitives are parallelised to provide an efficient solution. We present the design and implementation of statistical assertions, and illustrate the debugging technique with a molecular dynamics simulation. 
We evaluate the performance of the tool on a 12,000-core Cray XE6.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Malkis:2012:VSB, author = "Alexander Malkis and Anindya Banerjee", title = "Verification of software barriers", journal = j-SIGPLAN, volume = "47", number = "8", pages = "313--314", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145871", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "This paper describes frontiers in verification of the software barrier synchronization primitive. So far most software barrier algorithms have not been mechanically verified. We show preliminary results in automatically proving the correctness of the major software barriers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mittal:2012:CAS, author = "Anshul Mittal and Nikhil Jain and Thomas George and Yogish Sabharwal and Sameer Kumar", title = "Collective algorithms for sub-communicators", journal = j-SIGPLAN, volume = "47", number = "8", pages = "315--316", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145872", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Collective communication over a group of processors is an integral and time consuming component in many HPC applications. Many modern day supercomputers are based on torus interconnects. On such systems, for an irregular communicator comprising a subset of processors, the algorithms developed so far are not contention free in general and hence non-optimal. In this paper, we present a novel contention-free algorithm to perform collective operations over a subset of processors in a torus network. We also extend previous work on regular communicators to handle special cases of irregular communicators that occur frequently in parallel scientific applications. For the generic case where multiple node disjoint sub-communicators communicate simultaneously in a loosely synchronous fashion, we propose a novel cooperative approach to route the data for individual sub-communicators without contention.
Empirical results demonstrate that our algorithms outperform the optimized MPI collective implementation on IBM's Blue Gene/P supercomputer for large data sizes and random node distributions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DeKoster:2012:SVE, author = "Joeri {De Koster} and Stefan Marr and Theo D'Hondt", title = "Synchronization views for event-loop actors", journal = j-SIGPLAN, volume = "47", number = "8", pages = "317--318", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145873", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The actor model has already proven itself as an interesting concurrency model that avoids issues such as deadlocks and race conditions by construction, and thus facilitates concurrent programming. The tradeoff is that it sacrifices expressiveness and efficiency especially with respect to data parallelism. However, many standard solutions to computationally expensive problems employ data parallel algorithms for better performance on parallel systems. We identified three problems that inhibit the use of data-parallel algorithms within the actor model. Firstly, one of the main properties of the actor model, the fact that no data is shared, is one of the most severe performance bottlenecks; in particular, shared state cannot be read truly in parallel. Secondly, the actor model on its own does not provide a mechanism to specify extra synchronization conditions on batches of messages, which leads to event-level data-races. And lastly, programmers are forced to write code in a continuation-passing style (CPS) to handle typical request-response situations. However, CPS breaks the sequential flow of the code and is often hard to understand, which increases complexity and lowers maintainability. We propose synchronization views to solve these three issues without compromising the semantic properties of the actor model. Thus, the resulting concurrency model maintains deadlock-freedom, avoids low-level race conditions, and keeps the semantics of macro-step execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Metreveli:2012:CCP, author = "Zviad Metreveli and Nickolai Zeldovich and M. Frans Kaashoek", title = "{CPHASH}: a cache-partitioned hash table", journal = j-SIGPLAN, volume = "47", number = "8", pages = "319--320", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145874", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "CPHash is a concurrent hash table for multicore processors. CPHash partitions its table across the caches of cores and uses message passing to transfer lookups\slash inserts to a partition. CPHash's message passing avoids the need for locks, pipelines batches of asynchronous messages, and packs multiple messages into a single cache line transfer.
Experiments on an 80-core machine with 2 hardware threads per core show that CPHash has $ \approx 1.6 \times $ higher throughput than a hash table implemented using fine-grained locks. An analysis shows that CPHash wins because it experiences fewer cache misses and its cache misses are less expensive, because of less contention for the on-chip interconnect and DRAM. CPServer, a key\slash value cache server using CPHash, achieves $ \approx 5 \% $ higher throughput than a key\slash value cache server that uses a hash table with fine-grained locks, but both achieve better throughput and scalability than memcached. The throughput of CPHash and CPServer also scales near-linearly with the number of cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wernsing:2012:RHA, author = "John R. Wernsing and Greg Stitt", title = "{RACECAR}: a heuristic for automatic function specialization on multi-core heterogeneous systems", journal = j-SIGPLAN, volume = "47", number = "8", pages = "321--322", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145875", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "High-performance computing systems increasingly combine multi-core processors and heterogeneous resources such as graphics-processing units and field-programmable gate arrays. However, significant application design complexity for such systems has often led to untapped performance potential. Application designers targeting such systems currently must determine how to parallelize computation, create device-specialized implementations for each heterogeneous resource, and determine how to partition work for each resource. In this paper, we present the RACECAR heuristic to automate the optimization of applications for multi-core heterogeneous systems by automatically exploring implementation alternatives that include different algorithms, parallelization strategies, and work distributions. Experimental results show RACECAR-specialized implementations achieve speedups up to 117x and average 11x compared to a single CPU thread when parallelizing computation across multiple cores, graphics-processing units, and field-programmable gate arrays.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2012:LFA, author = "Yujie Liu and Michael Spear", title = "A lock-free, array-based priority queue", journal = j-SIGPLAN, volume = "47", number = "8", pages = "323--324", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145876", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Noll:2012:IDO, author = "Albert Noll and Thomas R.
Gross", title = "An infrastructure for dynamic optimization of parallel programs", journal = j-SIGPLAN, volume = "47", number = "8", pages = "325--326", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145877", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Object-oriented programming languages like Java provide only low-level constructs (e.g., starting a thread) to describe concurrency. High-level abstractions (e.g., thread pools) are merely provided as a library. As a result, a compiler is not aware of the high-level semantics of a parallel library and therefore misses important optimization opportunities. This paper presents a simple source language extension based on which a compiler is provided with the opportunity to perform new optimizations that are particularly effective for parallel code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kjolstad:2012:ADG, author = "Fredrik Kjolstad and Torsten Hoefler and Marc Snir", title = "Automatic datatype generation and optimization", journal = j-SIGPLAN, volume = "47", number = "8", pages = "327--328", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145878", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Many high performance applications spend considerable time packing noncontiguous data into contiguous communication buffers. MPI Datatypes provide an alternative by describing noncontiguous data layouts. This allows sophisticated hardware to retrieve data directly from application data structures. However, packing codes in real-world applications are often complex and specifying equivalent datatypes is difficult, time-consuming, and error prone. We present an algorithm that automates the transformation. We have implemented the algorithm in a tool that transforms packing code to MPI Datatypes, and evaluated it by transforming 90 packing codes from the NAS Parallel Benchmarks. 
The transformation allows easy porting of applications to new machines that benefit from datatypes, thus improving programmer productivity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Burnim:2012:NIN, author = "Jacob Burnim and Tayfun Elmas and George Necula and Koushik Sen", title = "{NDetermin}: inferring nondeterministic sequential specifications for parallelism correctness", journal = j-SIGPLAN, volume = "47", number = "8", pages = "329--330", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145879", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Nondeterministic Sequential (NDSeq) specifications have been proposed as a means for separating the testing, debugging, and verifying of a program's parallelism correctness and its sequential functional correctness. In this work, we present a technique that, given a few representative executions of a parallel program, combines dynamic data flow analysis and Minimum-Cost Boolean Satisfiability (MinCostSAT) solving for automatically inferring a likely NDSeq specification for the parallel program. For a number of Java benchmarks, our tool NDetermin infers equivalent or stronger NDSeq specifications than those previously written manually.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Park:2012:CB, author = "Chang-Seo Park and Koushik Sen", title = "Concurrent breakpoints", journal = j-SIGPLAN, volume = "47", number = "8", pages = "331--332", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145880", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In program debugging, reproducibility of bugs is a key requirement. Unfortunately, bugs in concurrent programs are notoriously difficult to reproduce because bugs due to concurrency happen under very specific thread schedules and the likelihood of taking such corner-case schedules during regular testing is very low. We propose concurrent breakpoints, a light-weight and programmatic way to make a concurrency bug reproducible. We describe a mechanism that helps to hit a concurrent breakpoint in a concurrent execution with high probability. We have implemented concurrent breakpoints as a light-weight library for Java and C/C++ programs. 
We have used the implementation to deterministically reproduce several known non-deterministic bugs in real-world concurrent Java and C/C++ programs with almost 100\% probability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Stone:2012:EMP, author = "Andrew Stone and John Dennis and Michelle Strout", title = "Establishing a {Miniapp} as a programmability proxy", journal = j-SIGPLAN, volume = "47", number = "8", pages = "333--334", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145881", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Miniapps serve as test beds for prototyping and evaluating new algorithms, data structures, and programming models before incorporating such changes into larger applications. For the miniapp to accurately predict how a prototyped change would affect a larger application it is necessary that the miniapp be shown to serve as a proxy for that larger application. Although many benchmarks claim to proxy the performance for a set of large applications, little work has explored what criteria must be met for a benchmark to serve as a proxy for examining programmability. In this poster we describe criteria that can be used to establish that a miniapp serves as a performance and programmability proxy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jiang:2012:OSP, author = "Lei Jiang and Pragneshkumar B. Patel and George Ostrouchov and Ferdinand Jamitzky", title = "{OpenMP}-style parallelism in data-centered multicore computing with {R}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "335--336", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145882", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "R$^1$ is a domain specific language widely used for data analysis by the statistics community as well as by researchers in finance, biology, social sciences, and many other disciplines. As R programs are linked to input data, the exponential growth of available data makes high-performance computing with R imperative. To ease the process of writing parallel programs in R, code transformation from a sequential program to a parallel version would bring much convenience to R users. In this paper, we present our work in semi-automatic parallelization of R codes with user-added OpenMP-style pragmas. While such pragmas are used at the frontend, we take advantage of multiple parallel backends with different R packages. We provide flexibility for importing parallelism with plug-in components, impose built-in MapReduce for data processing, and also maintain code reusability. 
We illustrate the advantage of the on-the-fly mechanisms which can lead to significant applications in data-centered parallel computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Caniou:2012:PAP, author = "Yves Caniou and Daniel Diaz and Florian Richoux and Philippe Codognet and Salvador Abreu", title = "Performance analysis of parallel constraint-based local search", journal = j-SIGPLAN, volume = "47", number = "8", pages = "337--338", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145883", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present a parallel implementation of a constraint-based local search algorithm and investigate its performance results for hard combinatorial optimization problems on two different platforms up to several hundreds of cores. On a variety of classical CSPs benchmarks, speedups are very good for a few tens of cores, and good up to a hundred cores. More challenging problems derived from real-life applications (Costas array) shows even better speedups, nearly optimal up to 256 cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Thiemann:2012:ACE, author = "Peter Thiemann", title = "{AGDA}-curious?: an exploration of programming with dependent types", journal = j-SIGPLAN, volume = "47", number = "9", pages = "1--2", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "I explore programming with the dependently typed functional language, AGDA. I present the progress which AGDA has made, demonstrate its usage in a small development, reflect critically on the state of the art, and speculate about the way ahead. I do not seek to persuade you to adopt AGDA as your primary tool for systems development, but argue that AGDA stimulates new useful ways to think about programming problems and deserves not just curiosity but interest, support and contribution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Stewart:2012:VHT, author = "Gordon Stewart and Lennart Beringer and Andrew W. Appel", title = "Verified heap theorem prover by paramodulation", journal = j-SIGPLAN, volume = "47", number = "9", pages = "3--14", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present VeriStar, a verified theorem prover for a decidable subset of separation logic. 
Together with VeriSmall [3], a proved-sound Smallfoot-style program analysis for C minor, VeriStar demonstrates that fully machine-checked static analyses equipped with efficient theorem provers are now within the reach of formal methods. As a pair, VeriStar and VeriSmall represent the first application of the Verified Software Toolchain [4], a tightly integrated collection of machine-verified program logics and compilers giving foundational correctness guarantees. VeriStar is (1) purely functional, (2) machine-checked, (3) end-to-end, (4) efficient and (5) modular. By purely functional, we mean it is implemented in Gallina, the pure functional programming language embedded in the Coq theorem prover. By machine-checked, we mean it has a proof in Coq that when the prover says ``valid'', the checked entailment holds in a proved-sound separation logic for C minor. By end-to-end, we mean that when the static analysis+theorem prover says a C minor program is safe, the program will be compiled to a semantically equivalent assembly program that runs on real hardware. By efficient, we mean that the prover implements a state-of-the-art algorithm for deciding heap entailments and uses highly tuned verified functional data structures. By modular, we mean that VeriStar can be retrofitted to other static analyses as a plug-compatible entailment checker and its soundness proof can easily be ported to other separation logics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Huffman:2012:FVM, author = "Brian Huffman", title = "Formal verification of monad transformers", journal = j-SIGPLAN, volume = "47", number = "9", pages = "15--16", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364532", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present techniques for reasoning about constructor classes that (like the monad class) fix polymorphic operations and assert polymorphic axioms. We do not require a logic with first-class type constructors, first-class polymorphism, or type quantification; instead, we rely on a domain-theoretic model of the type system in a universal domain to provide these features. These ideas are implemented in the Tycon library for the Isabelle theorem prover, which builds on the HOLCF library of domain theory. The Tycon library provides various axiomatic type constructor classes, including functors and monads. It also provides automation for instantiating those classes, and for defining further subclasses. We use the Tycon library to formalize three Haskell monad transformers: the error transformer, the writer transformer, and the resumption transformer. 
The error and writer transformers do not universally preserve the monad laws; however, we establish datatype invariants for each, showing that they are valid monads when viewed as abstract datatypes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Dunfield:2012:EIU, author = "Joshua Dunfield", title = "Elaborating intersection and union types", journal = j-SIGPLAN, volume = "47", number = "9", pages = "17--28", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Designing and implementing typed programming languages is hard. Every new type system feature requires extending the metatheory and implementation, which are often complicated and fragile. To ease this process, we would like to provide general mechanisms that subsume many different features. In modern type systems, parametric polymorphism is fundamental, but intersection polymorphism has gained little traction in programming languages. Most practical intersection type systems have supported only refinement intersections, which increase the expressiveness of types (more precise properties can be checked) without altering the expressiveness of terms; refinement intersections can simply be erased during compilation. In contrast, unrestricted intersections increase the expressiveness of terms, and can be used to encode diverse language features, promising an economy of both theory and implementation. We describe a foundation for compiling unrestricted intersection and union types: an elaboration type system that generates ordinary $ \lambda $-calculus terms. The key feature is a Forsythe-like merge construct. With this construct, not all reductions of the source program preserve types; however, we prove that ordinary call-by-value evaluation of the elaborated program corresponds to a type-preserving evaluation of the source program. We also describe a prototype implementation and applications of unrestricted intersections and unions: records, operator overloading, and simulating dynamic typing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Chen:2012:ETT, author = "Sheng Chen and Martin Erwig and Eric Walkingshaw", title = "An error-tolerant type system for variational lambda calculus", journal = j-SIGPLAN, volume = "47", number = "9", pages = "29--40", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Conditional compilation and software product line technologies make it possible to generate a huge number of different programs from a single software project. Typing each of these programs individually is usually impossible due to the sheer number of possible variants. 
Our previous work has addressed this problem with a type system for variational lambda calculus (VLC), an extension of lambda calculus with basic constructs for introducing and organizing variation. Although our type inference algorithm is more efficient than the brute-force strategy of inferring the types of each variant individually, it is less robust since type inference will fail for the entire variational expression if any one variant contains a type error. In this work, we extend our type system to operate on VLC expressions containing type errors. This extension directly supports locating ill-typed variants and the incremental development of variational programs. It also has many subtle implications for the unification of variational types. We show that our extended type system possesses a principal typing property and that the underlying unification problem is unitary. Our unification algorithm computes partial unifiers that lead to result types that (1) contain errors in as few variants as possible and (2) are most general. Finally, we perform an empirical evaluation to determine the overhead of this extension compared to our previous work, to demonstrate the improvements over the brute-force approach, and to explore the effects of various error distributions on the inference process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Krishnaswami:2012:SST, author = "Neelakantan R. Krishnaswami and Aaron Turon and Derek Dreyer and Deepak Garg", title = "Superficially substructural types", journal = j-SIGPLAN, volume = "47", number = "9", pages = "41--54", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many substructural type systems have been proposed for controlling access to shared state in higher-order languages. Central to these systems is the notion of a *resource*, which may be split into disjoint pieces that different parts of a program can manipulate independently without worrying about interfering with one another. Some systems support a *logical* notion of resource (such as permissions), under which two resources may be considered disjoint even if they govern the *same* piece of state. However, in nearly all existing systems, the notions of resource and disjointness are fixed at the outset, baked into the model of the language, and fairly coarse-grained in the kinds of sharing they enable. In this paper, inspired by recent work on ``fictional disjointness'' in separation logic, we propose a simple and flexible way of enabling any module in a program to create its own custom type of splittable resource (represented as a commutative monoid), thus providing fine-grained control over how the module's private state is shared with its clients. 
This functionality can be incorporated into an otherwise standard substructural type system by means of a new typing rule we call *the sharing rule*, whose soundness we prove semantically via a novel resource-oriented Kripke logical relation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Mitchell:2012:SBB, author = "Neil Mitchell", title = "Shake before building: replacing {\tt make} with {Haskell}", journal = j-SIGPLAN, volume = "47", number = "9", pages = "55--66", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364538", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most complex software projects are compiled using a build tool (e.g. make), which runs commands in an order satisfying user-defined dependencies. Unfortunately, most build tools require all dependencies to be specified before the build starts. This restriction makes many dependency patterns difficult to express, especially those involving files generated at build time. We show how to eliminate this restriction, allowing additional dependencies to be specified while building. We have implemented our ideas in the Haskell library Shake, and have used Shake to write a complex build system which compiles millions of lines of code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Chitil:2012:PTL, author = "Olaf Chitil", title = "Practical typed lazy contracts", journal = j-SIGPLAN, volume = "47", number = "9", pages = "67--76", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Until now there has been no support for specifying and enforcing contracts within a lazy functional program. That is a shame, because contracts consist of pre- and post-conditions for functions that go beyond the standard static types. This paper presents the design and implementation of a small, easy-to-use, purely functional contract library for Haskell, which, when a contract is violated, also provides more useful information than the classical blaming of one contract partner. From now on lazy functional languages can profit from the assurances in the development of correct programs that contracts provide.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Oliveira:2012:FPS, author = "Bruno C.d.S. Oliveira and William R. 
Cook", title = "Functional programming with structured graphs", journal = j-SIGPLAN, volume = "47", number = "9", pages = "77--88", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new functional programming model for graph structures called structured graphs. Structured graphs extend conventional algebraic datatypes with explicit definition and manipulation of cycles and/or sharing, and offer a practical and convenient way to program graphs in functional programming languages like Haskell. The representation of sharing and cycles (edges) employs recursive binders and uses an encoding inspired by parametric higher-order abstract syntax. Unlike traditional approaches based on mutable references or node/edge lists, well-formedness of the graph structure is ensured statically and reasoning can be done with standard functional programming techniques. Since the binding structure is generic, we can define many useful generic combinators for manipulating structured graphs. We give applications and show how to reason about structured graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Sheard:2012:PPC, author = "Timothy E. Sheard", title = "Painless programming combining reduction and search: design principles for embedding decision procedures in high-level languages", journal = j-SIGPLAN, volume = "47", number = "9", pages = "89--102", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364542", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe the Funlogic system which extends a functional language with existentially quantified declarations. An existential declaration introduces a variable and a set of constraints that its value should meet. Existential variables are bound to conforming values by a decision procedure. Funlogic embeds multiple external decision procedures using a common framework. Design principles for embedding decision procedures are developed and illustrated for three different decision procedures from widely varying domains.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Dagand:2012:TFA, author = "Pierre-Evariste Dagand and Conor McBride", title = "Transporting functions across ornaments", journal = j-SIGPLAN, volume = "47", number = "9", pages = "103--114", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364544", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming with dependent types is a blessing and a curse. It is a blessing to be able to bake invariants into the definition of datatypes: we can finally write correct-by-construction software. 
However, this extreme accuracy is also a curse: a datatype is the combination of a structuring medium together with a special purpose logic. These domain-specific logics hamper any effort of code reuse among similarly structured data. In this paper, we exorcise our datatypes by adapting the notion of ornament to our universe of inductive families. We then show how code reuse can be achieved by ornamenting functions. Using these functional ornaments, we capture the relationship between functions such as the addition of natural numbers and the concatenation of lists. With this knowledge, we demonstrate how the implementation of the former informs the implementation of the latter: the user can ask the definition of addition to be lifted to lists and she will only be asked the details necessary to carry on adding lists rather than numbers. Our presentation is formalised in a type theory with a universe of datatypes and all our constructions have been implemented as generic programs, requiring no extension to the type theory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Myreen:2012:PPS, author = "Magnus O. Myreen and Scott Owens", title = "Proof-producing synthesis of {ML} from higher-order logic", journal = j-SIGPLAN, volume = "47", number = "9", pages = "115--126", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The higher-order logic found in proof assistants such as Coq and various HOL systems provides a convenient setting for the development and verification of pure functional programs. However, to efficiently run these programs, they must be converted (or ``extracted'') to functional programs in a programming language such as ML or Haskell. With current techniques, this step, which must be trusted, relates similar looking objects that have very different semantic definitions, such as the set-theoretic model of a logic and the operational semantics of a programming language. In this paper, we show how to increase the trustworthiness of this step with an automated technique. Given a functional program expressed in higher-order logic, our technique provides the corresponding program for a functional language defined with an operational semantics, and it provides a mechanically checked theorem relating the two. This theorem can then be used to transfer verified properties of the logical function to the program. 
We have implemented our technique in the HOL4 theorem prover, translating functions to a core subset of Standard ML, and have applied it to examples including functional data structures, a parser generator, cryptographic algorithms, and a garbage collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Danielsson:2012:OSU, author = "Nils Anders Danielsson", title = "Operational semantics using the partiality monad", journal = j-SIGPLAN, volume = "47", number = "9", pages = "127--138", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364546", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The operational semantics of a partial, functional language is often given as a relation rather than as a function. The latter approach is arguably more natural: if the language is functional, why not take advantage of this when defining the semantics? One can immediately see that a functional semantics is deterministic and, in a constructive setting, computable. This paper shows how one can use the coinductive partiality monad to define big-step or small-step operational semantics for lambda-calculi and virtual machines as total, computable functions (total definitional interpreters). To demonstrate that the resulting semantics are useful, type soundness and compiler correctness results are also proved. The results have been implemented and checked using Agda, a dependently typed programming language and proof assistant.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Olukotun:2012:HPE, author = "Kunle Olukotun", title = "High performance embedded domain specific languages", journal = j-SIGPLAN, volume = "47", number = "9", pages = "139--140", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today, all high-performance computer architectures are parallel and heterogeneous; a combination of multiple CPUs, GPUs and specialized processors. This creates a complex programming problem for application developers. Domain-specific languages (DSLs) are a promising solution to this problem because they provide an avenue for application-specific abstractions to be mapped directly to low level architecture-specific programming models providing high programmer productivity and high execution performance. In this talk I will describe our approach to building high performance DSLs, which is based on embedding in Scala, light-weight modular staging and a DSL infrastructure called Delite. I will describe how we transform impure functional programs into efficient first-order low-level code using domain specific optimization, parallelism optimization, locality optimization, scalar optimization, and architecture-specific code generation.
All optimizations and transformations are implemented in an extensible DSL compiler architecture that minimizes the programmer effort required to develop a new DSL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Severi:2012:PTS, author = "Paula G. Severi and Fer-Jan J. de Vries", title = "Pure type systems with corecursion on streams: from finite to infinitary normalisation", journal = j-SIGPLAN, volume = "47", number = "9", pages = "141--152", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364550", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we use types for ensuring that programs involving streams are well-behaved. We extend pure type systems with a type constructor for streams, a modal operator next and a fixed point operator for expressing corecursion. This extension is called Pure Type Systems with Corecursion (CoPTS). The typed lambda calculus for reactive programs defined by Krishnaswami and Benton can be obtained as a CoPTS. CoPTSs allow us to study a wide range of typed lambda calculi extended with corecursion using only one framework. In particular, we study this extension for the calculus of constructions which is the underlying formal language of Coq. We use the machinery of infinitary rewriting and formalise the idea of well-behaved programs using the concept of infinitary normalisation. The set of finite and infinite terms is defined as a metric completion. We establish a precise connection between the modal operator (o A ) and the metric at a syntactic level by relating a variable of type (o A ) with the depth of all its occurrences in a term. This syntactic connection between the modal operator and the depth is the key to the proofs of infinitary weak and strong normalisation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Endrullis:2012:CES, author = "J{\"o}rg Endrullis and Dimitri Hendriks and Rena Bakhshi", title = "On the complexity of equivalence of specifications of infinite objects", journal = j-SIGPLAN, volume = "47", number = "9", pages = "153--164", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the complexity of deciding the equality of infinite objects specified by systems of equations, and of infinite objects specified by $ \lambda $-terms. For equational specifications there are several natural notions of equality: equality in all models, equality of the sets of solutions, and equality of normal forms for productive specifications. For $ \lambda $-terms we investigate B{\"o}hm-tree equality and various notions of observational equality. We pinpoint the complexity of each of these notions in the arithmetical or analytical hierarchy. We show that the complexity of deciding equality in all models subsumes the entire analytical hierarchy.
This holds already for the most simple infinite objects, viz. streams over $ \{ 0, 1 \} $, and stands in sharp contrast to the low arithmetical {$ \Pi^0_2 $}-completeness of equality of equationally specified streams derived in [17] employing a different notion of equality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Simoes:2012:AAA, author = "Hugo Sim{\~o}es and Pedro Vasconcelos and M{\'a}rio Florido and Steffen Jost and Kevin Hammond", title = "Automatic amortised analysis of dynamic memory allocation for lazy functional programs", journal = j-SIGPLAN, volume = "47", number = "9", pages = "165--176", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364575", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes the first successful attempt, of which we are aware, to define an automatic, type-based static analysis of resource bounds for lazy functional programs. Our analysis uses the automatic amortisation approach developed by Hofmann and Jost, which was previously restricted to eager evaluation. In this paper, we extend this work to a lazy setting by capturing the costs of unevaluated expressions in type annotations and by amortising the payment of these costs using a notion of lazy potential. We present our analysis as a proof system for predicting heap allocations of a minimal functional language (including higher-order functions and recursive data types) and define a formal cost model based on Launchbury's natural semantics for lazy evaluation. We prove the soundness of our analysis with respect to the cost model. Our approach is illustrated by a number of representative and non-trivial examples that have been analysed using a prototype implementation of our analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Earl:2012:IPA, author = "Christopher Earl and Ilya Sergey and Matthew Might and David {Van Horn}", title = "Introspective pushdown analysis of higher-order programs", journal = j-SIGPLAN, volume = "47", number = "9", pages = "177--188", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364576", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the static analysis of functional programs, pushdown flow analysis and abstract garbage collection skirt just inside the boundaries of soundness and decidability. Alone, each method reduces analysis times and boosts precision by orders of magnitude. This work illuminates and conquers the theoretical challenges that stand in the way of combining the power of these techniques. The challenge in marrying these techniques is not subtle: computing the reachable control states of a pushdown system relies on limiting access during transition to the top of the stack; abstract garbage collection, on the other hand, needs full access to the entire stack to compute a root set, just as concrete collection does. 
Introspective pushdown systems resolve this conflict. Introspective pushdown systems provide enough access to the stack to allow abstract garbage collection, but they remain restricted enough to compute control-state reachability, thereby enabling the sound and precise product of pushdown analysis and abstract garbage collection. Experiments reveal synergistic interplay between the techniques, and the fusion demonstrates ``better-than-both-worlds'' precision.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Launchbury:2012:ELT, author = "John Launchbury and Iavor S. Diatchki and Thomas DuBuisson and Andy Adams-Moran", title = "Efficient lookup-table protocol in secure multiparty computation", journal = j-SIGPLAN, volume = "47", number = "9", pages = "189--200", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364556", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Secure multiparty computation (SMC) permits a collection of parties to compute a collaborative result, without any of the parties gaining any knowledge about the inputs provided by other parties. Specifications for SMC are commonly presented as boolean circuits, where optimizations come mostly from reducing the number of multiply-operations (including and-gates) --- these are the operations which incur significant cost, either in computation overhead or in communication between the parties. Instead, we take a language-oriented approach, and consequently are able to explore many other kinds of optimizations. We present an efficient and general purpose SMC table-lookup algorithm that can serve as a direct alternative to circuits. Looking up a private (i.e. shared, or encrypted) n-bit argument in a public table requires log(n) parallel-and operations. We use the advanced encryption standard algorithm (AES) as a driving motivation, and by introducing different kinds of parallelization techniques, produce the fastest current SMC implementation of AES, improving the best previously reported results by well over an order of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Stefan:2012:ACT, author = "Deian Stefan and Alejandro Russo and Pablo Buiras and Amit Levy and John C. Mitchell and David Mazi{\'e}res", title = "Addressing covert termination and timing channels in concurrent information flow systems", journal = j-SIGPLAN, volume = "47", number = "9", pages = "201--214", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When termination of a program is observable by an adversary, confidential information may be leaked by terminating accordingly. While this termination covert channel has limited bandwidth for sequential programs, it is a more dangerous source of information leakage in concurrent settings.
We address concurrent termination and timing channels by presenting a dynamic information-flow control system that mitigates and eliminates these channels while allowing termination and timing to depend on secret values. Intuitively, we leverage concurrency by placing such potentially sensitive actions in separate threads. While termination and timing of these threads may expose secret values, our system requires any thread observing these properties to raise its information-flow label accordingly, preventing leaks to lower-labeled contexts. We implement this approach in a Haskell library and demonstrate its applicability by building a web server that uses information-flow control to restrict untrusted web applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{zuSiederdissen:2012:SAC, author = "Christian H{\"o}ner zu Siederdissen", title = "Sneaking around {concatMap}: efficient combinators for dynamic programming", journal = j-SIGPLAN, volume = "47", number = "9", pages = "215--226", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364559", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a framework of dynamic programming combinators that provides a high-level environment to describe the recursions typical of dynamic programming over sequence data in a style very similar to algebraic dynamic programming (ADP). Using a combination of type-level programming and stream fusion leads to a substantial increase in performance, without sacrificing much of the convenience and theoretical underpinnings of ADP. We draw examples from the field of computational biology, more specifically RNA secondary structure prediction, to demonstrate how to use these combinators and what differences exist between this library, ADP, and other approaches. The final version of the combinator library allows writing algorithms with performance close to hand-optimized C code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Daniels:2012:ERH, author = "Noah M. Daniels and Andrew Gallant and Norman Ramsey", title = "Experience report: {Haskell} in computational biology", journal = j-SIGPLAN, volume = "47", number = "9", pages = "227--234", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364560", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Haskell gives computational biologists the flexibility and rapid prototyping of a scripting language, plus the performance of native code. In our experience, higher-order functions, lazy evaluation, and monads really worked, but profiling and debugging presented obstacles. Also, Haskell libraries vary greatly: memoization combinators and parallel-evaluation strategies helped us a lot, but other, nameless libraries mostly got in our way. 
Despite the obstacles and the uncertain quality of some libraries, Haskell's ecosystem made it easy for us to develop new algorithms in computational biology.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Foltzer:2012:MSP, author = "Adam Foltzer and Abhishek Kulkarni and Rebecca Swords and Sajith Sasidharan and Eric Jiang and Ryan Newton", title = "A meta-scheduler for the par-monad: composable scheduling for the heterogeneous cloud", journal = j-SIGPLAN, volume = "47", number = "9", pages = "235--246", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364562", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern parallel computing hardware demands increasingly specialized attention to the details of scheduling and load balancing across heterogeneous execution resources that may include GPU and cloud environments, in addition to traditional CPUs. Many existing solutions address the challenges of particular resources, but do so in isolation, and in general do not compose within larger systems. We propose a general, composable abstraction for execution resources, along with a continuation-based meta-scheduler that harnesses those resources in the context of a deterministic parallel programming library for Haskell. We demonstrate performance benefits of combined CPU/GPU scheduling over either alone, and of combined multithreaded/distributed scheduling over existing distributed programming approaches for Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Bergstrom:2012:NDP, author = "Lars Bergstrom and John Reppy", title = "Nested data-parallelism on the {GPU}", journal = j-SIGPLAN, volume = "47", number = "9", pages = "247--258", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics processing units (GPUs) provide both memory bandwidth and arithmetic performance far greater than that available on CPUs but, because of their Single-Instruction-Multiple-Data (SIMD) architecture, they are hard to program. Most of the programs ported to GPUs thus far use traditional data-level parallelism, performing only operations that operate uniformly over vectors. NESL is a first-order functional language that was designed to allow programmers to write irregular-parallel programs --- such as parallel divide-and-conquer algorithms --- for wide-vector parallel computers. This paper presents our port of the NESL implementation to work on GPUs and provides empirical evidence that nested data-parallelism (NDP) on GPUs significantly outperforms CPU-based implementations and matches or beats newer GPU languages that support only flat parallelism. While our performance does not match that of hand-tuned CUDA programs, we argue that the notational conciseness of NESL is worth the loss in performance. 
This work provides the first language implementation that directly supports NDP on a GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Lippmeier:2012:WEH, author = "Ben Lippmeier and Manuel M. T. Chakravarty and Gabriele Keller and Roman Leshchinskiy and Simon Peyton Jones", title = "Work efficient higher-order vectorisation", journal = j-SIGPLAN, volume = "47", number = "9", pages = "259--270", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364564", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing approaches to higher-order vectorisation, also known as flattening nested data parallelism, do not preserve the asymptotic work complexity of the source program. Straightforward examples, such as sparse matrix-vector multiplication, can suffer a severe blow-up in both time and space, which limits the practicality of this method. We discuss why this problem arises, identify the mis-handling of index space transforms as the root cause, and present a solution using a refined representation of nested arrays. We have implemented this solution in Data Parallel Haskell (DPH) and present benchmarks showing that realistic programs, which used to suffer the blow-up, now have the correct asymptotic work complexity. In some cases, the asymptotic complexity of the vectorised program is even better than the original.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Sewell:2012:TJ, author = "Peter Sewell", title = "Tales from the jungle", journal = j-SIGPLAN, volume = "47", number = "9", pages = "271--272", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We rely on a computational infrastructure that is a densely intertwined mass of software and hardware: programming languages, network protocols, operating systems, and processors. It has accumulated great complexity, from a combination of engineering design decisions, contingent historical choices, and sheer scale, yet it is defined at best by prose specifications, or, all too often, just by the common implementations. Can we do better? More specifically, can we apply rigorous methods to this mainstream infrastructure, taking the accumulated complexity seriously, and if we do, does it help? My colleagues and I have looked at these questions in several contexts: the TCP/IP network protocols with their Sockets API; programming language design, including the Java module system and the C11/C++11 concurrency model; the hardware concurrency behaviour of x86, IBM POWER, and ARM multiprocessors; and compilation of concurrent code. In this talk I will draw some lessons from what did and did not succeed, looking especially at the empirical nature of some of the work, at the social process of engagement with the various different communities, and at the mathematical and software tools we used.
Domain-specific modelling languages (based on functional programming ideas) and proof assistants were invaluable for working with the large and loose specifications involved: idioms within HOL4 for TCP, our Ott tool for programming language specification, and Owens's Lem tool for portable semantic definitions, with HOL4, Isabelle, and Coq, for the relaxed-memory concurrency semantics work. Our experience with these suggests something of what is needed to make full-scale rigorous semantics a commonplace reality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Wadler:2012:PS, author = "Philip Wadler", title = "Propositions as sessions", journal = j-SIGPLAN, volume = "47", number = "9", pages = "273--286", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Continuing a line of work by Abramsky (1994), by Bellin and Scott (1994), and by Caires and Pfenning (2010), among others, this paper presents CP, a calculus in which propositions of classical linear logic correspond to session types. Continuing a line of work by Honda (1993), by Honda, Kubo, and Vasconcelos (1998), and by Gay and Vasconcelos (2010), among others, this paper presents GV, a linear functional language with session types, and presents a translation from GV into CP. The translation formalises for the first time a connection between a standard presentation of session types and linear logic, and shows how a modification to the standard presentation yields a language free from deadlock, where deadlock freedom follows from the correspondence to linear logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Henry:2012:TUM, author = "Gr{\'e}goire Henry and Michel Mauny and Emmanuel Chailloux and Pascal Manoury", title = "Typing unmarshalling without marshalling types", journal = j-SIGPLAN, volume = "47", number = "9", pages = "287--298", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unmarshalling primitives in statically typed languages require, in order to preserve type safety, to dynamically verify the compatibility between the incoming values and the statically expected type. In the context of programming languages based on parametric polymorphism and uniform data representation, we propose a relation of compatibility between (unmarshalled) memory graphs and types. It is defined as constraints over nodes of the memory graph. Then, we propose an algorithm to check the compatibility between a memory graph and a type. It is described as a constraint solver based on a rewriting system. We have shown that the proposed algorithm is sound and semi-complete in the presence of algebraic data types, mutable data, polymorphic sharing, cycles, and functional values; however, in its general form, it may not terminate.
We have implemented a prototype tailored for the OCaml compiler [17] that always terminates and still seems sufficiently complete in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Jones:2012:DD, author = "Will Jones and Tony Field and Tristan Allwood", title = "Deconstraining {DSLs}", journal = j-SIGPLAN, volume = "47", number = "9", pages = "299--310", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364571", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Strongly-typed functional languages provide a powerful framework for embedding Domain-Specific Languages (DSLs). However, building type-safe functions defined over an embedded DSL can introduce application-specific type constraints that end up being imposed on the DSL data types themselves. At best, these constraints are unwieldy and at worst they can limit the range of DSL expressions that can be built. We present a simple solution to this problem that allows application-specific constraints to be specified at the point of use of a DSL expression rather than when the DSL's embedding types are defined. Our solution applies equally to both tagged and tagless representations and, importantly, also works in the presence of higher-rank types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Mainland:2012:EHM, author = "Geoffrey Mainland", title = "Explicitly heterogeneous metaprogramming with {MetaHaskell}", journal = j-SIGPLAN, volume = "47", number = "9", pages = "311--322", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Languages with support for metaprogramming, like MetaOCaml, offer a principled approach to code generation by guaranteeing that well-typed metaprograms produce well-typed programs. However, many problem domains where metaprogramming can fruitfully be applied require generating code in languages like C, CUDA, or assembly. Rather than resorting to add-hoc code generation techniques, these applications should be directly supported by explicitly heterogeneous metaprogramming languages. We present MetaHaskell, an extension of Haskell 98 that provides modular syntactic and type system support for type safe metaprogramming with multiple object languages. Adding a new object language to MetaHaskell requires only minor modifications to the host language to support type-level quantification over object language types and propagation of type equality constraints. We demonstrate the flexibility of our approach through three object languages: a core ML language, a linear variant of the core ML language, and a subset of C. All three languages support metaprogramming with open terms and guarantee that well-typed MetaHaskell programs will only produce closed object terms that are well-typed. The essence of MetaHaskell is captured in a type system for a simplified metalanguage. 
MetaHaskell, as well as all three object languages, are fully implemented in the mhc bytecode compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Axelsson:2012:GAS, author = "Emil Axelsson", title = "A generic abstract syntax model for embedded languages", journal = j-SIGPLAN, volume = "47", number = "9", pages = "323--334", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364573", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Representing a syntax tree using a data type often involves having many similar-looking constructors. Functions operating on such types often end up having many similar-looking cases. Different languages often make use of similar-looking constructions. We propose a generic model of abstract syntax trees capable of representing a wide range of typed languages. Syntactic constructs can be composed in a modular fashion enabling reuse of abstract syntax and syntactic processing within and across languages. Building on previous methods of encoding extensible data types in Haskell, our model is a pragmatic solution to Wadler's ``expression problem''. Its practicality has been confirmed by its use in the implementation of the embedded language Feldspar.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Pike:2012:ERD, author = "Lee Pike and Nis Wegmann and Sebastian Niller and Alwyn Goodloe", title = "Experience report: a do-it-yourself high-assurance compiler", journal = j-SIGPLAN, volume = "47", number = "9", pages = "335--340", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded domain-specific languages (EDSLs) are an approach for quickly building new languages while maintaining the advantages of a rich metalanguage. We argue in this experience report that the ``EDSL approach'' can surprisingly ease the task of building a high-assurance compiler. We do not strive to build a fully formally-verified tool-chain, but take a ``do-it-yourself'' approach to increase our confidence in compiler-correctness without too much effort. Copilot is an EDSL developed by Galois, Inc. and the National Institute of Aerospace under contract to NASA for the purpose of runtime monitoring of flight-critical avionics. 
We report our experience in using type-checking, QuickCheck, and model-checking ``off-the-shelf'' to quickly increase confidence in our EDSL tool-chain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Vytiniotis:2012:EPD, author = "Dimitrios Vytiniotis and Simon Peyton Jones and Jos{\'e} Pedro Magalh{\~a}es", title = "Equality proofs and deferred type errors: a compiler pearl", journal = j-SIGPLAN, volume = "47", number = "9", pages = "341--352", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364554", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Glasgow Haskell Compiler is an optimizing compiler that expresses and manipulates first-class equality proofs in its intermediate language. We describe a simple, elegant technique that exploits these equality proofs to support deferred type errors. The technique requires us to treat equality proofs as possibly-divergent terms; we show how to do so without losing either soundness or the zero-overhead cost model that the programmer expects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Neatherway:2012:TBA, author = "Robin P. Neatherway and Steven J. Ramsay and Chih-Hao Luke Ong", title = "A traversal-based algorithm for higher-order model checking", journal = j-SIGPLAN, volume = "47", number = "9", pages = "353--364", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364578", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Higher-order model checking --- the model checking of trees generated by higher-order recursion schemes (HORS) --- is a natural generalisation of finite-state and pushdown model checking. Recent work has shown that it can serve as a basis for software model checking for functional languages such as ML and Haskell. In this paper, we introduce higher-order recursion schemes with cases (HORSC), which extend HORS with a definition-by-cases construct (to express program branching based on data) and non-determinism (to express abstractions of behaviours). This paper is a study of the universal HORSC model checking problem for deterministic trivial automata: does the automaton accept every tree in the tree language generated by the given HORSC? We first characterise the model checking problem by an intersection type system extended with a carefully restricted form of union types. We then present an algorithm for deciding the model checking problem, which is based on the notion of traversals induced by the fully abstract game semantics of these schemes, but presented as a goal-directed construction of derivations in the intersection and union type system. We view HORSC model checking as a suitable backend engine for an approach to verifying functional programs. 
We have implemented the algorithm in a tool called TravMC, and demonstrated its effectiveness on a test suite of programs, including abstract models of functional programs obtained via an abstraction-refinement procedure from pattern-matching recursion schemes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Perera:2012:FPE, author = "Roly Perera and Umut A. Acar and James Cheney and Paul Blain Levy", title = "Functional programs that explain their work", journal = j-SIGPLAN, volume = "47", number = "9", pages = "365--376", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364579", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present techniques that enable higher-order functional computations to ``explain'' their work by answering questions about how parts of their output were calculated. As explanations, we consider the traditional notion of program slices, which we show can be inadequate, and propose a new notion: trace slices. We present techniques for specifying flexible and rich slicing criteria based on partial expressions, parts of which have been replaced by holes. We characterise program slices in an algorithm-independent fashion and show that a least slice for a given criterion exists. We then present an algorithm, called unevaluation, for computing least program slices from computations reified as traces. Observing a limitation of program slices, we develop a notion of trace slice as another form of explanation and present an algorithm for computing them. The unevaluation algorithm can be applied to any subtrace of a trace slice to compute a program slice whose evaluation generates that subtrace. This close correspondence between programs, traces, and their slices can enable the programmer to understand a computation interactively, in terms of the programming language in which the computation is expressed. We present an implementation in the form of a tool, discuss some important practical implementation concerns and present some techniques for addressing them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '12 conference proceedings.", } @Article{Suenaga:2012:TBS, author = "Kohei Suenaga and Ryota Fukuda and Atsushi Igarashi", title = "Type-based safe resource deallocation for shared-memory concurrency", journal = j-SIGPLAN, volume = "47", number = "10", pages = "1--20", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384618", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a type system to guarantee safe resource deallocation for shared-memory concurrent programs by extending the previous type system based on fractional ownerships. Here, safe resource deallocation means that memory cells, locks, or threads are not left allocated when a program terminates. Our framework supports (1) fork/join parallelism, (2) synchronization with locks, and (3) dynamically allocated memory cells and locks. 
The type system is proved to be sound. We also provide a type inference algorithm for the type system and a prototype implementation of the algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Gordon:2012:URI, author = "Colin S. Gordon and Matthew J. Parkinson and Jared Parsons and Aleks Bromfield and Joe Duffy", title = "Uniqueness and reference immutability for safe parallelism", journal = j-SIGPLAN, volume = "47", number = "10", pages = "21--40", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384619", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A key challenge for concurrent programming is that side-effects (memory operations) in one thread can affect the behavior of another thread. In this paper, we present a type system to restrict the updates to memory to prevent these unintended side-effects. We provide a novel combination of immutable and unique (isolated) types that ensures safe parallelism (race freedom and deterministic execution). The type system includes support for polymorphism over type qualifiers, and can easily create cycles of immutable objects. Key to the system's flexibility is the ability to recover immutable or externally unique references after violating uniqueness without any explicit alias tracking. Our type system models a prototype extension to C\# that is in active use by a Microsoft team. We describe their experiences building large systems with this extension. We prove the soundness of the type system by an embedding into a program logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Sreeram:2012:SCD, author = "Jaswanth Sreeram and Santosh Pande", title = "Safe compiler-driven transaction checkpointing and recovery", journal = j-SIGPLAN, volume = "47", number = "10", pages = "41--56", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384620", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several studies have shown that a large fraction of the work performed inside memory transactions in representative programs is wasted due to the transaction experiencing a conflict and aborting. Aborts inside long running transactions are especially influential to performance and the simplicity of the TM programming model (relative to using finegrained locking) in synchronizing large critical sections means that large transactions are common and this exacerbates the problem of wasted work. In this paper we present a practical transaction checkpoint and recovery scheme in which transactions that experience a conflict can restore their state (including the local context in which they were executing) to some dynamic program point before this access and begin execution from that point. 
This state saving and restoration is implemented by checkpoint operations that are generated by a compiler into the transaction's body and are also optimized to reduce the amount of state that is saved and restored. We also describe a runtime system that manages these checkpointed states and orchestrates the restoration of the right checkpointed state for a conflict on a particular transactional access. Moreover the synthesis of these save and restore operations, their optimization and invocation at runtime are completely transparent to the programmer. We have implemented the checkpoint generation and optimization scheme in the LLVM compiler and runtime support for the TL2 STM system. Our experiments indicate that for many parallel programs using such checkpoint recovery schemes can result in upto several orders of magnitude reduction in number of aborts and significant execution time speedups relative to plain transactional programs for the same number of threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Muller:2012:TPS, author = "Stefan Muller and Stephen Chong", title = "Towards a practical secure concurrent language", journal = j-SIGPLAN, volume = "47", number = "10", pages = "57--74", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384621", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We demonstrate that a practical concurrent language can be extended in a natural way with information security mechanisms that provably enforce strong information security guarantees. We extend the X10 concurrent programming language with coarse-grained information-flow control. Central to X10 concurrency abstractions is the notion of a place: a container for data and computation. We associate a security level with each place, and restrict each place to store only data appropriate for that security level. When places interact only with other places at the same security level, then our security mechanisms impose no restrictions. When places of differing security levels interact, our information security analysis prevents potentially dangerous information flows, including information flow through covert scheduling channels. The X10 concurrency mechanisms simplify reasoning about information flow in concurrent programs. We present a static analysis that enforces a noninterference-based extensional information security condition in a calculus that captures the key aspects of X10's place abstraction and async-finish parallelism. 
We extend this security analysis to support many of X10's language features, and have implemented a prototype compiler for the resulting language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Parizek:2012:PAJ, author = "Pavel Par{\'\i}zek and Ond{\v{r}}ej Lhot{\'a}k", title = "Predicate abstraction of {Java} programs with collections", journal = j-SIGPLAN, volume = "47", number = "10", pages = "75--94", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384623", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Our goal is to develop precise and scalable verification techniques for Java programs that use collections and properties that depend on their content. We apply the popular approach of predicate abstraction to Java programs and collections. The main challenge in this context is precise and compact modeling of collections that enables practical verification. We define a predicate language for modeling the observable state of Java collections at the interface level. Changes of the state by API methods are captured by weakest preconditions. We adapt existing techniques for construction of abstract programs. Most notably, we designed optimizations based on specific features of the predicate language. We evaluated our approach on Java programs that use collections in advanced ways. Our results show that interesting properties, such as consistency between multiple collections, can be verified using our approach. The properties are specified using logic formulas that involve predicates introduced by our language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Schiller:2012:RBW, author = "Todd W. Schiller and Michael D. Ernst", title = "Reducing the barriers to writing verified specifications", journal = j-SIGPLAN, volume = "47", number = "10", pages = "95--112", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384624", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Formally verifying a program requires significant skill not only because of complex interactions between program subcomponents, but also because of deficiencies in current verification interfaces. These skill barriers make verification economically unattractive by preventing the use of less-skilled (less-expensive) workers and distributed workflows (i.e., crowdsourcing). This paper presents VeriWeb, a web-based IDE for verification that decomposes the task of writing verifiable specifications into manageable subproblems. To overcome the information loss caused by task decomposition, and to reduce the skill required to verify a program, VeriWeb incorporates several innovative user interface features: drag and drop condition construction, concrete counterexamples, and specification inlining. To evaluate VeriWeb, we performed three experiments.
First, we show that VeriWeb lowers the time and monetary cost of verification by performing a comparative study of VeriWeb and a traditional tool using 14 paid subjects contracted hourly from Exhedra Solution's vWorker online marketplace. Second, we demonstrate the dearth and insufficiency of current ad-hoc labor marketplaces for verification by recruiting workers from Amazon's Mechanical Turk to perform verification with VeriWeb. Finally, we characterize the minimal communication overhead incurred when VeriWeb is used collaboratively by observing two pairs of developers each use the tool simultaneously to verify a single program.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Betts:2012:GVG, author = "Adam Betts and Nathan Chong and Alastair Donaldson and Shaz Qadeer and Paul Thomson", title = "{GPUVerify}: a verifier for {GPU} kernels", journal = j-SIGPLAN, volume = "47", number = "10", pages = "113--132", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384625", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a technique for verifying race- and divergence-freedom of GPU kernels that are written in mainstream kernel programming languages such as OpenCL and CUDA. Our approach is founded on a novel formal operational semantics for GPU programming termed synchronous, delayed visibility (SDV) semantics. The SDV semantics provides a precise definition of barrier divergence in GPU kernels and allows kernel verification to be reduced to analysis of a sequential program, thereby completely avoiding the need to reason about thread interleavings, and allowing existing modular techniques for program verification to be leveraged. We describe an efficient encoding for data race detection and propose a method for automatically inferring loop invariants required for verification. We have implemented these techniques as a practical verification tool, GPUVerify, which can be applied directly to OpenCL and CUDA source code. We evaluate GPUVerify with respect to a set of 163 kernels drawn from public and commercial sources. Our evaluation demonstrates that GPUVerify is capable of efficient, automatic verification of a large number of real-world kernels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Logozzo:2012:MVA, author = "Francesco Logozzo and Thomas Ball", title = "Modular and verified automatic program repair", journal = j-SIGPLAN, volume = "47", number = "10", pages = "133--146", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384626", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the problem of suggesting code repairs at design time, based on the warnings issued by modular program verifiers. 
We introduce the concept of a verified repair, a change to a program's source that removes bad execution traces while increasing the number of good traces, where the bad/good traces form a partition of all the traces of a program. Repairs are property-specific. We demonstrate our framework in the context of warnings produced by the modular cccheck (a.k.a. Clousot) abstract interpreter, and generate repairs for missing contracts, incorrect locals and objects initialization, wrong conditionals, buffer overruns, arithmetic overflow and incorrect floating point comparisons. We report our experience with automatically generating repairs for the {.NET} framework libraries, generating verified repairs for over 80\% of the warnings generated by cccheck.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kulkarni:2012:MCO, author = "Sameer Kulkarni and John Cavazos", title = "Mitigating the compiler optimization phase-ordering problem using machine learning", journal = j-SIGPLAN, volume = "47", number = "10", pages = "147--162", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384628", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's compilers have a plethora of optimizations to choose from, and the correct choice of optimizations can have a significant impact on the performance of the code being optimized. Furthermore, choosing the correct order in which to apply those optimizations has been a long standing problem in compilation research. Each of these optimizations interacts with the code and in turn with all other optimizations in complicated ways. Traditional compilers typically apply the same set of optimizations in a fixed order to all functions in a program, without regard to the code being optimized. Understanding the interactions of optimizations is very important in determining a good solution to the phase-ordering problem. This paper develops a new approach that automatically selects good optimization orderings on a per method basis within a dynamic compiler. Our approach formulates the phase-ordering problem as a Markov process and uses a characterization of the current state of the code being optimized to create a better solution to the phase ordering problem. Our technique uses neuro-evolution to construct an artificial neural network that is capable of predicting beneficial optimization ordering for a piece of code that is being optimized.
We implemented our technique in Jikes RVM and achieved significant improvements on a set of standard Java benchmarks over a well-engineered fixed order.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{St-Amour:2012:OCO, author = "Vincent St-Amour and Sam Tobin-Hochstadt and Matthias Felleisen", title = "Optimization coaching: optimizers learn to communicate with programmers", journal = j-SIGPLAN, volume = "47", number = "10", pages = "163--178", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384629", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Optimizing compilers map programs in high-level languages to high-performance target language code. To most programmers, such a compiler constitutes an impenetrable black box whose inner workings are beyond their understanding. Since programmers often must understand the workings of their compilers to achieve their desired performance goals, they typically resort to various forms of reverse engineering, such as examining compiled code or intermediate forms. Instead, optimizing compilers should engage programmers in a dialog. This paper introduces one such possible form of dialog: optimization coaching. An optimization coach watches while a program is compiled, analyzes the results, generates suggestions for enabling further compiler optimization in the source program, and presents a suitable synthesis of its results to the programmer. We present an evaluation based on case studies, which illustrate how an optimization coach can help programmers achieve optimizations resulting in substantial performance improvements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Inoue:2012:AML, author = "Hiroshi Inoue and Hiroshige Hayashizaki and Peng Wu and Toshio Nakatani", title = "Adaptive multi-level compilation in a trace-based {Java JIT} compiler", journal = j-SIGPLAN, volume = "47", number = "10", pages = "179--194", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384630", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes our multi-level compilation techniques implemented in a trace-based Java JIT compiler (trace-JIT). Like existing multi-level compilation for method-based compilers, we start JIT compilation with a small compilation scope and a low optimization level so the program can start running quickly. Then we identify hot paths with a timer-based sampling profiler, generate long traces that capture the hot paths, and recompile them with a high optimization level to improve the peak performance. A key to high performance is selecting long traces that effectively capture the entire hot paths for upgrade recompilations. To do this, we introduce a new technique to generate a directed graph representing the control flow, a TTgraph, and use the TTgraph in the trace selection engine to efficiently select long traces. 
We show that our multi-level compilation improves the peak performance of programs by up to 58.5\% and 22.2\% on average compared to compiling all of the traces only at a low optimization level. Comparing the performance with our multi-level compilation to the performance when compiling all of the traces at a high optimization level, our technique can reduce the startup times of programs by up to 61.1\% and 31.3\% on average without significant reduction in the peak performance. Our results show that our adaptive multi-level compilation can balance the peak performance and startup time by taking advantage of different optimization levels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Castanos:2012:BPE, author = "Jose Castanos and David Edelsohn and Kazuaki Ishizaki and Priya Nagpurkar and Toshio Nakatani and Takeshi Ogasawara and Peng Wu", title = "On the benefits and pitfalls of extending a statically typed language {JIT} compiler for dynamic scripting languages", journal = j-SIGPLAN, volume = "47", number = "10", pages = "195--212", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384631", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Whenever the need to compile a new dynamically typed language arises, an appealing option is to repurpose an existing statically typed language Just-In-Time (JIT) compiler (repurposed JIT compiler). Existing repurposed JIT compilers (RJIT compilers), however, have not yet delivered the hoped-for performance boosts. The performance of JVM languages, for instance, often lags behind standard interpreter implementations. Even more customized solutions that extend the internals of a JIT compiler for the target language compete poorly with those designed specifically for dynamically typed languages. Our own Fiorano JIT compiler is an example of this problem. As a state-of-the-art, RJIT compiler for Python, the Fiorano JIT compiler outperforms two other RJIT compilers (Unladen Swallow and Jython), but still shows a noticeable performance gap compared to PyPy, today's best performing Python JIT compiler. In this paper, we discuss techniques that have proved effective in the Fiorano JIT compiler as well as limitations of our current implementation. More importantly, this work offers the first in-depth look at benefits and limitations of the repurposed JIT compiler approach. We believe the most common pitfall of existing RJIT compilers is not focusing sufficiently on specialization, an abundant optimization opportunity unique to dynamically typed languages. Unfortunately, the lack of specialization cannot be overcome by applying traditional optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Cousot:2012:AIFb, author = "Patrick M. 
Cousot and Radhia Cousot and Francesco Logozzo and Michael Barnett", title = "An abstract interpretation framework for refactoring with application to extract methods with contracts", journal = j-SIGPLAN, volume = "47", number = "10", pages = "213--232", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384633", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Method extraction is a common refactoring feature provided by most modern IDEs. It replaces a user-selected piece of code with a call to an automatically generated method. We address the problem of automatically inferring contracts (precondition, postcondition) for the extracted method. We require the inferred contract: (a) to be valid for the extracted method (validity); (b) to guard the language and programmer assertions in the body of the extracted method by an opportune precondition (safety); (c) to preserve the proof of correctness of the original code when analyzing the new method separately (completeness); and (d) to be the most general possible (generality). These requirements rule out trivial solutions (e.g., inlining, projection, etc). We propose two theoretical solutions to the problem. The first one is simple and optimal. It is valid, safe, complete and general but unfortunately not effectively computable (except for unrealistic finiteness/decidability hypotheses). The second one is based on an iterative forward/backward method. We show it to be valid, safe, and, under reasonable assumptions, complete and general. We prove that the second solution subsumes the first. All justifications are provided with respect to a new, set-theoretic version of Hoare logic (hence without logic), and abstractions of Hoare logic, revisited to avoid surprisingly unsound inference rules. We have implemented the new algorithms on the top of two industrial-strength tools (CCCheck and the Microsoft Roslyn CTP). Our experience shows that the analysis is both fast enough to be used in an interactive environment and precise enough to generate good annotations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Zhang:2012:RAJ, author = "Ying Zhang and Gang Huang and Xuanzhe Liu and Wei Zhang and Hong Mei and Shunxiang Yang", title = "Refactoring {Android Java} code for on-demand computation offloading", journal = j-SIGPLAN, volume = "47", number = "10", pages = "233--248", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384634", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computation offloading is a promising way to improve the performance as well as reducing the battery power consumption of a smartphone application by executing some parts of the application on a remote server. 
Supporting such capability is not easy for smartphone application developers due to (1) correctness: some code, e.g., that for GPS, gravity, and other sensors, can run only on the smartphone so that developers have to identify which parts of the application cannot be offloaded; (2) effectiveness: the reduced execution time must be greater than the network delay caused by computation offloading so that developers need to calculate which parts are worth offloading; (3) adaptability: smartphone applications often face changes of user requirements and runtime environments so that developers need to implement the adaptation on offloading. More importantly, considering the large number of today's smartphone applications, solutions applicable for legacy applications will be much more valuable. In this paper, we present a tool, named DPartner, that automatically refactors Android applications to be the ones with computation offloading capability. For a given Android application, DPartner first analyzes its bytecode for discovering the parts worth offloading, then rewrites the bytecode to implement a special program structure supporting on-demand offloading, and finally generates two artifacts to be deployed onto an Android phone and the server, respectively. We evaluated DPartner on three real-world Android applications, demonstrating the reduction of execution time by 46\%-97\% and battery power consumption by 27\%-83\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Hayden:2012:KEG, author = "Christopher M. Hayden and Edward K. Smith and Michail Denchev and Michael Hicks and Jeffrey S. Foster", title = "{Kitsune}: efficient, general-purpose dynamic software updating for {C}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "249--264", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384635", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic software updating (DSU) systems allow programs to be updated while running, thereby permitting developers to add features and fix bugs without downtime. This paper introduces Kitsune, a new DSU system for C whose design has three notable features. First, Kitsune's updating mechanism updates the whole program, not individual functions. This mechanism is more flexible than most prior approaches and places no restrictions on data representations or allowed compiler optimizations. Second, Kitsune makes the important aspects of updating explicit in the program text, making the program's semantics easy to understand while minimizing programmer effort. Finally, the programmer can write simple specifications to direct Kitsune to generate code that traverses and transforms old-version state for use by new code; such state transformation is often necessary, and is significantly more difficult in prior DSU systems. 
We have used Kitsune to update five popular, open-source, single- and multi-threaded programs, and find that few program changes are required to use Kitsune, and that it incurs essentially no performance overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Magill:2012:AOT, author = "Stephen Magill and Michael Hicks and Suriya Subramanian and Kathryn S. McKinley", title = "Automating object transformations for dynamic software updating", journal = j-SIGPLAN, volume = "47", number = "10", pages = "265--280", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384636", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic software updating (DSU) systems eliminate costly downtime by dynamically fixing bugs and adding features to executing programs. Given a static code patch, most DSU systems construct runtime code changes automatically. However, a dynamic update must also specify how to change the running program's execution state, e.g., the stack and heap, to make it compatible with the new code. Constructing such state transformations correctly and automatically remains an open problem. This paper presents a solution called Targeted Object Synthesis (TOS). TOS first executes the same tests on the old and new program versions separately, observing the program heap state at a few corresponding points. Given two corresponding heap states, TOS matches objects in the two versions using key fields that uniquely identify objects and correlate old and new-version objects. Given example object pairs, TOS then synthesizes the simplest-possible function that transforms an old-version object to its new-version counterpart. We show that TOS is effective on updates to four open-source server programs for which it generates non-trivial transformation functions that use conditionals, operate on collections, and fix memory leaks. These transformations help programmers understand their changes and apply dynamic software updates.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Sartor:2012:EMT, author = "Jennifer B. Sartor and Lieven Eeckhout", title = "Exploring multi-threaded {Java} application performance on multicore hardware", journal = j-SIGPLAN, volume = "47", number = "10", pages = "281--296", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384638", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While there have been many studies of how to schedule applications to take advantage of increasing numbers of cores in modern-day multicore processors, few have focused on multi-threaded managed language applications which are prevalent from the embedded to the server domain. Managed languages complicate performance studies because they have additional virtual machine threads that collect garbage and dynamically compile, closely interacting with application threads.
Further complexity is introduced as modern multicore machines have multiple sockets and dynamic frequency scaling options, broadening opportunities to reduce both power and running time. In this paper, we explore the performance of Java applications, studying how best to map application and virtual machine (JVM) threads to a multicore, multi-socket environment. We explore both the cost of separating JVM threads from application threads, and the opportunity to speed up or slow down the clock frequency of isolated threads. We perform experiments with the multi-threaded DaCapo benchmarks and pseudojbb2005 running on the Jikes Research Virtual Machine, on a dual-socket, 8-core Intel Nehalem machine to reveal several novel, and sometimes counter-intuitive, findings. We believe these insights are a first but important step towards understanding and optimizing managed language performance on modern hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kumar:2012:WSB, author = "Vivek Kumar and Daniel Frampton and Stephen M. Blackburn and David Grove and Olivier Tardieu", title = "Work-stealing without the baggage", journal = j-SIGPLAN, volume = "47", number = "10", pages = "297--314", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384639", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Work-stealing is a promising approach for effectively exploiting software parallelism on parallel hardware. A programmer who uses work-stealing explicitly identifies potential parallelism and the runtime then schedules work, keeping otherwise idle hardware busy while relieving overloaded hardware of its burden. Prior work has demonstrated that work-stealing is very effective in practice. However, work-stealing comes with a substantial overhead: as much as 2x to 12x slowdown over orthodox sequential code. In this paper we identify the key sources of overhead in work-stealing schedulers and present two significant refinements to their implementation. We evaluate our work-stealing designs using a range of benchmarks, four different work-stealing implementations, including the popular fork-join framework, and a range of architectures. On these benchmarks, compared to orthodox sequential Java, our fastest design has an overhead of just 15\%. By contrast, fork-join has a 2.3x overhead and the previous implementation of the system we use has an overhead of 4.1x. 
These results and our insight into the sources of overhead for work-stealing implementations give further hope to an already promising technique for exploiting increasingly available hardware parallelism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Bocq:2012:MUM, author = "S{\'e}bastien Bocq and Koen Daenen", title = "{Molecule}: using monadic and streaming {I/O} to compose process networks on the {JVM}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "315--334", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384640", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Molecule is a domain specific language library embedded in Scala for easing the creation of scalable and modular concurrent applications on the JVM. Concurrent applications are modeled as parallel process networks that exchange information over mobile and type-safe messaging interfaces. In this paper, we present a concurrent programming environment that combines functional and imperative programming. Using a monad, we structure the sequential or parallel coordination of user-level threads, without JVM modifications or compiler support. Our mobile channel interfaces expose reusable and parallelizable higher-order functions, as if they were streams in a lazily evaluated functional programming language. The support for graceful termination of entire process networks is simplified by integrating channel poisoning with monadic exceptions and resource control. Our runtime and system-level interfaces leverage message batching and a novel flow parallel scheduler to limit expensive context switches in multicore environments. We illustrate the expressiveness and performance benefits on a 24-core AMD Opteron machine with three classical examples: a thread ring, a genuine prime sieve and a chameneos-redux.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kalibera:2012:BBA, author = "Tomas Kalibera and Matthew Mole and Richard Jones and Jan Vitek", title = "A black-box approach to understanding concurrency in {DaCapo}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "335--354", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384641", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increasing levels of hardware parallelism are one of the main challenges for programmers and implementers of managed runtimes. Any concurrency or scalability improvements must be evaluated experimentally. However, application benchmarks available today may not reflect the highly concurrent applications we anticipate in the future. They may also behave in ways that VM developers do not expect. 
We provide a set of platform independent concurrency related metrics and an in-depth observational study of current state of the art benchmarks, discovering how concurrent they really are, how they scale the work and how they synchronise and communicate via shared memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Jo:2012:AEL, author = "Youngjoon Jo and Milind Kulkarni", title = "Automatically enhancing locality for tree traversals with traversal splicing", journal = j-SIGPLAN, volume = "47", number = "10", pages = "355--374", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384643", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Generally applicable techniques for improving temporal locality in irregular programs, which operate over pointer-based data structures such as trees and graphs, are scarce. Focusing on a subset of irregular programs, namely, tree traversal algorithms like Barnes--Hut and nearest neighbor, previous work has proposed point blocking, a technique analogous to loop tiling in regular programs, to improve locality. However point blocking is highly dependent on point sorting, a technique to reorder points so that consecutive points will have similar traversals. Performing this a priori sort requires an understanding of the semantics of the algorithm and hence highly application specific techniques. In this work, we propose traversal splicing, a new, general, automatic locality optimization for irregular tree traversal codes, that is less sensitive to point order, and hence can deliver substantially better performance, even in the absence of semantic information. For six benchmark algorithms, we show that traversal splicing can deliver single-thread speedups of up to 9.147 (geometric mean: 3.095) over baseline implementations, and up to 4.752 (geometric mean: 2.079) over point-blocked implementations. Further, we show that in many cases, automatically applying traversal splicing to a baseline implementation yields performance that is better than carefully hand-optimized implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Prountzos:2012:ESS, author = "Dimitrios Prountzos and Roman Manevich and Keshav Pingali", title = "{Elixir}: a system for synthesizing concurrent graph programs", journal = j-SIGPLAN, volume = "47", number = "10", pages = "375--394", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384644", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Algorithms in new application areas like machine learning and network analysis use ``irregular'' data structures such as graphs, trees and sets. 
Writing efficient parallel code in these problem domains is very challenging because it requires the programmer to make many choices: a given problem can usually be solved by several algorithms, each algorithm may have many implementations, and the best choice of algorithm and implementation can depend not only on the characteristics of the parallel platform but also on properties of the input data such as the structure of the graph. One solution is to permit the application programmer to experiment with different algorithms and implementations without writing every variant from scratch. Auto-tuning to find the best variant is a more ambitious solution. These solutions require a system for automatically producing efficient parallel implementations from high-level specifications. Elixir, the system described in this paper, is the first step towards this ambitious goal. Application programmers write specifications that consist of an operator, which describes the computations to be performed, and a schedule for performing these computations. Elixir uses sophisticated inference techniques to produce efficient parallel code from such specifications. We used Elixir to automatically generate many parallel implementations for three irregular problems: breadth-first search, single source shortest path, and betweenness-centrality computation. Our experiments show that the best generated variants can be competitive with handwritten code for these problems from other research groups; for some inputs, they even outperform the handwritten versions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Liu:2012:CED, author = "Yanhong A. Liu and Scott D. Stoller and Bo Lin and Michael Gorbovitski", title = "From clarity to efficiency for distributed algorithms", journal = j-SIGPLAN, volume = "47", number = "10", pages = "395--410", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384645", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a very high-level language for clear description of distributed algorithms and optimizations necessary for generating efficient implementations. The language supports high-level control flows where complex synchronization conditions can be expressed using high-level queries, especially logic quantifications, over message history sequences. Unfortunately, the programs would be extremely inefficient, including consuming unbounded memory, if executed straightforwardly. We present new optimizations that automatically transform complex synchronization conditions into incremental updates of necessary auxiliary values as messages are sent and received. The core of the optimizations is the first general method for efficient implementation of logic quantifications. We have developed an operational semantics of the language, implemented a prototype of the compiler and the optimizations, and successfully used the language and implementation on a variety of important distributed algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Leino:2012:PEJ, author = "K. Rustan M. 
Leino and Aleksandar Milicevic", title = "Program extrapolation with {Jennisys}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "411--430", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384646", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The desired behavior of a program can be described using an abstract model. Compiling such a model into executable code requires advanced compilation techniques known as synthesis. This paper presents an object-based language, called Jennisys, where programming is done by introducing an abstract model, defining a concrete data representation for the model, and then being aided by automatic synthesis to produce executable code. The paper also presents a synthesis technique for the language. The technique is built on an automatic program verifier that, via an underlying SMT solver, is capable of providing concrete models to failed verifications. The technique proceeds by obtaining sample input/output values from concrete models and then extrapolating programs from the sample points. The synthesis aims to produce code with assignments, branching structure, and possibly recursive calls. It is the first to synthesize code that creates and uses objects in dynamic data structures or aggregate objects. A prototype of the language and synthesis technique has been implemented.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kling:2012:BDI, author = "Michael Kling and Sasa Misailovic and Michael Carbin and Martin Rinard", title = "{Bolt}: on-demand infinite loop escape in unmodified binaries", journal = j-SIGPLAN, volume = "47", number = "10", pages = "431--450", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384648", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Bolt, a novel system for escaping from infinite and long-running loops. Directed by a user, Bolt can attach to a running process and determine if the program is executing an infinite loop. If so, Bolt can deploy multiple strategies to escape the loop, restore the responsiveness of the program, and enable the program to deliver useful output. Bolt operates on stripped x86 and x64 binaries, dynamically attaches and detaches to and from the program as needed, and dynamically detects loops and creates program state checkpoints to enable exploration of different escape strategies. 
Bolt can detect and escape from loops in off-the-shelf software, without available source code, and with no overhead in standard production use.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Huang:2012:LSC, author = "Jeff Huang and Charles Zhang", title = "{LEAN}: simplifying concurrency bug reproduction via replay-supported execution reduction", journal = j-SIGPLAN, volume = "47", number = "10", pages = "451--466", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384649", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging concurrent programs is known to be difficult due to scheduling non-determinism. The technique of multiprocessor deterministic replay substantially assists debugging by making the program execution reproducible. However, facing the huge replay traces and long replay time, the debugging task remains stunningly challenging for long running executions. We present a new technique, LEAN, on top of replay, that significantly reduces the complexity of the replay trace and the length of the replay time without losing the determinism in reproducing concurrency bugs. The cornerstone of our work is a redundancy criterion that characterizes the redundant computation in a buggy trace. Based on the redundancy criterion, we have developed two novel techniques to automatically identify and remove redundant threads and instructions in the bug reproduction execution. Our evaluation results with several real world concurrency bugs in large complex server programs demonstrate that LEAN is able to reduce the size, the number of threads, and the number of thread context switches of the replay trace by orders of magnitude, and accordingly greatly shorten the replay time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Effinger-Dean:2012:IIF, author = "Laura Effinger-Dean and Brandon Lucia and Luis Ceze and Dan Grossman and Hans-J. Boehm", title = "{IFRit}: interference-free regions for dynamic data-race detection", journal = j-SIGPLAN, volume = "47", number = "10", pages = "467--484", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384650", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new algorithm for dynamic data-race detection. Our algorithm reports no false positives and runs on arbitrary C and C++ code. Unlike previous algorithms, we do not have to instrument every memory access or track a full happens-before relation. Our data-race detector, which we call IFRit, is based on a run-time abstraction called an interference-free region (IFR). An IFR is an interval of one thread's execution during which any write to a specific variable by a different thread is a data race. We insert instrumentation at compile time to monitor active IFRs at run-time. If the runtime observes overlapping IFRs for conflicting accesses to the same variable in two different threads, it reports a race. 
The static analysis aggregates information for multiple accesses to the same variable, avoiding the expense of having to instrument every memory access in the program. We directly compare IFRit to FastTrack and ThreadSanitizer, two state-of-the-art fully-precise data-race detectors. We show that IFRit imposes a fraction of the overhead of these detectors. We show that for the PARSEC benchmarks, and several real-world applications, IFRit finds many of the races detected by a fully-precise detector. We also demonstrate that sampling can further reduce IFRit's performance overhead without completely forfeiting precision.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Yu:2012:MCD, author = "Jie Yu and Satish Narayanasamy and Cristiano Pereira and Gilles Pokam", title = "{Maple}: a coverage-driven testing tool for multithreaded programs", journal = j-SIGPLAN, volume = "47", number = "10", pages = "485--502", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384651", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Testing multithreaded programs is a hard problem, because it is challenging to expose those rare interleavings that can trigger a concurrency bug. We propose a new thread interleaving coverage-driven testing tool called Maple that seeks to expose untested thread interleavings as much as possible. It memoizes tested interleavings and actively seeks to expose untested interleavings for a given test input to increase interleaving coverage. We discuss several solutions to realize the above goal. First, we discuss a coverage metric based on a set of interleaving idioms. Second, we discuss an online technique to predict untested interleavings that can potentially be exposed for a given test input. Finally, the predicted untested interleavings are exposed by actively controlling the thread schedule while executing for the test input. We discuss our experiences in using the tool to expose several known and unknown bugs in real-world applications such as Apache and MySQL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Dubrau:2012:TM, author = "Anton Willy Dubrau and Laurie Jane Hendren", title = "Taming {MATLAB}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "503--522", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384653", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "MATLAB is a dynamic scientific language used by scientists, engineers and students worldwide. Although MATLAB is very suitable for rapid prototyping and development, MATLAB users often want to convert their final MATLAB programs to a static language such as FORTRAN. This paper presents an extensible object-oriented toolkit for supporting the generation of static programs from dynamic MATLAB programs. 
Our open source toolkit, called the MATLAB Tamer, identifies a large tame subset of MATLAB, supports the generation of a specialized Tame IR for that subset, provides a principled approach to handling the large number of builtin MATLAB functions, and supports an extensible interprocedural value analysis for estimating MATLAB types and call graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Siddiqui:2012:SSE, author = "Junaid Haroon Siddiqui and Sarfraz Khurshid", title = "Scaling symbolic execution using ranged analysis", journal = j-SIGPLAN, volume = "47", number = "10", pages = "523--536", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384654", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a novel approach to scale symbolic execution --- a program analysis technique for systematic exploration of bounded execution paths---for test input generation. While the foundations of symbolic execution were developed over three decades ago, recent years have seen a real resurgence of the technique, specifically for systematic bug finding. However, scaling symbolic execution remains a primary technical challenge due to the inherent complexity of the path-based exploration that lies at core of the technique. Our key insight is that the state of the analysis can be represented highly compactly: a test input is all that is needed to effectively encode the state of a symbolic execution run. We present ranged symbolic execution, which embodies this insight and uses two test inputs to define a range, i.e., the beginning and end, for a symbolic execution run. As an application of our approach, we show how it enables scalability by distributing the path exploration---both in a sequential setting with a single worker node and in a parallel setting with multiple workers. As an enabling technology, we leverage the open-source, state-of-the-art symbolic execution tool KLEE. Experimental results using 71 programs chosen from the widely deployed GNU Coreutils set of Unix utilities show that our approach provides a significant speedup over KLEE. For example, using 10 worker cores, we achieve an average speed-up of 6.6X for the 71 programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Tobin-Hochstadt:2012:HOS, author = "Sam Tobin-Hochstadt and David {Van Horn}", title = "Higher-order symbolic execution via contracts", journal = j-SIGPLAN, volume = "47", number = "10", pages = "537--554", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384655", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new approach to automated reasoning about higher-order programs by extending symbolic execution to use behavioral contracts as symbolic values, thus enabling symbolic approximation of higher-order behavior.
Our approach is based on the idea of an abstract reduction semantics that gives an operational semantics to programs with both concrete and symbolic components. Symbolic components are approximated by their contract and our semantics gives an operational interpretation of contracts-as-values. The result is an executable semantics that soundly predicts program behavior, including contract failures, for all possible instantiations of symbolic components. We show that our approach scales to an expressive language of contracts including arbitrary programs embedded as predicates, dependent function contracts, and recursive contracts. Supporting this rich language of specifications leads to powerful symbolic reasoning using existing program constructs. We then apply our approach to produce a verifier for contract correctness of components, including a sound and computable approximation to our semantics that facilitates fully automated contract verification. Our implementation is capable of verifying contracts expressed in existing programs, and of justifying contract-elimination optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Rosu:2012:CRU, author = "Grigore Rosu and Andrei Stefanescu", title = "Checking reachability using matching logic", journal = j-SIGPLAN, volume = "47", number = "10", pages = "555--574", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384656", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a verification framework that is parametric in a (trusted) operational semantics of some programming language. The underlying proof system is language-independent and consists of eight proof rules. The proof system is proved partially correct and relatively complete (with respect to the programming language configuration model). To show its practicality, the generic framework is instantiated with a fragment of C and evaluated with encouraging results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Zhao:2012:HCP, author = "Haiping Zhao and Iain Proctor and Minghui Yang and Xin Qi and Mark Williams and Qi Gao and Guilherme Ottoni and Andrew Paroski and Scott MacVicar and Jason Evans and Stephen Tu", title = "The {HipHop} compiler for {PHP}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "575--586", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384658", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scripting languages are widely used to quickly accomplish a variety of tasks because of the high productivity they enable. Among other reasons, this increased productivity results from a combination of extensive libraries, fast development cycle, dynamic typing, and polymorphism. The dynamic features of scripting languages are traditionally associated with interpreters, which is the approach used to implement most scripting languages. 
Although easy to implement, interpreters are generally slow, which makes scripting languages prohibitive for implementing large, CPU-intensive applications. This efficiency problem is particularly important for PHP given that it is the most commonly used language for server-side web development. This paper presents the design, implementation, and an evaluation of the HipHop compiler for PHP. HipHop goes against the standard practice and implements a very dynamic language through static compilation. After describing the most challenging PHP features to support through static compilation, this paper presents HipHop's design and techniques that support almost all PHP features. We then present a thorough evaluation of HipHop running both standard benchmarks and the Facebook web site. Overall, our experiments demonstrate that HipHop is about 5.5x faster than standard, interpreted PHP engines. As a result, HipHop has reduced the number of servers needed to run Facebook and other web sites by a factor between 4 and 6, thus drastically cutting operating costs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Chugh:2012:DTJ, author = "Ravi Chugh and David Herman and Ranjit Jhala", title = "Dependent types for {JavaScript}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "587--606", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384659", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Dependent JavaScript (DJS), a statically typed dialect of the imperative, object-oriented, dynamic language. DJS supports the particularly challenging features such as run-time type-tests, higher-order functions, extensible objects, prototype inheritance, and arrays through a combination of nested refinement types, strong updates to the heap, and heap unrolling to precisely track prototype hierarchies. With our implementation of DJS, we demonstrate that the type system is expressive enough to reason about a variety of tricky idioms found in small examples drawn from several sources, including the popular book JavaScript: The Good Parts and the SunSpider benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Meawad:2012:EBS, author = "Fadi Meawad and Gregor Richards and Flor{\'e}al Morandat and Jan Vitek", title = "{Eval} begone!: semi-automated removal of {\tt eval} from {JavaScript} programs", journal = j-SIGPLAN, volume = "47", number = "10", pages = "607--620", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384660", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Eval endows JavaScript developers with great power. It allows developers and end-users, by turning text into executable code, to seamlessly extend and customize the behavior of deployed applications as they are running. With great power comes great responsibility, though not in our experience. 
In previous work we demonstrated through a large corpus study that programmers wield that power in rather irresponsible and arbitrary ways. We showed that most calls to eval fall into a small number of very predictable patterns. We argued that those patterns could easily be recognized by an automated algorithm and that they could almost always be replaced with safer JavaScript idioms. In this paper we set out to validate our claim by designing and implementing a tool, which we call Evalorizer, that can assist programmers in getting rid of their unneeded evals. We use the tool to remove eval from a real-world website and validated our approach over logs taken from the top 100 websites with a success rate over 97\% under an open world assumption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kang:2012:FSJ, author = "Seonghoon Kang and Sukyoung Ryu", title = "Formal specification of a {JavaScript} module system", journal = j-SIGPLAN, volume = "47", number = "10", pages = "621--638", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384661", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The JavaScript programming language, originally developed as a simple scripting language, is now the language of choice for web applications. All the top 100 sites on the web use JavaScript and its use outside web pages is rapidly growing. However, JavaScript is not yet ready for programming in the large: it does not support a module system. Lack of namespaces introduces module patterns, and makes it difficult to use multiple JavaScript frameworks together. In this paper, we propose a formal specification of a JavaScript module system. A module system for JavaScript will allow safe and incremental development of JavaScript web applications. While the next version of the JavaScript standard proposes a module system, it informally describes its design in prose. We formally specify a module system as an extension to the existing JavaScript language, and rigorously describe its semantics via desugaring to LambdaJS, a prior core calculus for JavaScript. We implement the desugaring process and show its faithfulness using real-world test suites. Finally, we define a set of properties for valid JavaScript programs using modules and formally prove that the proposed module system satisfies the validity properties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Barowy:2012:API, author = "Daniel W. Barowy and Charlie Curtsinger and Emery D. Berger and Andrew McGregor", title = "{AutoMan}: a platform for integrating human-based and digital computation", journal = j-SIGPLAN, volume = "47", number = "10", pages = "639--654", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384663", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Humans can perform many tasks with ease that remain difficult or impossible for computers. 
Crowdsourcing platforms like Amazon's Mechanical Turk make it possible to harness human-based computational power at an unprecedented scale. However, their utility as a general-purpose computational platform remains limited. The lack of complete automation makes it difficult to orchestrate complex or interrelated tasks. Scheduling more human workers to reduce latency costs real money, and jobs must be monitored and rescheduled when workers fail to complete their tasks. Furthermore, it is often difficult to predict the length of time and payment that should be budgeted for a given task. Finally, the results of human-based computations are not necessarily reliable, both because human skills and accuracy vary widely, and because workers have a financial incentive to minimize their effort. This paper introduces AutoMan, the first fully automatic crowdprogramming system. AutoMan integrates human-based computations into a standard programming language as ordinary function calls, which can be intermixed freely with traditional functions. This abstraction lets AutoMan programmers focus on their programming logic. An AutoMan program specifies a confidence level for the overall computation and a budget. The AutoMan runtime system then transparently manages all details necessary for scheduling, pricing, and quality control. AutoMan automatically schedules human tasks for each computation until it achieves the desired confidence level; monitors, reprices, and restarts human tasks as necessary; and maximizes parallelism across human workers while staying under budget.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Datta:2012:TVW, author = "Subhajit Datta and Renuka Sindhgatta and Bikram Sengupta", title = "Talk versus work: characteristics of developer collaboration on the {Jazz} platform", journal = j-SIGPLAN, volume = "47", number = "10", pages = "655--668", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384664", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "IBM's Jazz initiative offers a state-of-the-art collaborative development environment (CDE) facilitating developer interactions around interdependent units of work. In this paper, we analyze development data across two versions of a major IBM product developed on the Jazz platform, covering in total 19 months of development activity, including 17,000+ work items and 61,000+ comments made by more than 190 developers in 35 locations. By examining the relation between developer talk and work, we find evidence that developers maintain a reasonably high level of connectivity with peer developers with whom they share work dependencies, but the span of a developer's communication goes much beyond the known dependencies of his/her work items. Using multiple linear regression models, we find that the number of defects owned by a developer is impacted by the number of other developers (s)he is connected through talk, his/her interpersonal influence in the network of work dependencies, the number of work items (s)he comments on, and the number work items (s)he owns. These effects are maintained even after controlling for workload, role, work dependency, and connection related factors. 
We discuss the implications of our results for collaborative software development and project governance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Muulu:2012:SAI, author = "Kivan{\c{c}} Mu{\c{s}}lu and Yuriy Brun and Reid Holmes and Michael D. Ernst and David Notkin", title = "Speculative analysis of integrated development environment recommendations", journal = j-SIGPLAN, volume = "47", number = "10", pages = "669--682", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384665", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern integrated development environments make recommendations and automate common tasks, such as refactorings, auto-completions, and error corrections. However, these tools present little or no information about the consequences of the recommended changes. For example, a rename refactoring may: modify the source code without changing program semantics; modify the source code and (incorrectly) change program semantics; modify the source code and (incorrectly) create compilation errors; show a name collision warning and require developer input; or show an error and not change the source code. Having to compute the consequences of a recommendation --- either mentally or by making source code changes --- puts an extra burden on the developers. This paper aims to reduce this burden with a technique that informs developers of the consequences of code transformations. Using Eclipse Quick Fix as a domain, we describe a plug-in, Quick Fix Scout, that computes the consequences of Quick Fix recommendations. In our experiments, developers completed compilation-error removal tasks 10\% faster when using Quick Fix Scout than Quick Fix, although the sample size was not large enough to show statistical significance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Mayer:2012:ESI, author = "Clemens Mayer and Stefan Hanenberg and Romain Robbes and {\'E}ric Tanter and Andreas Stefik", title = "An empirical study of the influence of static type systems on the usability of undocumented software", journal = j-SIGPLAN, volume = "47", number = "10", pages = "683--702", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384666", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Although the study of static and dynamic type systems plays a major role in research, relatively little is known about the impact of type systems on software development. Perhaps one of the more common arguments for static type systems in languages such as Java or C++ is that they require developers to annotate their code with type names, which is thus claimed to improve the documentation of software. In contrast, one common argument against static type systems is that they decrease flexibility, which may make them harder to use. While these arguments are found in the literature, rigorous empirical evidence is lacking.
We report on a controlled experiment where 27 subjects performed programming tasks on an undocumented API with a static type system (requiring type annotations) as well as a dynamic type system (which does not). Our results show that for some tasks, programmers had faster completion times using a static type system, while for others, the opposite held. We conduct an exploratory study to try and theorize why.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Tseng:2012:SDT, author = "Hung-Wei Tseng and Dean Michael Tullsen", title = "Software data-triggered threads", journal = j-SIGPLAN, volume = "47", number = "10", pages = "703--716", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384668", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The data-triggered threads (DTT) programming and execution model can increase parallelism and eliminate redundant computation. However, the initial proposal requires significant architecture support, which impedes existing applications and architectures from taking advantage of this model. This work proposes a pure software solution that supports the DTT model without any hardware support. This research uses a prototype compiler and runtime libraries running on top of existing machines. Several enhancements to the initial software implementation are presented, which further improve the performance. The software runtime system improves the performance of serial C SPEC benchmarks by 15\% on a Nehalem processor, but by over 7X over the full suite of single-thread applications. It is shown that the DTT model can work in conjunction with traditional parallelism. The DTT model provides up to 64X speedup over parallel applications exploiting traditional parallelism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Anderson:2012:ECP, author = "Zachary Anderson", title = "Efficiently combining parallel software using fine-grained, language-level, hierarchical resource management policies", journal = j-SIGPLAN, volume = "47", number = "10", pages = "717--736", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384669", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Poli-C, a language extension, runtime library, and system daemon enabling fine-grained, language-level, hierarchical resource management policies. Poli-C is suitable for use in applications that compose parallel libraries, frameworks, and programs. In particular, we have added a powerful new statement to C for expressing resource limits and guarantees in such a way that programmers can set resource management policies even when the source code of parallel libraries and frameworks is not available. Poli-C enables application programmers to manage any resource exposed by the underlying OS, for example cores or IO bandwidth. 
Additionally, we have developed a domain-specific language for defining high-level resource management policies, and a facility for extending the kinds of resources that can be managed with our language extension. Finally, through a number of useful variations, our design offers a high degree of composability. We evaluate Poli-C by way of three case-studies: a scientific application, an image processing webserver, and a pair of parallel database join implementations. We found that using Poli-C yields efficiency gains that require the addition of only a few lines of code to applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Huang:2012:EPS, author = "Jeff Huang and Charles Zhang", title = "Execution privatization for scheduler-oblivious concurrent programs", journal = j-SIGPLAN, volume = "47", number = "10", pages = "737--752", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384670", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Making multithreaded execution less non-deterministic is a promising solution to address the difficulty of concurrent programming plagued by the non-deterministic thread scheduling. In fact, a vast category of concurrent programs are scheduler-oblivious: their execution is deterministic, regardless of the scheduling behavior. We present and formally prove a fundamental observation of the privatizability property for scheduler-oblivious programs, that paves the theoretical foundation for privatizing shared data accesses on a path segment. With privatization, the non-deterministic thread interleavings on the privatized accesses are isolated and as the consequence many concurrency problems are alleviated. We further present a path and context sensitive privatization algorithm that safely privatizes the program without introducing any additional program behavior. Our evaluation results show that the privatization opportunity pervasively exists in real world large complex concurrent systems. Through privatization, several real concurrency bugs are fixed and notable performance improvements are also achieved on benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Imam:2012:ITP, author = "Shams M. Imam and Vivek Sarkar", title = "Integrating task parallelism with actors", journal = j-SIGPLAN, volume = "47", number = "10", pages = "753--772", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384671", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a unified concurrent programming model combining the previously developed Actor Model (AM) and the task-parallel Async-Finish Model (AFM). With the advent of multi-core computers, there is a renewed interest in programming models that can support a wide range of parallel programming patterns. 
The proposed unified model shows how the divide-and-conquer approach of the AFM and the no-shared mutable state and event-driven philosophy of the AM can be combined to solve certain classes of problems more efficiently and productively than either of the aforementioned models individually. The unified model adds actor creation and coordination to the AFM, while also enabling parallelization within actors. This paper describes two implementations of the unified model as extensions of Habanero-Java and Habanero-Scala. The unified model adds to the foundations of parallel programs, and to the tools available for the programmer to aid in productivity and performance while developing parallel software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Kastner:2012:VAM, author = "Christian K{\"a}stner and Klaus Ostermann and Sebastian Erdweg", title = "A variability-aware module system", journal = j-SIGPLAN, volume = "47", number = "10", pages = "773--792", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384673", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Module systems enable a divide and conquer strategy to software development. To implement compile-time variability in software product lines, modules can be composed in different combinations. However, this way, variability dictates a dominant decomposition. As an alternative, we introduce a variability-aware module system that supports compile-time variability inside a module and its interface. So, each module can be considered a product line that can be type checked in isolation. Variability can crosscut multiple modules. The module system breaks with the antimodular tradition of a global variability model in product-line development and provides a path toward software ecosystems and product lines of product lines developed in an open fashion. We discuss the design and implementation of such a module system on a core calculus and provide an implementation for C as part of the TypeChef project. Our implementation supports variability inside modules from {\tt \#ifdef} preprocessor directives and variable linking at the composition level. With our implementation, we type check all configurations of all modules of the open source product line Busybox with 811 compile-time options, perform linker check of all configurations, and report found type and linker errors --- without resorting to a brute-force strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Takikawa:2012:GTF, author = "Asumu Takikawa and T. 
Stephen Strickland and Christos Dimoulas and Sam Tobin-Hochstadt and Matthias Felleisen", title = "Gradual typing for first-class classes", journal = j-SIGPLAN, volume = "47", number = "10", pages = "793--810", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384674", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic type-checking and object-oriented programming often go hand-in-hand; scripting languages such as Python, Ruby, and JavaScript all embrace object-oriented (OO) programming. When scripts written in such languages grow and evolve into large programs, the lack of a static type discipline reduces maintainability. A programmer may thus wish to migrate parts of such scripts to a sister language with a static type system. Unfortunately, existing type systems neither support the flexible OO composition mechanisms found in scripting languages nor accommodate sound interoperation with untyped code. In this paper, we present the design of a gradual typing system that supports sound interaction between statically- and dynamically-typed units of class-based code. The type system uses row polymorphism for classes and thus supports mixin-based OO composition. To protect migration of mixins from typed to untyped components, the system employs a novel form of contracts that partially seal classes. The design comes with a theorem that guarantees the soundness of the type system even in the presence of untyped components.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Tardieu:2012:CK, author = "Olivier Tardieu and Nathaniel Nystrom and Igor Peshansky and Vijay Saraswat", title = "Constrained kinds", journal = j-SIGPLAN, volume = "47", number = "10", pages = "811--830", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384675", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern object-oriented languages such as X10 require a rich framework for types capable of expressing both value-dependency and genericity, and supporting pluggable, domain-specific extensions. In earlier work, we presented a framework for constrained types in object-oriented languages, parametrized by an underlying constraint system. Types are viewed as formulas {Cc} where C is the name of a class or an interface and c is a constraint on the immutable instance state (the properties) of C. Constraint systems are a very expressive framework for partial information. Many (value-)dependent type systems for object-oriented languages can be viewed as constrained types. This paper extends the constrained types approach to handle type-dependency (``genericity''). The key idea is to introduce constrained kinds: in the same way that constraints on values can be used to define constrained types, constraints on types can define constrained kinds. We develop a core programming language with constrained kinds. 
Generic types are supported by introducing type variables---literally, variables with ``type'' Type---and permitting programs to impose subtyping and equality constraints on such variables. We formalize the type-checking rules and establish soundness. While the language now intertwines constraints on types and values, its type system remains parametric in the choice of the value constraint system (language and solver). We demonstrate that constrained kinds are expressive and practical and sketch possible extensions with a discussion of the design and implementation of X10.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Cohen:2012:ET, author = "Michael Cohen and Haitao Steve Zhu and Emgin Ezgi Senem and Yu David Liu", title = "Energy types", journal = j-SIGPLAN, volume = "47", number = "10", pages = "831--850", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384676", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel type system to promote and facilitate energy-aware programming. Energy Types is built upon a key insight into today's energy-efficient systems and applications: despite the popular perception that energy and power can only be described in joules and watts, real-world energy management is often based on discrete phases and modes, which in turn can be reasoned about by type systems very effectively. A phase characterizes a distinct pattern of program workload, and a mode represents an energy state the program is expected to execute in. This paper describes a programming model where phases and modes can be intuitively specified by programmers or inferred by the compiler as type information. It demonstrates how a type-based approach to reasoning about phases and modes can help promote energy efficiency. The soundness of our type system and the invariants related to inter-phase and inter-mode interactions are rigorously proved. Energy Types is implemented as the core of a prototyped object-oriented language ET for smartphone programming. Preliminary studies show ET can lead to significant energy savings for Android Apps.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Wu:2012:EIS, author = "Bo Wu and Zhijia Zhao and Xipeng Shen and Yunlian Jiang and Yaoqing Gao and Raul Silvera", title = "Exploiting inter-sequence correlations for program behavior prediction", journal = j-SIGPLAN, volume = "47", number = "10", pages = "851--866", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384678", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Prediction of program dynamic behaviors is fundamental to program optimizations, resource management, and architecture reconfigurations. Most existing predictors are based on locality of program behaviors, subject to some inherent limitations. 
In this paper, we revisit the design philosophy and systematically explore a second source of clues: statistical correlations between the behavior sequences of different program entities. Concentrated on loops, it examines the correlations' existence, strength, and values in enhancing the design of program behavior predictors. It creates the first taxonomy of program behavior sequence patterns. It develops a new form of predictors, named sequence predictors, to effectively translate the correlations into large-scope, proactive predictions of program behavior sequences. It demonstrates the usefulness of the prediction in dynamic version selection and loop importance estimation, showing 19\% average speedup on a number of real-world utility applications. By taking scope and timing of behavior prediction as the first-order design objectives, the new approach overcomes limitations of existing program behavior predictors, opening up many new opportunities for runtime optimizations at various layers of computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Ausiello:2012:KCC, author = "Giorgio Ausiello and Camil Demetrescu and Irene Finocchi and Donatella Firmani", title = "$k$-Calling context profiling", journal = j-SIGPLAN, volume = "47", number = "10", pages = "867--878", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384679", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Calling context trees are one of the most fundamental data structures for representing the interprocedural control flow of a program, providing valuable information for program understanding and optimization. Nodes of a calling context tree associate performance metrics to whole distinct paths in the call graph starting from the root function. However, no explicit information is provided for detecting short hot sequences of activations, which may be a better optimization target in large modular programs where groups of related functions are reused in many different parts of the code. Furthermore, calling context trees can grow prohibitively large in some scenarios. Another classical approach, called edge profiling, collects performance metrics for caller-callee pairs in the call graph, allowing it to detect hot paths of fixed length one. We study a generalization of edge and context-sensitive profiles by introducing a novel data structure called k-calling context forest (k-CCF). Nodes in a k-CCF associate performance metrics to paths of length at most k that lead to each distinct routine of the program, providing edge profiles for k=1, full context-sensitive profiles for k equal to infinity, as well as any other intermediate point in the spectrum. We study the properties of the k-CCF both theoretically and experimentally on a large suite of prominent Linux applications, showing how to construct it efficiently and discussing its relationships with the calling context tree. 
Our experiments show that the k-CCF can provide effective space-accuracy tradeoffs for interprocedural contextual profiling, yielding useful clues to the hot spots of a program that may be hidden in a calling context tree and using less space for small values of k, which appear to be the most interesting in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Huang:2012:RRC, author = "Wei Huang and Ana Milanova and Werner Dietl and Michael D. Ernst", title = "{Reim \& ReImInfer}: checking and inference of reference immutability and method purity", journal = j-SIGPLAN, volume = "47", number = "10", pages = "879--896", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384680", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reference immutability ensures that a reference is not used to modify the referenced object, and enables the safe sharing of object structures. A pure method does not cause side-effects on the objects that existed in the pre-state of the method execution. Checking and inference of reference immutability and method purity enables a variety of program analyses and optimizations. We present ReIm, a type system for reference immutability, and ReImInfer, a corresponding type inference analysis. The type system is concise and context-sensitive. The type inference analysis is precise and scalable, and requires no manual annotations. In addition, we present a novel application of the reference immutability type system: method purity inference. To support our theoretical results, we implemented the type system and the type inference analysis for Java. We include a type checker to verify the correctness of the inference result. Empirical results on Java applications and libraries of up to 348kLOC show that our approach achieves both scalability and precision.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Bao:2012:WBS, author = "Tao Bao and Yunhui Zheng and Xiangyu Zhang", title = "White box sampling in uncertain data processing enabled by program analysis", journal = j-SIGPLAN, volume = "47", number = "10", pages = "897--914", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384681", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sampling is a very important and low-cost approach to uncertain data processing, in which output variations caused by input errors are sampled. Traditional methods tend to treat a program as a blackbox. In this paper, we show that through program analysis, we can expose the internals of sample executions so that the process can become more selective and focused. In particular, we develop a sampling runtime that can selectively sample in input error bounds to expose discontinuity in output functions. It identifies all the program factors that can potentially lead to discontinuity and hash the values of such factors during execution in a cost-effective way. 
The hash values are used to guide the sampling process. Our results show that the technique is very effective for real-world programs. It can achieve the precision of a high sampling rate with the cost of a lower sampling rate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Lucas:2012:DPM, author = "Charles Lucas and Sebastian Elbaum and David S. Rosenblum", title = "Detecting problematic message sequences and frequencies in distributed systems", journal = j-SIGPLAN, volume = "47", number = "10", pages = "915--926", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384683", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Testing the components of a distributed system is challenging as it requires consideration of not just the state of a component, but also the sequence of messages it may receive from the rest of the system or the environment. Such messages may vary in type and content, and more particularly, in the frequency at which they are generated. All of these factors, in the right combination, may lead to faulty behavior. In this paper we present an approach to address these challenges by systematically analyzing a component in a distributed system to identify specific message sequences and frequencies at which a failure can occur. At the core of the analysis is the generation of a test driver that defines the space of message sequences to be generated, the exploration of that space through the use of dynamic symbolic execution, and the timing and analysis of the generated tests to identify problematic frequencies. We implemented our approach in the context of the popular Robotic Operating System and investigated its application to three systems of increasing complexity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Gu:2012:RDK, author = "Zhongxian Gu and Earl T. Barr and Drew Schleck and Zhendong Su", title = "Reusing debugging knowledge via trace-based bug search", journal = j-SIGPLAN, volume = "47", number = "10", pages = "927--942", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384684", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Some bugs, among the millions that exist, are similar to each other. One bug-fixing tactic is to search for similar bugs that have been reported and resolved in the past. A fix for a similar bug can help a developer understand a bug, or even directly fix it. Studying bugs with similar symptoms, programmers may determine how to detect or resolve them. To speed debugging, we advocate the systematic capture and reuse of debugging knowledge, much of which is currently wasted. The core challenge here is how to search for similar bugs. To tackle this problem, we exploit semantic bug information in the form of execution traces, which precisely capture bug semantics. 
This paper introduces novel tool and language support for semantically querying and analyzing bugs. We describe OSCILLOSCOPE, an Eclipse plugin, that uses a bug trace to exhaustively search its database for similar bugs and return their bug reports. OSCILLOSCOPE displays the traces of the bugs it returns against the trace of the target bug, so a developer can visually examine the quality of the matches. OSCILLOSCOPE rests on our bug query language (BQL), a flexible query language over traces. To realize OSCILLOSCOPE, we developed an open infrastructure that consists of a trace collection engine, BQL, a Hadoop-based query engine for BQL, a trace-indexed bug database, as well as a web-based frontend. OSCILLOSCOPE records and uploads bug traces to its infrastructure; it does so automatically when a JUnit test fails. We evaluated OSCILLOSCOPE on bugs collected from popular open-source projects. We show that OSCILLOSCOPE accurately and efficiently finds similar bugs, some of which could have been immediately used to fix open bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Strickland:2012:CIR, author = "T. Stephen Strickland and Sam Tobin-Hochstadt and Robert Bruce Findler and Matthew Flatt", title = "Chaperones and impersonators: run-time support for reasonable interposition", journal = j-SIGPLAN, volume = "47", number = "10", pages = "943--962", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384685", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Chaperones and impersonators provide run-time support for interposing on primitive operations such as function calls, array access and update, and structure field access and update. Unlike most interposition support, chaperones and impersonators are restricted so that they constrain the behavior of the interposing code to reasonable interposition, which in practice preserves the abstraction mechanisms and reasoning that programmers and compiler analyses rely on. Chaperones and impersonators are particularly useful for implementing contracts, and our implementation in Racket allows us to improve both the expressiveness and the performance of Racket's contract system. Specifically, contracts on mutable data can be enforced without changing the API to that data; contracts on large data structures can be checked lazily on only the accessed parts of the structure; contracts on objects and classes can be implemented with lower overhead; and contract wrappers can preserve object equality where appropriate. 
With this extension, gradual typing systems, such as Typed Racket, that rely on contracts for interoperation with untyped code can now pass mutable values safely between typed and untyped modules.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Solodkyy:2012:OET, author = "Yuriy Solodkyy and Gabriel {Dos Reis} and Bjarne Stroustrup", title = "Open and efficient type switch for {C++}", journal = j-SIGPLAN, volume = "47", number = "10", pages = "963--982", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384686", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Selecting operations based on the run-time type of an object is key to many object-oriented and functional programming techniques. We present a technique for implementing open and efficient type switching on hierarchical extensible data types. The technique is general and copes well with C++ multiple inheritance. To simplify experimentation and gain realistic performance using production-quality compilers and tool chains, we implement a type switch construct as an ISO C++11 library, called Mach7. This library-only implementation provides concise notation and outperforms the visitor design pattern, commonly used for case analysis on types in object-oriented programming. For closed sets of types, its performance roughly equals equivalent code in functional languages, such as OCaml and Haskell. The type-switching code is easier to use and is more expressive than hand-coded visitors are. The library is non-intrusive and circumvents most of the extensibility restrictions typical of the visitor design pattern. It was motivated by applications involving large, typed, abstract syntax trees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Tamayo:2012:UBD, author = "Juan M. Tamayo and Alex Aiken and Nathan Bronson and Mooly Sagiv", title = "Understanding the behavior of database operations under program control", journal = j-SIGPLAN, volume = "47", number = "10", pages = "983--996", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384688", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications that combine general program logic with persistent databases (e.g., three-tier applications) often suffer large performance penalties from poor use of the database. We introduce a program analysis technique that combines information flow in the program with commutativity analysis of its database operations to produce a unified dependency graph for database statements, which provides programmers with a high-level view of how costly database operations are and how they are connected in the program. As an example application of our analysis we describe three optimizations that can be discovered by examining the structure of the dependency graph; each helps remove communication latency from the critical path of a multi-tier system. 
We implement our technique in a tool for Java applications using JDBC and experimentally validate it using the multi-tier component of the Dacapo benchmark.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Mishne:2012:TBS, author = "Alon Mishne and Sharon Shoham and Eran Yahav", title = "Typestate-based semantic code search over partial programs", journal = j-SIGPLAN, volume = "47", number = "10", pages = "997--1016", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384689", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel code search approach for answering queries focused on API-usage with code showing how the API should be used. To construct a search index, we develop new techniques for statically mining and consolidating temporal API specifications from code snippets. In contrast to existing semantic-based techniques, our approach handles partial programs in the form of code snippets. Handling snippets allows us to consume code from various sources such as parts of open source projects, educational resources (e.g. tutorials), and expert code sites. To handle code snippets, our approach (i) extracts a possibly partial temporal specification from each snippet using a relatively precise static analysis tracking a generalized notion of typestate, and (ii) consolidates the partial temporal specifications, combining consistent partial information to yield consolidated temporal specifications, each of which captures a full(er) usage scenario. To answer a search query, we define a notion of relaxed inclusion matching a query against temporal specifications and their corresponding code snippets. We have implemented our approach in a tool called PRIME and applied it to search for API usage of several challenging APIs. PRIME was able to analyze and consolidate thousands of snippets per tested API, and our results indicate that the combination of a relatively precise analysis and consolidation allowed PRIME to answer challenging queries effectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{Xu:2012:FRD, author = "Guoqing Xu", title = "Finding reusable data structures", journal = j-SIGPLAN, volume = "47", number = "10", pages = "1017--1034", month = oct, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398857.2384690", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:23 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A big source of run-time performance problems in large-scale, object-oriented applications is the frequent creation of data structures (by the same allocation site) whose lifetimes are disjoint, and whose shapes and data content are always the same. Constructing these data structures and computing the same data values many times is expensive; significant performance improvements can be achieved by reusing their instances, shapes, and/or data values rather than reconstructing them. 
This paper presents a run-time technique that can be used to help programmers find allocation sites that create such data structures to improve performance. At the heart of the technique are three reusability definitions and novel summarization approaches that compute summaries for data structures based on these definitions. The computed summaries are used subsequently to find data structures that have disjoint lifetimes, and/or that have the same shapes and content. We have implemented this technique in the Jikes RVM and performed extensive studies on large-scale, real-world programs. We describe our experience using six case studies, in which we have achieved large performance gains by fixing problems reported by our tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '12 conference proceedings.", } @Article{OCallahan:2012:WYW, author = "Robert O'Callahan", title = "Why is your {Web} browser using so much memory?", journal = j-SIGPLAN, volume = "47", number = "11", pages = "1--2", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2258998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Browsers are the operating systems of the Web. They support a vast universe of applications written in a modern garbage-collected programming language. Browsers expose a rich platform API mostly implemented in C++. Browsers are also consumer software with low switching costs in an intensely competitive market. Thus in addition to standard requirements such as maximizing throughput and minimizing latency, browsers have to consider issues like: when the user closes a window while watching Task Manager, they want to see memory usage go down. Browsers have to compete to minimize memory usage even for poorly written applications. In this talk I will elucidate these requirements and describe how Firefox and other browsers address them. I will pay particular attention to issues that we don't know how to solve, and that could benefit from research attention.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhou:2012:MMM, author = "Jin Zhou and Brian Demsky", title = "Memory management for many-core processors with software configurable locality policies", journal = j-SIGPLAN, volume = "47", number = "11", pages = "3--14", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259000", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "As processors evolve towards higher core counts, architects will develop more sophisticated memory systems to satisfy the cores' increasing thirst for memory bandwidth. Early many-core processor designs suggest that future memory systems will likely include multiple controllers and distributed cache coherence protocols. Many-core processors that expose memory locality policies to the software system provide opportunities for automatic tuning that can achieve significant performance benefits.
Managed languages typically provide a simple heap abstraction. This paper presents techniques that bridge the gap between the simple heap abstraction of modern languages and the complicated memory systems of future processors. We present a NUMA-aware approach to garbage collection that balances the competing concerns of data locality and heap utilization to improve performance. We combine a lightweight approach for measuring an application's memory behavior with an online, adaptive algorithm for tuning the cache to optimize it for the specific application's behaviors. We have implemented our garbage collector and cache tuning algorithm and present results on a 64-core TILEPro64 processor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lyberis:2012:MMA, author = "Spyros Lyberis and Polyvios Pratikakis and Dimitrios S. Nikolopoulos and Martin Schulz and Todd Gamblin and Bronis R. de Supinski", title = "The {Myrmics} memory allocator: hierarchical, message-passing allocation for global address spaces", journal = j-SIGPLAN, volume = "47", number = "11", pages = "15--24", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Constantly increasing hardware parallelism poses more and more challenges to programmers and language designers. One approach to harness the massive parallelism is to move to task-based programming models that rely on runtime systems for dependency analysis and scheduling. Such models generally benefit from the existence of a global address space. This paper presents the parallel memory allocator of the Myrmics runtime system, in which multiple allocator instances organized in a tree hierarchy cooperate to implement a global address space with dynamic region support on distributed memory machines. The Myrmics hierarchical memory allocator is a step towards improved productivity and performance in parallel programming. Productivity is improved through the use of dynamic regions in a global address space, which provide a convenient shared memory abstraction for dynamic and irregular data structures. Performance is improved through scaling on manycore systems without system-wide cache coherency. We evaluate the stand-alone allocator on an MPI-based x86 cluster and find that it scales well for up to 512 worker cores, while it can outperform Unified Parallel C by a factor of 3.7-10.7x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Maas:2012:GOO, author = "Martin Maas and Philip Reames and Jeffrey Morlan and Krste Asanovi{\'c} and Anthony D. 
Joseph and John Kubiatowicz", title = "{GPUs} as an opportunity for offloading garbage collection", journal = j-SIGPLAN, volume = "47", number = "11", pages = "25--36", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "GPUs have become part of most commodity systems. Nonetheless, they are often underutilized when not executing graphics-intensive or special-purpose numerical computations, which are rare in consumer workloads. Emerging architectures, such as integrated CPU/GPU combinations, may create an opportunity to utilize these otherwise unused cycles for offloading traditional systems tasks. Garbage collection appears to be a particularly promising candidate for offloading, due to the popularity of managed languages on consumer devices. We investigate the challenges for offloading garbage collection to a GPU, by examining the performance trade-offs for the mark phase of a mark \& sweep garbage collector. We present a theoretical analysis and an algorithm that demonstrates the feasibility of this approach. We also discuss a number of algorithmic design trade-offs required to leverage the strengths and capabilities of the GPU hardware. Our algorithm has been integrated into the Jikes RVM and we present promising performance results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yang:2012:BRF, author = "Xi Yang and Stephen M. Blackburn and Daniel Frampton and Antony L. Hosking", title = "Barriers reconsidered, friendlier still!", journal = j-SIGPLAN, volume = "47", number = "11", pages = "37--48", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259004", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Read and write barriers mediate access to the heap allowing the collector to control and monitor mutator actions. For this reason, barriers are a powerful tool in the design of any heap management algorithm, but the prevailing wisdom is that they impose significant costs. However, changes in hardware and workloads make these costs a moving target. Here, we measure the cost of a range of useful barriers on a range of modern hardware and workloads. We confirm some old results and overturn others. We evaluate the microarchitectural sensitivity of barrier performance and the differences among benchmark suites. We also consider barriers in context, focusing on their behavior when used in combination, and investigate a known pathology and evaluate solutions. Our results show that read and write barriers have average overheads as low as 5.4\% and 0.9\% respectively. We find that barrier overheads are more exposed on the workload provided by the modern DaCapo benchmarks than on old SPECjvm98 benchmarks. Moreover, there are differences in barrier behavior between in-order and out-of-order machines, and their respective memory subsystems, which indicate different barrier choices for different platforms. 
These changing costs mean that algorithm designers need to reconsider their design choices and the nature of their resulting algorithms in order to exploit the opportunities presented by modern hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sivaramakrishnan:2012:ERB, author = "KC Sivaramakrishnan and Lukasz Ziarek and Suresh Jagannathan", title = "Eliminating read barriers through procrastination and cleanliness", journal = j-SIGPLAN, volume = "47", number = "11", pages = "49--60", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259005", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Managed languages typically use read barriers to interpret forwarding pointers introduced to keep track of copied objects. For example, in a multicore environment with thread-local heaps and a global, shared heap, an object initially allocated on a local heap may be copied to a shared heap if it becomes the source of a store operation whose target location resides on the shared heap. As part of the copy operation, a forwarding pointer may be established in the original object to point to the copied object. This level of indirection avoids the need to update all of the references to the object that has been copied. In this paper, we consider the design of a managed runtime that eliminates read barriers. Our design is premised on the availability of a sufficient degree of concurrency to stall operations that would otherwise necessitate the copy. Stalled actions are deferred until the next local collection, avoiding exposing forwarding pointers to the mutator. In certain important cases, procrastination is unnecessary --- lightweight runtime techniques can sometimes be used to allow objects to be eagerly copied when their set of incoming references is known, or when it can be determined that having multiple copies would not violate program semantics. We evaluate our techniques on 3 platforms: a 16-core AMD64 machine, a 48-core Intel SCC, and an 864-core Azul Vega 3. Experimental results over a range of parallel benchmarks indicate that our approach leads to notable performance gains (20 --- 32\% on average) without incurring any additional complexity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Iyengar:2012:SCP, author = "Balaji Iyengar and Edward Gehringer and Michael Wolf and Karthikeyan Manivannan", title = "Scalable concurrent and parallel mark", journal = j-SIGPLAN, volume = "47", number = "11", pages = "61--72", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259006", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Parallel marking algorithms use multiple threads to walk through the object heap graph and mark each reachable object as live. Parallel marker threads mark an object ``live'' by atomically setting a bit in a mark-bitmap or a bit in the object header. 
Most of these parallel algorithms strive to improve the marking throughput by using work-stealing algorithms for load-balancing and to ensure that all participating threads are kept busy. A purely ``processor-centric'' load-balancing approach in conjunction with a need to atomically set the mark bit, results in significant contention during parallel marking. This limits the scalability and throughput of parallel marking algorithms. We describe a new non-blocking and lock-free, work-sharing algorithm, the primary goal being to reduce contention during atomic updates of the mark-bitmap by parallel task-threads. Our work-sharing mechanism uses the address of a word in the mark-bitmap as the key to stripe work among parallel task-threads, with only a subset of the task-threads working on each stripe. This filters out most of the contention during parallel marking with 20\% improvements in performance. In case of concurrent and on-the-fly collector algorithms, mutator threads also generate marking-work for the marking task-threads. In these schemes, mutator threads are also provided with thread-local marking stacks where they collect references to potentially ``gray'' objects, i.e., objects that haven't been ``marked-through'' by the collector. We note that since this work is generated by mutators when they reference these objects, there is a high likelihood that these objects continue to be present in the processor cache. We describe and evaluate a scheme to distribute mutator generated marking work among the collector's task-threads that is cognizant of the processor and cache topology. We prototype both our algorithms within the C4 [28] collector that ships as part of an industrial strength JVM for the Linux-X86 platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shahriyar:2012:CGR, author = "Rifat Shahriyar and Stephen M. Blackburn and Daniel Frampton", title = "Down for the count? {Getting} reference counting back in the ring", journal = j-SIGPLAN, volume = "47", number = "11", pages = "73--84", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259008", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Reference counting and tracing are the two fundamental approaches that have underpinned garbage collection since 1960. However, despite some compelling advantages, reference counting is almost completely ignored in implementations of high performance systems today. In this paper we take a detailed look at reference counting to understand its behavior and to improve its performance. We identify key design choices for reference counting and analyze how the behavior of a wide range of benchmarks might affect design decisions. As far as we are aware, this is the first such quantitative study of reference counting. We use insights gleaned from this analysis to introduce a number of optimizations that significantly improve the performance of reference counting. We find that an existing modern implementation of reference counting has an average 30\% overhead compared to tracing, and that in combination, our optimizations are able to completely eliminate that overhead. 
This brings the performance of reference counting on par with that of a well tuned mark-sweep collector. We keep our in-depth analysis of reference counting as general as possible so that it may be useful to other garbage collector implementers. Our finding that reference counting can be made directly competitive with well tuned mark-sweep should shake the community's prejudices about reference counting and perhaps open new opportunities for exploiting reference counting's strengths, such as localization and immediacy of reclamation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Iyengar:2012:CWF, author = "Balaji Iyengar and Gil Tene and Michael Wolf and Edward Gehringer", title = "The {Collie}: a wait-free compacting collector", journal = j-SIGPLAN, volume = "47", number = "11", pages = "85--96", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259009", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "We describe the Collie collector, a fully concurrent compacting collector that uses transactional memory techniques to achieve wait-free compaction. The collector uses compaction as the primary means of reclaiming unused memory, and performs ``individual object transplantations'' as transactions. We introduce new terms and requirements useful for analyzing concurrent relocating collectors, including definitions of referrer sets, object transplantation and the notion of individually transplantable objects. The Collie collector builds on these terms and on a detailed analysis of an object's legal states during compaction. Collie uses a combination of read barriers, write barriers and transactional memory operations. Its read-barrier supports fast, direct object referencing while using a bound, constant time, wait-free triggering path. Collie thereby avoids the constant indirection cost of Brooks [9] style barriers or handle-based heaps [25]. Collie is demonstrated using speculative multi-address atomicity [11], a form of hardware transactional memory supported by the Azul Vega architecture [2]. We evaluate the Collie collector on the Azul platform, on which previous concurrent collectors such as the Pauseless Collector [12] and its generational variant [30] have been commercially available for several years. We discuss Collie's performance while running sustained workloads, and compare it to the Pauseless collector on the same platform. The Collie collector provides significant MMU [5] improvements even in the 1-msec time windows compared to the Pauseless collector. At the same time, it matches Pauseless in throughput and in the ability to scale to large heap sizes. We believe that the Collie collector is the first garbage collector to leverage hardware-assisted transactional memory. While Collie directly leverages Vega's speculative multi-address atomicity feature (SMA) [11], its design can be easily adapted to other hardware-assisted transactional memory systems. Specifically, the upcoming Intel TSX instruction set extensions [21] include capabilities similar to SMA. 
We expect Collie to be easily implementable on future commodity servers based on Intel Haswell processors and following processor generations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sewe:2012:NSI, author = "Andreas Sewe and Mira Mezini and Aibek Sarimbekov and Danilo Ansaloni and Walter Binder and Nathan Ricci and Samuel Z. Guyer", title = "{{\tt New Scala() instanceof}} {Java}: a comparison of the memory behaviour of {Java} and {Scala} programs", journal = j-SIGPLAN, volume = "47", number = "11", pages = "97--108", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259010", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "While often designed with a single language in mind, managed runtimes like the Java virtual machine (JVM) have become the target of not one but many languages, all of which benefit from the runtime's services. One of these services is automatic memory management. In this paper, we compare and contrast the memory behaviour of programs written in Java and Scala, respectively, two languages which both target the same platform: the JVM. We both analyze core object demographics like object lifetimes as well as secondary properties of objects like their associated monitors and identity hash-codes. We find that objects in Scala programs have lower survival rates and higher rates of immutability, which is only partly explained by the memory behaviour of objects representing closures or boxed primitives. Other metrics vary more by benchmark than language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gu:2012:GTC, author = "Xiaoming Gu and Chen Ding", title = "A generalized theory of collaborative caching", journal = j-SIGPLAN, volume = "47", number = "11", pages = "109--120", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259012", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Collaborative caching allows software to use hints to influence cache management in hardware. Previous theories have shown that such hints observe the inclusion property and can obtain optimal caching if the access sequence and the cache size are known ahead of time. Previously, the interface of a cache hint is limited, e.g., a binary choice between LRU and MRU. In this paper, we generalize the hint interface, where a hint is a number encoding a priority. We show the generality in a hierarchical relation where collaborative caching subsumes non-collaborative caching, and within collaborative caching, the priority hint subsumes the previous binary hint. We show two theoretical results for the general hint. The first is a new cache replacement policy, priority LRU, which permits the complete range of choices between MRU and LRU. We prove a new type of inclusion property---non-uniform inclusion---and give a one-pass algorithm to compute the miss rate for all cache sizes. 
Second, we show that priority hints can enable the use of the same hints to obtain optimal caching for all cache sizes, without having to know the cache size beforehand.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nasre:2012:ESC, author = "Rupesh Nasre", title = "Exploiting the structure of the constraint graph for efficient points-to analysis", journal = j-SIGPLAN, volume = "47", number = "11", pages = "121--132", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259013", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Points-to analysis is a key compiler analysis. Several memory related optimizations use points-to information to improve their effectiveness. Points-to analysis is performed by building a constraint graph of pointer variables and dynamically updating it to propagate more and more points-to information across its subset edges. So far, the structure of the constraint graph has been only trivially exploited for efficient propagation of information, e.g., in identifying cyclic components or to propagate information in topological order. We perform a careful study of its structure and propose a new inclusion-based flow-insensitive context-sensitive points-to analysis algorithm based on the notion of dominant pointers. We also propose a new kind of pointer-equivalence based on dominant pointers which provides significantly more opportunities for reducing the number of pointers tracked during the analysis. Based on this hitherto unexplored form of pointer-equivalence, we develop a new context-sensitive flow insensitive points-to analysis algorithm which uses incremental dominator update to efficiently compute points-to information. Using a large suite of programs consisting of SPEC 2000 benchmarks and five large open source programs we show that our points-to analysis is 88\% faster than BDD-based Lazy Cycle Detection and $ 2 \times $ faster than Deep Propagation. We argue that our approach of detecting dominator-based pointer-equivalence is a key to improve points-to analysis efficiency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Inoue:2012:ISC, author = "Hiroshi Inoue and Toshio Nakatani", title = "Identifying the sources of cache misses in {Java} programs without relying on hardware counters", journal = j-SIGPLAN, volume = "47", number = "11", pages = "133--142", month = nov, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2426642.2259014", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jan 10 08:55:30 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '12 conference proceedings.", abstract = "Cache miss stalls are one of the major sources of performance bottlenecks for multicore processors. A Hardware Performance Monitor (HPM) in the processor is useful for locating the cache misses, but is rarely used in the real world for various reasons. It would be better to find a simple approach to locate the sources of cache misses and apply runtime optimizations without relying on an HPM. 
This paper shows that pointer dereferencing in hot loops is a major source of cache misses in Java programs. Based on this observation, we devised a new approach to identify the instructions and objects that cause frequent cache misses. Our heuristic technique effectively identifies the majority of the cache misses in typical Java programs by matching the hot loops to simple idiomatic code patterns. On average, our technique selected only 2.8\% of the load and store instructions generated by the JIT compiler and these instructions accounted for 47\% of the L1D cache misses and 49\% of the L2 cache misses caused by the JIT-compiled code. To prove the effectiveness of our technique in compiler optimizations, we prototyped object placement optimizations, which align objects in cache lines or collocate paired objects in the same cache line to reduce cache misses. For comparison, we also implemented the same optimizations based on the accurate information obtained from the HPM. Our results showed that our heuristic approach was as effective as the HPM-based approach and achieved comparable performance improvements in the {\tt SPECjbb2005} and {\tt SPECpower\_ssj2008} benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Farmer:2012:HMP, author = "Andrew Farmer and Andy Gill and Ed Komp and Neil Sculthorpe", title = "The {HERMIT} in the machine: a plugin for the interactive transformation of {GHC} core language programs", journal = j-SIGPLAN, volume = "47", number = "12", pages = "1--12", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "The importance of reasoning about and refactoring programs is a central tenet of functional programming. Yet our compilers and development toolchains only provide rudimentary support for these tasks. This paper introduces a programmatic and compiler-centric interface that facilitates refactoring and equational reasoning. To develop our ideas, we have implemented HERMIT, a toolkit enabling informal but systematic transformation of Haskell programs from inside the Glasgow Haskell Compiler's optimization pipeline. With HERMIT, users can experiment with optimizations and equational reasoning, while the tedious heavy lifting of performing the actual transformations is done for them. HERMIT provides a transformation API that can be used to build higher-level rewrite tools. One use-case is prototyping new optimizations as clients of this API before being committed to the GHC toolchain. We describe a HERMIT application --- a read-eval-print shell for performing transformations using HERMIT. We also demonstrate using this shell to prototype an optimization on a specific example, and report our initial experiences and remaining challenges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Adams:2012:TYB, author = "Michael D. Adams and Thomas M. 
DuBuisson", title = "Template your boilerplate: using {Template Haskell} for efficient generic programming", journal = j-SIGPLAN, volume = "47", number = "12", pages = "13--24", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Generic programming allows the concise expression of algorithms that would otherwise require large amounts of handwritten code. A number of such systems have been developed over the years, but a common drawback of these systems is poor runtime performance relative to handwritten, non-generic code. Generic-programming systems vary significantly in this regard, but few consistently match the performance of handwritten code. This poses a dilemma for developers. Generic-programming systems offer concision but cost performance. Handwritten code offers performance but costs concision. This paper explores the use of Template Haskell to achieve the best of both worlds. It presents a generic-programming system for Haskell that provides both the concision of other generic-programming systems and the efficiency of handwritten code. Our system gives the programmer a high-level, generic-programming interface, but uses Template Haskell to generate efficient, non-generic code that outperforms existing generic-programming systems for Haskell. This paper presents the results of benchmarking our system against both handwritten code and several other generic-programming systems. In these benchmarks, our system matches the performance of handwritten code while other systems average anywhere from two to twenty times slower.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lippmeier:2012:GPA, author = "Ben Lippmeier and Manuel Chakravarty and Gabriele Keller and Simon Peyton Jones", title = "Guiding parallel array fusion with indexed types", journal = j-SIGPLAN, volume = "47", number = "12", pages = "25--36", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "We present a refined approach to parallel array fusion that uses indexed types to specify the internal representation of each array. Our approach aids the client programmer in reasoning about the performance of their program in terms of the source code. It also makes the intermediate code easier to transform at compile-time, resulting in faster compilation and more reliable runtimes. We demonstrate how our new approach improves both the clarity and performance of several end-user written programs, including a fluid flow solver and an interpolator for volumetric data.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Keller:2012:VA, author = "Gabriele Keller and Manuel M. T. 
Chakravarty and Roman Leshchinskiy and Ben Lippmeier and Simon Peyton Jones", title = "Vectorisation avoidance", journal = j-SIGPLAN, volume = "47", number = "12", pages = "37--48", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Flattening nested parallelism is a vectorising code transform that converts irregular nested parallelism into flat data parallelism. Although the result has good asymptotic performance, flattening thoroughly restructures the code. Many intermediate data structures and traversals are introduced, which may or may not be eliminated by subsequent optimisation. We present a novel program analysis to identify parts of the program where flattening would only introduce overhead, without appropriate gain. We present empirical evidence that avoiding vectorisation in these cases leads to more efficient programs than if we had applied vectorisation and then relied on array fusion to eliminate intermediates from the resulting code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jeuring:2012:TTC, author = "Johan Jeuring and Patrik Jansson and Cl{\'a}udio Amaral", title = "Testing type class laws", journal = j-SIGPLAN, volume = "47", number = "12", pages = "49--60", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "The specification of a class in Haskell often starts with stating, in comments, the laws that should be satisfied by methods defined in instances of the class, followed by the type of the methods of the class. This paper develops a framework that supports testing such class laws using QuickCheck. Our framework is a light-weight class law testing framework, which requires a limited amount of work per class law, and per datatype for which the class law is tested. We also show how to test class laws with partially-defined values. Using partially-defined values, we show that the standard lazy and strict implementations of the state monad do not satisfy the expected laws.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Duregaard:2012:FFE, author = "Jonas Dureg{\aa}rd and Patrik Jansson and Meng Wang", title = "{Feat}: functional enumeration of algebraic types", journal = j-SIGPLAN, volume = "47", number = "12", pages = "61--72", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "In mathematics, an enumeration of a set S is a bijective function from (an initial segment of) the natural numbers to S. 
We define ``functional enumerations'' as efficiently computable such bijections. This paper describes a theory of functional enumeration and provides an algebra of enumerations closed under sums, products, guarded recursion and bijections. We partition each enumerated set into numbered, finite subsets. We provide a generic enumeration such that the number of each part corresponds to the size of its values (measured in the number of constructors). We implement our ideas in a Haskell library called testing-feat, and make the source code freely available. Feat provides efficient ``random access'' to enumerated values. The primary application is property-based testing, where it is used to define both random sampling (for example QuickCheck generators) and exhaustive enumeration (in the style of SmallCheck). We claim that functional enumeration is the best option for automatically generating test cases from large groups of mutually recursive syntax tree types. As a case study we use Feat to test the pretty-printer of the Template Haskell library (uncovering several bugs).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Claessen:2012:SSF, author = "Koen Claessen", title = "Shrinking and showing functions: (functional pearl)", journal = j-SIGPLAN, volume = "47", number = "12", pages = "73--80", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Although quantification over functions in QuickCheck properties has been supported from the beginning, displaying and shrinking them as counter examples has not. The reason is that in general, functions are infinite objects, which means that there is no sensible show function for them, and shrinking an infinite object within a finite number of steps seems impossible. This paper presents a general technique with which functions as counter examples can be shrunk to finite objects, which can then be displayed to the user. The approach turns out to be practically usable, which is shown by a number of examples. The two main limitations are that higher-order functions cannot be dealt with, and it is hard to deal with terms that contain functions as subterms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Allen:2012:SDR, author = "Wyatt Allen and Martin Erwig", title = "{Surveyor}: a {DSEL} for representing and analyzing strongly typed surveys", journal = j-SIGPLAN, volume = "47", number = "12", pages = "81--90", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Polls and surveys are increasingly employed to gather information about attitudes and experiences of all kinds of populations and user groups. The ultimate purpose of a survey is to identify trends and relationships that can inform decision makers. 
To this end, the data gathered by a survey must be appropriately analyzed. Most of the currently existing tools focus on the user interface aspect of the data collection task, but pay little attention to the structure and type of the collected data, which are usually represented as potentially tag-annotated, but otherwise unstructured, plain text. This makes the task of writing data analysis programs often difficult and error-prone, whereas a typed data representation could support the writing of type-directed data analysis tools that would enjoy the many benefits of static typing. In this paper we present Surveyor, a DSEL that allows the compositional construction of typed surveys, where the types describe the structure of the data to be collected. A survey can be run to gather typed data, which can then be subjected to analysis tools that are built using Surveyor's typed combinators. Altogether the Surveyor DSEL realizes a strongly typed and type-directed approach to data gathering and analysis. The implementation of our DSEL is based on GADTs to allow a flexible, yet strongly typed representation of surveys. Moreover, the implementation employs the Scrap-Your-Boilerplate library to facilitate the type-dependent traversal, extraction, and combination of data gathered from surveys.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Winograd-Cort:2012:WIE, author = "Daniel Winograd-Cort and Paul Hudak", title = "Wormholes: introducing effects to {FRP}", journal = j-SIGPLAN, volume = "47", number = "12", pages = "91--104", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Functional reactive programming (FRP) is a useful model for programming real-time and reactive systems in which one defines a signal function to process a stream of input values into a stream of output values. However, performing side effects (e.g. memory mutation or input/output) in this model is tricky and typically unsafe. In previous work, Winograd-Cort et al. [2012] introduced resource types and wormholes to address this problem. This paper better motivates, expands upon, and formalizes the notion of a wormhole to fully unlock its potential. We show, for example, that wormholes can be used to define the concept of causality. This in turn allows us to provide behaviors such as looping, a core component of most languages, without building it directly into the language. We also improve upon our previous design by making wormholes less verbose and easier to use. To formalize the notion of a wormhole, we define an extension to the simply typed lambda calculus, complete with typing rules and operational semantics. In addition, we present a new form of semantic transition that we call a temporal transition to specify how an FRP program behaves over time and to allow us to better reason about causality. As our model is designed for a Haskell implementation, the semantics are lazy. 
Finally, with the language defined, we prove that our wormholes indeed allow side effects to be performed safely in an FRP framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yorgey:2012:MTV, author = "Brent A. Yorgey", title = "{Monoids}: theme and variations (functional pearl)", journal = j-SIGPLAN, volume = "47", number = "12", pages = "105--116", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "The monoid is a humble algebraic structure, at first glance even downright boring. However, there's much more to monoids than meets the eye. Using examples taken from the diagrams vector graphics framework as a case study, I demonstrate the power and beauty of monoids for library design. The paper begins with an extremely simple model of diagrams and proceeds through a series of incremental variations, all related somehow to the central theme of monoids. Along the way, I illustrate the power of compositional semantics; why you should also pay attention to the monoid's even humbler cousin, the semigroup; monoid homomorphisms; and monoid actions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Eisenberg:2012:DTP, author = "Richard A. Eisenberg and Stephanie Weirich", title = "Dependently typed programming with singletons", journal = j-SIGPLAN, volume = "47", number = "12", pages = "117--130", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Haskell programmers have been experimenting with dependent types for at least a decade, using clever encodings that push the limits of the Haskell type system. However, the cleverness of these encodings is also their main drawback. Although the ideas are inspired by dependently typed programs, the code looks significantly different. As a result, GHC implementors have responded with extensions to Haskell's type system, such as GADTs, type families, and datatype promotion. However, there remains a significant difference between programming in Haskell and in full-spectrum dependently typed languages. Haskell enforces a phase separation between runtime values and compile-time types. Therefore, singleton types are necessary to express the dependency between values and types. These singleton types introduce overhead and redundancy for the programmer. This paper presents the singletons library, which generates the boilerplate code necessary for dependently typed programming using GHC. To compare with full-spectrum languages, we present an extended example based on an Agda interface for safe database access. 
The paper concludes with a detailed discussion on the current capabilities of GHC for dependently typed programming and suggestions for future extensions to better support this style of programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Swierstra:2012:XCE, author = "Wouter Swierstra", title = "{{\tt xmonad}} in {Coq} (experience report): programming a window manager in a proof assistant", journal = j-SIGPLAN, volume = "47", number = "12", pages = "131--136", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "This report documents the insights gained from implementing the core functionality of xmonad, a popular window manager written in Haskell, in the Coq proof assistant. Rather than focus on verification, this report outlines the technical challenges involved with incorporating Coq code in a Haskell project.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Terei:2012:SH, author = "David Terei and Simon Marlow and Simon Peyton Jones and David Mazi{\`e}res", title = "{Safe Haskell}", journal = j-SIGPLAN, volume = "47", number = "12", pages = "137--148", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Though Haskell is predominantly type-safe, implementations contain a few loopholes through which code can bypass typing and module encapsulation. This paper presents Safe Haskell, a language extension that closes these loopholes. Safe Haskell makes it possible to confine and safely execute untrusted, possibly malicious code. By strictly enforcing types, Safe Haskell allows a variety of different policies from API sandboxing to information-flow control to be implemented easily as monads. Safe Haskell is aimed to be as unobtrusive as possible. It enforces properties that programmers tend to meet already by convention. We describe the design of Safe Haskell and an implementation (currently shipping with GHC) that infers safety for code that lies in a safe subset of the language. We use Safe Haskell to implement an online Haskell interpreter that can securely execute arbitrary untrusted code with no overhead. 
The use of Safe Haskell greatly simplifies this task and allows the use of a large body of existing code and tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Erdweg:2012:LSL, author = "Sebastian Erdweg and Felix Rieger and Tillmann Rendel and Klaus Ostermann", title = "Layout-sensitive language extensibility with {SugarHaskell}", journal = j-SIGPLAN, volume = "47", number = "12", pages = "149--160", month = dec, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2430532.2364526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jan 18 18:22:13 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '12 conference proceedings.", abstract = "Programmers need convenient syntax to write elegant and concise programs. Consequently, the Haskell standard provides syntactic sugar for some scenarios (e.g., do notation for monadic code), authors of Haskell compilers provide syntactic sugar for more scenarios (e.g., arrow notation in GHC), and some Haskell programmers implement preprocessors for their individual needs (e.g., idiom brackets in SHE). But manually written preprocessors cannot scale: They are expensive, error-prone, and not composable. Most researchers and programmers therefore refrain from using the syntactic notations they need in actual Haskell programs, but only use them in documentation or papers. We present a syntactically extensible version of Haskell, SugarHaskell, that empowers ordinary programmers to implement and use custom syntactic sugar. Building on our previous work on syntactic extensibility for Java, SugarHaskell integrates syntactic extensions as sugar libraries into Haskell's module system. Syntax extensions in SugarHaskell can declare arbitrary context-free and layout-sensitive syntax. SugarHaskell modules are compiled into Haskell modules and further processed by a Haskell compiler. We provide an Eclipse-based IDE for SugarHaskell that is extensible, too, and automatically provides syntax coloring for all syntax extensions imported into a module. We have validated SugarHaskell with several case studies, including arrow notation (as implemented in GHC) and EBNF as a concise syntax for the declaration of algebraic data types with associated concrete syntax. EBNF declarations also show how to extend the extension mechanism itself: They introduce syntactic sugar for using the declared concrete syntax in other SugarHaskell modules.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gonthier:2013:EMO, author = "Georges Gonthier", title = "Engineering mathematics: the odd order theorem proof", journal = j-SIGPLAN, volume = "48", number = "1", pages = "1--2", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429071", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Even with the assistance of computer tools, the formalized description and verification of research-level mathematics remains a daunting task, not least because of the talent with which mathematicians combine diverse theories to achieve their ends.
By combining tools and techniques from type theory, language design, and software engineering we have managed to capture enough of these practices to formalize the proof of the Odd Order theorem, a landmark result in Group Theory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Losch:2013:FAN, author = "Steffen L{\"o}sch and Andrew M. Pitts", title = "Full abstraction for nominal {Scott} domains", journal = j-SIGPLAN, volume = "48", number = "1", pages = "3--14", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429073", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop a domain theory within nominal sets and present programming language constructs and results that can be gained from this approach. The development is based on the concept of orbit-finite subset, that is, a subset of a nominal sets that is both finitely supported and contained in finitely many orbits. This concept appears prominently in the recent research programme of Bojanczyk et al. on automata over infinite languages, and our results establish a connection between their work and a characterisation of topological compactness discovered, in a quite different setting, by Winskel and Turner as part of a nominal domain theory for concurrency. We use this connection to derive a notion of Scott domain within nominal sets. The functionals for existential quantification over names and `definite description' over names turn out to be compact in the sense appropriate for nominal Scott domains. Adding them, together with parallel-or, to a programming language for recursively defined higher-order functions with name abstraction and locally scoped names, we prove a full abstraction result for nominal Scott domains analogous to Plotkin's classic result about PCF and conventional Scott domains: two program phrases have the same observable operational behaviour in all contexts if and only if they denote equal elements of the nominal Scott domain model. This is the first full abstraction result we know of for higher-order functions with local names that uses a domain theory based on ordinary extensional functions, rather than using the more intensional approach of game semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Tate:2013:SSP, author = "Ross Tate", title = "The sequential semantics of producer effect systems", journal = j-SIGPLAN, volume = "48", number = "1", pages = "15--26", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429074", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effects are fundamental to programming languages. Even the lambda calculus has effects, and consequently the two famous evaluation strategies produce different semantics. As such, much research has been done to improve our understanding of effects. 
Since Moggi introduced monads for his computational lambda calculus, further generalizations have been designed to formalize increasingly complex computational effects, such as indexed monads followed by layered monads followed by parameterized monads. This succession prompted us to determine the most general formalization possible. In searching for this formalization we came across many surprises, such as the insufficiencies of arrows, as well as many unexpected insights, such as the importance of considering an effect as a small component of a whole system rather than just an isolated feature. In this paper we present our semantic formalization for producer effect systems, which we call a productor, and prove its maximal generality by focusing on only sequential composition of effectful computations, consequently guaranteeing that the existing monadic techniques are specializations of productors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Abel:2013:CPI, author = "Andreas Abel and Brigitte Pientka and David Thibodeau and Anton Setzer", title = "{Copatterns}: programming infinite structures by observations", journal = j-SIGPLAN, volume = "48", number = "1", pages = "27--38", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429075", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inductive datatypes provide mechanisms to define finite data such as finite lists and trees via constructors and allow programmers to analyze and manipulate finite data via pattern matching. In this paper, we develop a dual approach for working with infinite data structures such as streams. Infinite data inhabits coinductive datatypes which denote greatest fixpoints. Unlike finite data which is defined by constructors we define infinite data by observations. Dual to pattern matching, a tool for analyzing finite data, we develop the concept of copattern matching, which allows us to synthesize infinite data. This leads to a symmetric language design where pattern matching on finite and infinite data can be mixed. We present a core language for programming with infinite structures by observations together with its operational semantics based on (co)pattern matching and describe coverage of copatterns. Our language naturally supports both call-by-name and call-by-value interpretations and can be seamlessly integrated into existing languages like Haskell and ML. We prove type soundness for our language and sketch how copatterns open new directions for solving problems in the interaction of coinductive and dependent types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Blelloch:2013:CEF, author = "Guy E. 
Blelloch and Robert Harper", title = "Cache and {I/O} efficient functional algorithms", journal = j-SIGPLAN, volume = "48", number = "1", pages = "39--50", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429077", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The widely studied I/O and ideal-cache models were developed to account for the large difference in costs to access memory at different levels of the memory hierarchy. Both models are based on a two-level memory hierarchy with a fixed-size primary memory (cache) of size {$M$} and an unbounded secondary memory organized in blocks of size {$B$}. The cost measure is based purely on the number of block transfers between the primary and secondary memory. All other operations are free. Many algorithms have been analyzed in these models and indeed these models predict the relative performance of algorithms much more accurately than the standard RAM model. The models, however, require specifying algorithms at a very low level, requiring the user to carefully lay out their data in arrays in memory and manage their own memory allocation. In this paper we present a cost model for analyzing the memory efficiency of algorithms expressed in a simple functional language. We show how some algorithms written in standard forms using just lists and trees (no arrays) and requiring no explicit memory layout or memory management are efficient in the model. We then describe an implementation of the language and show provable bounds for mapping the cost in our model to the cost in the ideal-cache model. These bounds imply that purely functional programs based on lists and trees with no special attention to any details of memory layout can be asymptotically as efficient as the carefully designed imperative I/O efficient algorithms. For example, we describe an {$ O((n/B) \log_{M/B} (n/B)) $} cost sorting algorithm, which is optimal in the ideal cache and I/O models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Ben-Amram:2013:LRP, author = "Amir M. Ben-Amram and Samir Genaim", title = "On the linear ranking problem for integer linear-constraint loops", journal = j-SIGPLAN, volume = "48", number = "1", pages = "51--62", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429078", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we study the complexity of the Linear Ranking problem: given a loop, described by linear constraints over a finite set of integer variables, is there a linear ranking function for this loop? While existence of such a function implies termination, this problem is not equivalent to termination. When the variables range over the rationals or reals, the Linear Ranking problem is known to be PTIME decidable. However, when they range over the integers, whether for single-path or multipath loops, the complexity of the Linear Ranking problem has not yet been determined. We show that it is coNP-complete.
However, we point out some special cases of importance of PTIME complexity. We also present complete algorithms for synthesizing linear ranking functions, both for the general case and the special PTIME cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Mayr:2013:AAM, author = "Richard Mayr and Lorenzo Clemente", title = "Advanced automata minimization", journal = j-SIGPLAN, volume = "48", number = "1", pages = "63--74", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429079", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an efficient algorithm to reduce the size of nondeterministic Buchi word automata, while retaining their language. Additionally, we describe methods to solve PSPACE-complete automata problems like universality, equivalence and inclusion for much larger instances (1-3 orders of magnitude) than before. This can be used to scale up applications of automata in formal verification tools and decision procedures for logical theories. The algorithm is based on new transition pruning techniques. These use criteria based on combinations of backward and forward trace inclusions. Since these relations are themselves PSPACE-complete, we describe methods to compute good approximations of them in polynomial time. Extensive experiments show that the average-case complexity of our algorithm scales quadratically. The size reduction of the automata depends very much on the class of instances, but our algorithm consistently outperforms all previous techniques by a wide margin. We tested our algorithm on Buchi automata derived from LTL-formulae, many classes of random automata and automata derived from mutual exclusion protocols, and compared its performance to the well-known automata tool GOAL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Unno:2013:ARC, author = "Hiroshi Unno and Tachio Terauchi and Naoki Kobayashi", title = "Automating relatively complete verification of higher-order functional programs", journal = j-SIGPLAN, volume = "48", number = "1", pages = "75--86", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429081", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an automated approach to relatively completely verifying safety (i.e., reachability) property of higher-order functional programs. Our contribution is two-fold. First, we extend the refinement type system framework employed in the recent work on (incomplete) automated higher-order verification by drawing on the classical work on relatively complete ``Hoare logic like'' program logic for higher-order procedural languages. 
Then, by adopting the recently proposed techniques for solving constraints over quantified first-order logic formulas, we develop an automated type inference method for the type system, thereby realizing an automated relatively complete verification of higher-order programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Atkey:2013:AIA, author = "Robert Atkey and Patricia Johann and Andrew Kennedy", title = "Abstraction and invariance for algebraically indexed types", journal = j-SIGPLAN, volume = "48", number = "1", pages = "87--100", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429082", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reynolds' relational parametricity provides a powerful way to reason about programs in terms of invariance under changes of data representation. A dazzling array of applications of Reynolds' theory exists, exploiting invariance to yield ``free theorems'', non-inhabitation results, and encodings of algebraic datatypes. Outside computer science, invariance is a common theme running through many areas of mathematics and physics. For example, the area of a triangle is unaltered by rotation or flipping. If we scale a triangle, then we scale its area, maintaining an invariant relationship between the two. The transformations under which properties are invariant are often organised into groups, with the algebraic structure reflecting the composability and invertibility of transformations. In this paper, we investigate programming languages whose types are indexed by algebraic structures such as groups of geometric transformations. Other examples include types indexed by principals--for information flow security--and types indexed by distances--for analysis of analytic uniform continuity properties. Following Reynolds, we prove a general Abstraction Theorem that covers all these instances. Consequences of our Abstraction Theorem include free theorems expressing invariance properties of programs, type isomorphisms based on invariance properties, and non-definability results indicating when certain algebraically indexed types are uninhabited or only inhabited by trivial programs. We have fully formalised our framework and most examples in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Benzaken:2013:SDS, author = "V{\'e}ronique Benzaken and Giuseppe Castagna and Kim Nguyen and J{\'e}r{\^o}me Sim{\'e}on", title = "Static and dynamic semantics of {NoSQL} languages", journal = j-SIGPLAN, volume = "48", number = "1", pages = "101--114", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429083", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a calculus for processing semistructured data that spans differences of application area among several novel query languages, broadly categorized as ``NoSQL''. 
This calculus lets users define their own operators, capturing a wider range of data processing capabilities, whilst providing a typing precision so far typical only of primitive hard-coded operators. The type inference algorithm is based on semantic type checking, resulting in type information that is both precise, and flexible enough to handle structured and semistructured data. We illustrate the use of this calculus by encoding a large fragment of Jaql, including operations and iterators over JSON, embedded SQL expressions, and co-grouping, and show how the encoding directly yields a typing discipline for Jaql as it is, namely without the addition of any type definition or type annotation in the code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Cerny:2013:QAR, author = "Pavol Cerny and Thomas A. Henzinger and Arjun Radhakrishna", title = "Quantitative abstraction refinement", journal = j-SIGPLAN, volume = "48", number = "1", pages = "115--128", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429085", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a general framework for abstraction with respect to quantitative properties, such as worst-case execution time, or power consumption. Our framework provides a systematic way for counter-example guided abstraction refinement for quantitative properties. The salient aspect of the framework is that it allows anytime verification, that is, verification algorithms that can be stopped at any time (for example, due to exhaustion of memory), and report approximations that improve monotonically when the algorithms are given more time. We instantiate the framework with a number of quantitative abstractions and refinement schemes, which differ in terms of how much quantitative information they keep from the original system. We introduce both state-based and trace-based quantitative abstractions, and we describe conditions that define classes of quantitative properties for which the abstractions provide over-approximations. We give algorithms for evaluating the quantitative properties on the abstract systems. We present algorithms for counter-example based refinements for quantitative properties for both state-based and segment-based abstractions. 
We perform a case study on worst-case execution time of executables to evaluate the anytime verification aspect and the quantitative abstractions we proposed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Farzan:2013:IDF, author = "Azadeh Farzan and Zachary Kincaid and Andreas Podelski", title = "Inductive data flow graphs", journal = j-SIGPLAN, volume = "48", number = "1", pages = "129--142", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429086", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The correctness of a sequential program can be shown by the annotation of its control flow graph with inductive assertions. We propose inductive data flow graphs, data flow graphs with incorporated inductive assertions, as the basis of an approach to verifying concurrent programs. An inductive data flow graph accounts for a set of dependencies between program actions in interleaved thread executions, and therefore stands as a representation for the set of concurrent program traces which give rise to these dependencies. The approach first constructs an inductive data flow graph and then checks whether all program traces are represented. The size of the inductive data flow graph is polynomial in the number of data dependencies (in a sense that can be made formal); it does not grow exponentially in the number of threads unless the data dependencies do. The approach shifts the burden of the exponential explosion towards the check whether all program traces are represented, i.e., to a combinatorial problem (over finite graphs).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{DSilva:2013:ACD, author = "Vijay D'Silva and Leopold Haller and Daniel Kroening", title = "Abstract conflict driven learning", journal = j-SIGPLAN, volume = "48", number = "1", pages = "143--154", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429087", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern satisfiability solvers implement an algorithm, called Conflict Driven Clause Learning, which combines search for a model with analysis of conflicts. We show that this algorithm can be generalised to solve the lattice-theoretic problem of determining if an additive transformer on a Boolean lattice is always bottom. Our generalised procedure combines overapproximations of greatest fixed points with underapproximation of least fixed points to obtain more precise results than computing fixed points in isolation. We generalise implication graphs used in satisfiability solvers to derive underapproximate transformers from overapproximate ones. 
Our generalisation provides a new method for static analysers that operate over non-distributive lattices to reason about properties that require disjunction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Goyet:2013:LLB, author = "Alexis Goyet", title = "The {Lambda Lambda-Bar} calculus: a dual calculus for unconstrained strategies", journal = j-SIGPLAN, volume = "48", number = "1", pages = "155--166", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429089", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a calculus which combines a simple, CCS-like representation of finite behaviors, with two dual binders $ \lambda $ and $ {\= \lambda } $. Infinite behaviors are obtained through a syntactical fixed-point operator, which is used to give a translation of $ \lambda $-terms. The duality of the calculus makes the roles of a function and its environment symmetrical. As usual, the environment is allowed to call a function at any given point, each time with a different argument. Dually, the function is allowed to answer any given call, each time with a different behavior. This grants terms in our language the power of functional references. The inspiration for this language comes from game semantics. Indeed, its normal forms give a simple concrete syntax for finite strategies, which are inherently non-innocent. This very direct correspondence allows us to describe, in syntactical terms, a number of features from game semantics. The fixed-point expansion of translated $ \lambda $-terms corresponds to the generation of infinite plays from the finite views of an innocent strategy. The syntactical duality between terms and co-terms corresponds to the duality between Player and Opponent. This duality also gives rise to a B{\"o}hm-out lemma. The paper is divided into two parts. The first one is purely syntactical, and requires no background in game semantics. The second describes the fully abstract game model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{lago:2013:GT, author = "Ugo {Dal lago} and Barbara Petit", title = "The geometry of types", journal = j-SIGPLAN, volume = "48", number = "1", pages = "167--178", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429090", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show that time complexity analysis of higher-order functional programs can be effectively reduced to an arguably simpler (although computationally equivalent) verification problem, namely checking first-order inequalities for validity. This is done by giving an efficient inference algorithm for linear dependent types which, given a PCF term, produces in output both a linear dependent type and a cost expression for the term, together with a set of proof obligations. Actually, the output type judgement is derivable iff all proof obligations are valid. 
This, coupled with the already known relative completeness of linear dependent types, ensures that no information is lost, i.e., that there are no false positives or negatives. Moreover, the procedure reflects the difficulty of the original problem: simple PCF terms give rise to sets of proof obligations which are easy to solve. The latter can then be put in a format suitable for automatic or semi-automatic verification by external solvers. Ongoing experimental evaluation has produced encouraging results, which are briefly presented in the paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Staton:2013:UPI, author = "Sam Staton and Paul Blain Levy", title = "Universal properties of impure programming languages", journal = j-SIGPLAN, volume = "48", number = "1", pages = "179--192", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429091", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We investigate impure, call-by-value programming languages. Our first language only has variables and let-binding. Its equational theory is a variant of Lambek's theory of multicategories that omits the commutativity axiom. We demonstrate that type constructions for impure languages --- products, sums and functions --- can be characterized by universal properties in the setting of `premulticategories', multicategories where the commutativity law may fail. This leads us to new, universal characterizations of two earlier equational theories of impure programming languages: the premonoidal categories of Power and Robinson, and the monad-based models of Moggi. Our analysis thus puts these earlier abstract ideas on a canonical foundation, bringing them to a new, syntactic level.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Hur:2013:PPC, author = "Chung-Kil Hur and Georg Neis and Derek Dreyer and Viktor Vafeiadis", title = "The power of parameterization in coinductive proof", journal = j-SIGPLAN, volume = "48", number = "1", pages = "193--206", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429093", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Coinduction is one of the most basic concepts in computer science. It is therefore surprising that the commonly-known lattice-theoretic accounts of the principles underlying coinductive proofs are lacking in two key respects: they do not support compositional reasoning (i.e. breaking proofs into separate pieces that can be developed in isolation), and they do not support incremental reasoning (i.e. developing proofs interactively by starting from the goal and generalizing the coinduction hypothesis repeatedly as necessary). In this paper, we show how to support coinductive proofs that are both compositional and incremental, using a dead simple construction we call the parameterized greatest fixed point. 
The basic idea is to parameterize the greatest fixed point of interest over the accumulated knowledge of ``the proof so far''. While this idea has been proposed before, by Winskel in 1989 and by Moss in 2001, neither of the previous accounts suggests its general applicability to improving the state of the art in interactive coinductive proof. In addition to presenting the lattice-theoretic foundations of parameterized coinduction, demonstrating its utility on representative examples, and studying its composition with ``up-to'' techniques, we also explore its mechanization in proof assistants like Coq and Isabelle. Unlike traditional approaches to mechanizing coinduction (e.g. Coq's cofix), which employ syntactic ``guardedness checking'', parameterized coinduction offers a semantic account of guardedness. This leads to faster and more robust proof development, as we demonstrate using our new Coq library, Paco.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Delaware:2013:MTC, author = "Benjamin Delaware and Bruno C. d. S. Oliveira and Tom Schrijvers", title = "Meta-theory {\`a} la carte", journal = j-SIGPLAN, volume = "48", number = "1", pages = "207--218", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Formalizing meta-theory, or proofs about programming languages, in a proof assistant has many well-known benefits. Unfortunately, the considerable effort involved in mechanizing proofs has prevented it from becoming standard practice. This cost can be amortized by reusing as much of existing mechanized formalizations as possible when building a new language or extending an existing one. One important challenge in achieving reuse is that the inductive definitions and proofs used in these formalizations are closed to extension. This forces language designers to cut and paste existing definitions and proofs in an ad-hoc manner and to expend considerable effort to patch up the results. The key contribution of this paper is the development of an induction technique for extensible Church encodings using a novel reinterpretation of the universal property of folds. These encodings provide the foundation for a framework, formalized in Coq, which uses type classes to automate the composition of proofs from modular components. This framework enables a more structured approach to the reuse of meta-theory formalizations through the composition of modular inductive definitions and proofs. Several interesting language features, including binders and general recursion, illustrate the capabilities of our framework. We reuse these features to build fully mechanized definitions and proofs for a number of languages, including a version of mini-ML. 
Bounded induction enables proofs of properties for non-inductive semantic functions, and mediating type classes enable proof adaptation for more feature-rich languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Park:2013:TPB, author = "Jonghyun Park and Jeongbong Seo and Sungwoo Park", title = "A theorem prover for {Boolean} {BI}", journal = j-SIGPLAN, volume = "48", number = "1", pages = "219--232", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429095", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While separation logic is acknowledged as an enabling technology for large-scale program verification, most of the existing verification tools use only a fragment of separation logic that excludes separating implication. As the first step towards a verification tool using full separation logic, we develop a nested sequent calculus for Boolean BI (Bunched Implications), the underlying theory of separation logic, as well as a theorem prover based on it. A salient feature of our nested sequent calculus is that its sequent may have not only smaller child sequents but also multiple parent sequents, thus producing a graph structure of sequents instead of a tree structure. Our theorem prover is based on backward search in a refinement of the nested sequent calculus in which weakening and contraction are built into all the inference rules. We explain the details of designing our theorem prover and provide empirical evidence of its practicality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Krishnamurthi:2013:PPL, author = "Shriram Krishnamurthi", title = "From principles to programming languages (and back)", journal = j-SIGPLAN, volume = "48", number = "1", pages = "233--234", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429097", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Batty:2013:LAC, author = "Mark Batty and Mike Dodds and Alexey Gotsman", title = "Library abstraction for {C\slash C++} concurrency", journal = j-SIGPLAN, volume = "48", number = "1", pages = "235--248", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429099", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When constructing complex concurrent systems, abstraction is vital: programmers should be able to reason about concurrent libraries in terms of abstract specifications that hide the implementation details. 
Relaxed memory models present substantial challenges in this respect, as libraries need not provide sequentially consistent abstractions: to avoid unnecessary synchronisation, they may allow clients to observe relaxed memory effects, and library specifications must capture these. In this paper, we propose a criterion for sound library abstraction in the new C11 and C++11 memory model, generalising the standard sequentially consistent notion of linearizability. We prove that our criterion soundly captures all client-library interactions, both through call and return values, and through the subtle synchronisation effects arising from the memory model. To illustrate our approach, we verify implementations against specifications for the lock-free Treiber stack and a producer-consumer queue. Ours is the first approach to compositional reasoning for concurrent C11/C++11 programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Ramalingam:2013:FTI, author = "Ganesan Ramalingam and Kapil Vaswani", title = "Fault tolerance via idempotence", journal = j-SIGPLAN, volume = "48", number = "1", pages = "249--262", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429100", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Building distributed services and applications is challenging due to the pitfalls of distribution such as process and communication failures. A natural solution to these problems is to detect potential failures, and retry the failed computation and/or resend messages. Ensuring correctness in such an environment requires distributed services and applications to be idempotent. In this paper, we study the inter-related aspects of process failures, duplicate messages, and idempotence. We first introduce a simple core language (based on lambda calculus) inspired by modern distributed computing platforms. This language formalizes the notions of a service, duplicate requests, process failures, data partitioning, and local atomic transactions that are restricted to a single store. We then formalize a desired (generic) correctness criterion for applications written in this language, consisting of idempotence (which captures the desired safety properties) and failure-freedom (which captures the desired progress properties). We then propose language support in the form of a monad that automatically ensures failfree idempotence. A key characteristic of our implementation is that it is decentralized and does not require distributed coordination. We show that the language support can be enriched with other useful constructs, such as compensations, while retaining the coordination-free decentralized nature of the implementation. We have implemented the idempotence monad (and its variants) in F\# and C\# and used our implementation to build realistic applications on Windows Azure. 
We find that the monad has low runtime overheads and leads to more declarative applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Carbone:2013:DFD, author = "Marco Carbone and Fabrizio Montesi", title = "Deadlock-freedom-by-design: multiparty asynchronous global programming", journal = j-SIGPLAN, volume = "48", number = "1", pages = "263--274", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429101", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the last decade, global descriptions have been successfully employed for the verification and implementation of communicating systems, respectively as protocol specifications and choreographies. In this work, we bring these two practices together by proposing a purely-global programming model. We show a novel interpretation of asynchrony and parallelism in a global setting and develop a typing discipline that verifies choreographies against protocol specifications, based on multiparty sessions. Exploiting the nature of global descriptions, our type system defines a new class of deadlock-free concurrent systems (deadlock-freedom-by-design), provides type inference, and supports session mobility. We give a notion of Endpoint Projection (EPP) which generates correct entity code (as pi-calculus terms) from a choreography. Finally, we evaluate our approach by providing a prototype implementation for a concrete programming language and by applying it to some examples from multicore and service-oriented programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Caires:2013:TDB, author = "Lu{\'\i}s Caires and Jo{\~a}o C. Seco", title = "The type discipline of behavioral separation", journal = j-SIGPLAN, volume = "48", number = "1", pages = "275--286", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429103", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce the concept of behavioral separation as a general principle for disciplining interference in higher-order imperative concurrent programs, and present a type-based approach that systematically develops the concept in the context of an ML-like language extended with concurrency and synchronization primitives. Behavioral separation builds on notions originally introduced for behavioral type systems and separation logics, but shifts the focus from the separation of static program state properties towards the separation of dynamic usage behaviors of runtime values. Behavioral separation types specify how values may be safely used by client code, and can enforce fine-grained interference control disciplines while preserving compositionality, information hiding, and flexibility. 
We illustrate how our type system, even if based on a small set of general primitives, is already able to tackle fairly challenging program idioms, involving aliasing at various types, concurrency with first-class threads, manipulation of linked data structures, behavioral borrowing, and invariant-based separation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Dinsdale-Young:2013:VCR, author = "Thomas Dinsdale-Young and Lars Birkedal and Philippa Gardner and Matthew Parkinson and Hongseok Yang", title = "{Views}: compositional reasoning for concurrent programs", journal = j-SIGPLAN, volume = "48", number = "1", pages = "287--300", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429104", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compositional abstractions underly many reasoning principles for concurrent programs: the concurrent environment is abstracted in order to reason about a thread in isolation; and these abstractions are composed to reason about a program consisting of many threads. For instance, separation logic uses formulae that describe part of the state, abstracting the rest; when two threads use disjoint state, their specifications can be composed with the separating conjunction. Type systems abstract the state to the types of variables; threads may be composed when they agree on the types of shared variables. In this paper, we present the ``Concurrent Views Framework'', a metatheory of concurrent reasoning principles. The theory is parameterised by an abstraction of state with a notion of composition, which we call views. The metatheory is remarkably simple, but highly applicable: the rely-guarantee method, concurrent separation logic, concurrent abstract predicates, type systems for recursive references and for unique pointers, and even an adaptation of the Owicki-Gries method can all be seen as instances of the Concurrent Views Framework. Moreover, our metatheory proves each of these systems is sound without requiring induction on the operational semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Jensen:2013:HLS, author = "Jonas B. Jensen and Nick Benton and Andrew Kennedy", title = "High-level separation logic for low-level code", journal = j-SIGPLAN, volume = "48", number = "1", pages = "301--314", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429105", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Separation logic is a powerful tool for reasoning about structured, imperative programs that manipulate pointers. However, its application to unstructured, lower-level languages such as assembly language or machine code remains challenging. In this paper we describe a separation logic tailored for this purpose that we have applied to x86 machine-code programs. 
The logic is built from an assertion logic on machine states over which we construct a specification logic that encapsulates uses of frames and step indexing. The traditional notion of Hoare triple is not applicable directly to unstructured machine code, where code and data are mixed together and programs do not in general run to completion, so instead we adopt a continuation-passing style of specification with preconditions alone. Nevertheless, the range of primitives provided by the specification logic, which include a higher-order frame connective, a novel read-only frame connective, and a 'later' modality, support the definition of derived forms to support structured-programming-style reasoning for common cases, in which standard rules for Hoare triples are derived as lemmas. Furthermore, our encoding of scoped assembly-language labels lets us give definitions and proof rules for powerful assembly-language 'macros' such as while loops, conditionals and procedures. We have applied the framework to a model of sequential x86 machine code built entirely within the Coq proof assistant, including tactic support based on computational reflection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Myers:2013:HLC, author = "Andrew C. Myers", title = "How languages can save distributed computing", journal = j-SIGPLAN, volume = "48", number = "1", pages = "315--316", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429107", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Henzinger:2013:QRC, author = "Thomas A. Henzinger and Christoph M. Kirsch and Hannes Payer and Ali Sezgin and Ana Sokolova", title = "Quantitative relaxation of concurrent data structures", journal = j-SIGPLAN, volume = "48", number = "1", pages = "317--328", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429109", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There is a trade-off between performance and correctness in implementing concurrent data structures. Better performance may be achieved at the expense of relaxing correctness, by redefining the semantics of data structures. We address such a redefinition of data structure semantics and present a systematic and formal framework for obtaining new data structures by quantitatively relaxing existing ones. We view a data structure as a sequential specification S containing all ``legal'' sequences over an alphabet of method calls. Relaxing the data structure corresponds to defining a distance from any sequence over the alphabet to the sequential specification: the k-relaxed sequential specification contains all sequences over the alphabet within distance k from the original specification. In contrast to other existing work, our relaxations are semantic (distance in terms of data structure states). 
As an instantiation of our framework, we present two simple yet generic relaxation schemes, called out-of-order and stuttering relaxation, along with several ways of computing distances. We show that the out-of-order relaxation, when further instantiated to stacks, queues, and priority queues, amounts to tolerating bounded out-of-order behavior, which cannot be captured by a purely syntactic relaxation (distance in terms of sequence manipulation, e.g. edit distance). We give concurrent implementations of relaxed data structures and demonstrate that bounded relaxations provide the means for trading correctness for performance in a controlled way. The relaxations are monotonic, which further highlights the trade-off: increasing k increases the number of permitted sequences, which as we demonstrate can lead to better performance. Finally, since a relaxed stack or queue also implements a pool, we actually have new concurrent pool implementations that outperform the state-of-the-art ones.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Demange:2013:PBB, author = "Delphine Demange and Vincent Laporte and Lei Zhao and Suresh Jagannathan and David Pichardie and Jan Vitek", title = "{Plan B}: a buffered memory model for {Java}", journal = j-SIGPLAN, volume = "48", number = "1", pages = "329--342", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429110", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent advances in verification have made it possible to envision trusted implementations of real-world languages. Java with its type-safety and fully specified semantics would appear to be an ideal candidate; yet, the complexity of the translation steps used in production virtual machines has made it a challenging target for verifying compiler technology. One of Java's key innovations, its memory model, poses significant obstacles to such an endeavor. The Java Memory Model is an ambitious attempt at specifying the behavior of multithreaded programs in a portable, hardware agnostic, way. While experts have an intuitive grasp of the properties that the model should enjoy, the specification is complex and not well-suited for integration within a verifying compiler infrastructure. Moreover, the specification is given in an axiomatic style that is distant from the intuitive reordering-based reasonings traditionally used to justify or rule out behaviors, and ill suited to the kind of operational reasoning one would expect to employ in a compiler. This paper takes a step back, and introduces a Buffered Memory Model (BMM) for Java. We choose a pragmatic point in the design space sacrificing generality in favor of a model that is fully characterized in terms of the reorderings it allows, amenable to formal reasoning, and which can be efficiently applied to a specific hardware family, namely x86 multiprocessors. Although the BMM restricts the reorderings compilers are allowed to perform, it serves as the key enabling device to achieving a verification pathway from bytecode to machine instructions.
Despite its restrictions, we show that it is backwards compatible with the Java Memory Model and that it does not cripple performance on TSO architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Turon:2013:LRF, author = "Aaron J. Turon and Jacob Thamsborg and Amal Ahmed and Lars Birkedal and Derek Dreyer", title = "Logical relations for fine-grained concurrency", journal = j-SIGPLAN, volume = "48", number = "1", pages = "343--356", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429111", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fine-grained concurrent data structures (or FCDs) reduce the granularity of critical sections in both time and space, thus making it possible for clients to access different parts of a mutable data structure in parallel. However, the tradeoff is that the implementations of FCDs are very subtle and tricky to reason about directly. Consequently, they are carefully designed to be contextual refinements of their coarse-grained counterparts, meaning that their clients can reason about them as if all access to them were sequentialized. In this paper, we propose a new semantic model, based on Kripke logical relations, that supports direct proofs of contextual refinement in the setting of a type-safe high-level language. The key idea behind our model is to provide a simple way of expressing the ``local life stories'' of individual pieces of an FCD's hidden state by means of protocols that the threads concurrently accessing that state must follow. By endowing these protocols with a simple yet powerful transition structure, as well as the ability to assert invariants on both heap states and specification code, we are able to support clean and intuitive refinement proofs for the most sophisticated types of FCDs, such as conditional compare-and-set (CCAS).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Gaboardi:2013:LDT, author = "Marco Gaboardi and Andreas Haeberlen and Justin Hsu and Arjun Narayan and Benjamin C. Pierce", title = "Linear dependent types for differential privacy", journal = j-SIGPLAN, volume = "48", number = "1", pages = "357--370", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429113", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Differential privacy offers a way to answer queries about sensitive information while providing strong, provable privacy guarantees, ensuring that the presence or absence of a single individual in the database has a negligible statistical effect on the query's result. Proving that a given query has this property involves establishing a bound on the query's sensitivity---how much its result can change when a single record is added or removed. A variety of tools have been developed for certifying that a given query is differentially private.
In one approach, Reed and Pierce [34] proposed a functional programming language, Fuzz, for writing differentially private queries. Fuzz uses linear types to track sensitivity and a probability monad to express randomized computation; it guarantees that any program with a certain type is differentially private. Fuzz can successfully verify many useful queries. However, it fails when the sensitivity analysis depends on values that are not known statically. We present DFuzz, an extension of Fuzz with a combination of linear indexed types and lightweight dependent types. This combination allows a richer sensitivity analysis that is able to certify a larger class of queries as differentially private, including ones whose sensitivity depends on runtime information. As in Fuzz, the differential privacy guarantee follows directly from the soundness theorem of the type system. We demonstrate the enhanced expressivity of DFuzz by certifying differential privacy for a broad class of iterative algorithms that could not be typed previously.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Fournet:2013:FAC, author = "Cedric Fournet and Nikhil Swamy and Juan Chen and Pierre-Evariste Dagand and Pierre-Yves Strub and Benjamin Livshits", title = "Fully abstract compilation to {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "1", pages = "371--384", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429114", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many tools allow programmers to develop applications in high-level languages and deploy them in web browsers via compilation to JavaScript. While practical and widely used, these compilers are ad hoc: no guarantee is provided on their correctness for whole programs, nor their security for programs executed within arbitrary JavaScript contexts. This paper presents a compiler with such guarantees. We compile an ML-like language with higher-order functions and references to JavaScript, while preserving all source program properties. Relying on type-based invariants and applicative bisimilarity, we show full abstraction: two programs are equivalent in all source contexts if and only if their wrapped translations are equivalent in all JavaScript contexts. 
We evaluate our compiler on sample programs, including a series of secure libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Livshits:2013:TFA, author = "Benjamin Livshits and Stephen Chong", title = "Towards fully automatic placement of security sanitizers and declassifiers", journal = j-SIGPLAN, volume = "48", number = "1", pages = "385--398", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429115", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A great deal of research on sanitizer placement, sanitizer correctness, checking path validity, and policy inference, has been done in the last five to ten years, involving type systems, static analysis and runtime monitoring and enforcement. However, in pretty much all work thus far, the burden of sanitizer placement has fallen on the developer. However, sanitizer placement in large-scale applications is difficult, and developers are likely to make errors, and thus create security vulnerabilities. This paper advocates a radically different approach: we aim to fully automate the placement of sanitizers by analyzing the flow of tainted data in the program. We argue that developers are better off leaving out sanitizers entirely instead of trying to place them. This paper proposes a fully automatic technique for sanitizer placement. Placement is static whenever possible, switching to run time when necessary. Run-time taint tracking techniques can be used to track the source of a value, and thus apply appropriate sanitization. However, due to the runtime overhead of run-time taint tracking, our technique avoids it wherever possible.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Goodman:2013:PPP, author = "Noah D. Goodman", title = "The principles and practice of probabilistic programming", journal = j-SIGPLAN, volume = "48", number = "1", pages = "399--402", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429117", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Gordon:2013:MLP, author = "Andrew D. Gordon and Mihhail Aizatulin and Johannes Borgstrom and Guillaume Claret and Thore Graepel and Aditya V. Nori and Sriram K.
Rajamani and Claudio Russo", title = "A model-learner pattern for {Bayesian} reasoning", journal = j-SIGPLAN, volume = "48", number = "1", pages = "403--416", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429119", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A Bayesian model is based on a pair of probability distributions, known as the prior and sampling distributions. A wide range of fundamental machine learning tasks, including regression, classification, clustering, and many others, can all be seen as Bayesian models. We propose a new probabilistic programming abstraction, a typed Bayesian model, which is based on a pair of probabilistic expressions for the prior and sampling distributions. A sampler for a model is an algorithm to compute synthetic data from its sampling distribution, while a learner for a model is an algorithm for probabilistic inference on the model. Models, samplers, and learners form a generic programming pattern for model-based inference. They support the uniform expression of common tasks including model testing, and generic compositions such as mixture models, evidence-based model averaging, and mixtures of experts. A formal semantics supports reasoning about model equivalence and implementation correctness. By developing a series of examples and three learner implementations based on exact inference, factor graphs, and Markov chain Monte Carlo, we demonstrate the broad applicability of this new programming pattern.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Suenaga:2013:HPS, author = "Kohei Suenaga and Hiroyoshi Sekine and Ichiro Hasuo", title = "Hyperstream processing systems: nonstandard modeling of continuous-time signals", journal = j-SIGPLAN, volume = "48", number = "1", pages = "417--430", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429120", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We exploit the apparent similarity between (discrete-time) stream processing and (continuous-time) signal processing and transfer a deductive verification framework from the former to the latter. Our development is based on rigorous semantics that relies on nonstandard analysis (NSA). Specifically, we start with a discrete framework consisting of a Lustre-like stream processing language, its Kahn-style fixed point semantics, and a program logic (in the form of a type system) for partial correctness guarantees. This stream framework is transferred as it is to one for hyperstreams---streams of streams, that typically arise from sampling (continuous-time) signals with progressively smaller intervals---via the logical infrastructure of NSA. Under a certain continuity assumption we identify hyperstreams with signals; our final outcome thus obtained is a deductive verification framework of signals. 
In it one verifies properties of signals using the (conventionally discrete) proof principles, like fixed point induction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Vytiniotis:2013:HHL, author = "Dimitrios Vytiniotis and Simon Peyton Jones and Koen Claessen and Dan Ros{\'e}n", title = "{HALO}: {Haskell} to logic through denotational semantics", journal = j-SIGPLAN, volume = "48", number = "1", pages = "431--442", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429121", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Even well-typed programs can go wrong in modern functional languages, by encountering a pattern-match failure, or simply returning the wrong answer. An increasingly-popular response is to allow programmers to write contracts that express semantic properties, such as crash-freedom or some useful post-condition. We study the static verification of such contracts. Our main contribution is a novel translation to first-order logic of both Haskell programs, and contracts written in Haskell, all justified by denotational semantics. This translation enables us to prove that functions satisfy their contracts using an off-the-shelf first-order logic theorem prover.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Botincan:2013:SSL, author = "Matko Botincan and Domagoj Babi{\'c}", title = "{Sigma*}: symbolic learning of input-output specifications", journal = j-SIGPLAN, volume = "48", number = "1", pages = "443--456", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Sigma*, a novel technique for learning symbolic models of software behavior. Sigma* addresses the challenge of synthesizing models of software by using symbolic conjectures and abstraction. By combining dynamic symbolic execution to discover symbolic input-output steps of the programs and counterexample guided abstraction refinement to over-approximate program behavior, Sigma* transforms arbitrary source representation of programs into faithful input-output models. We define a class of stream filters---programs that process streams of data items---for which Sigma* converges to a complete model if abstraction refinement eventually builds up a sufficiently strong abstraction. In other words, Sigma* is complete relative to abstraction. To represent inferred symbolic models, we use a variant of symbolic transducers that can be effectively composed and equivalence checked. Thus, Sigma* enables fully automatic analysis of behavioral properties such as commutativity, reversibility and idempotence, which is useful for web sanitizer verification and stream programs compiler optimizations, as we show experimentally. 
We also show how models inferred by Sigma* can boost performance of stream programs by parallelized code generation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Bonchi:2013:CNE, author = "Filippo Bonchi and Damien Pous", title = "Checking {NFA} equivalence with bisimulations up to congruence", journal = j-SIGPLAN, volume = "48", number = "1", pages = "457--468", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce bisimulation up to congruence as a technique for proving language equivalence of non-deterministic finite automata. Exploiting this technique, we devise an optimisation of the classical algorithm by Hopcroft and Karp. We compare our approach to the recently introduced antichain algorithms, by analysing and relating the two underlying coinductive proof methods. We give concrete examples where we exponentially improve over antichains; experimental results moreover show non negligible improvements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Koksal:2013:SBM, author = "Ali Sinan Koksal and Yewen Pu and Saurabh Srivastava and Rastislav Bodik and Jasmin Fisher and Nir Piterman", title = "Synthesis of biological models from mutation experiments", journal = j-SIGPLAN, volume = "48", number = "1", pages = "469--482", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429125", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Executable biology presents new challenges to formal methods. This paper addresses two problems that cell biologists face when developing formally analyzable models. First, we show how to automatically synthesize a concurrent in-silico model for cell development given in-vivo experiments of how particular mutations influence the experiment outcome. The problem of synthesis under mutations is unique because mutations may produce non-deterministic outcomes (presumably by introducing races between competing signaling pathways in the cells) and the synthesized model must be able to replay all these outcomes in order to faithfully describe the modeled cellular processes. In contrast, a ``regular'' concurrent program is correct if it picks any outcome allowed by the non-deterministic specification. We developed synthesis algorithms and synthesized a model of cell fate determination of the earthworm {\em C. elegans}. A version of this model previously took systems biologists months to develop. Second, we address the problem of under-constrained specifications that arise due to incomplete sets of mutation experiments. Under-constrained specifications give rise to distinct models, each explaining the same phenomenon differently. Addressing the ambiguity of specifications corresponds to analyzing the space of plausible models. 
We develop algorithms for detecting ambiguity in specifications, i.e., whether there exist alternative models that would produce different fates on some unperformed experiment, and for removing redundancy from specifications, i.e., computing minimal non-ambiguous specifications. Additionally, we develop a modeling language and embed it into Scala. We describe how this language design and embedding allows us to build an efficient synthesizer. For our {\em C. elegans\/} case study, we infer two observationally equivalent models expressing different biological hypotheses through different protein interactions. One of these hypotheses was previously unknown to biologists.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Upadrasta:2013:SPS, author = "Ramakrishna Upadrasta and Albert Cohen", title = "Sub-polyhedral scheduling using (unit-)two-variable-per-inequality polyhedra", journal = j-SIGPLAN, volume = "48", number = "1", pages = "483--496", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429127", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Polyhedral compilation has been successful in the design and implementation of complex loop nest optimizers and parallelizing compilers. The algorithmic complexity and scalability limitations remain one important weakness. We address it using sub-polyhedral under-approximations of the systems of constraints resulting from affine scheduling problems. We propose a sub-polyhedral scheduling technique using (Unit-)Two-Variable-Per-Inequality or (U)TVPI Polyhedra. This technique relies on simple polynomial time algorithms to under-approximate a general polyhedron into (U)TVPI polyhedra. We modify the state-of-the-art PLuTo compiler using our scheduling technique, and show that for a majority of the Polybench (2.0) kernels, the above under-approximations yield polyhedra that are non-empty. Solving the under-approximated system leads to asymptotic gains in complexity, and shows practically significant improvements when compared to a traditional LP solver. We also verify that code generated by our sub-polyhedral parallelization prototype matches the performance of PLuTo-optimized code when the under-approximation preserves feasibility.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Rompf:2013:ODS, author = "Tiark Rompf and Arvind K. Sujeeth and Nada Amin and Kevin J. 
Brown and Vojin Jovanovic and HyoukJoong Lee and Manohar Jonnalagedda and Kunle Olukotun and Martin Odersky", title = "Optimizing data structures in high-level programs: new directions for extensible compilers based on staging", journal = j-SIGPLAN, volume = "48", number = "1", pages = "497--510", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High level data structures are a cornerstone of modern programming and at the same time stand in the way of compiler optimizations. In order to reason about user- or library-defined data structures compilers need to be extensible. Common mechanisms to extend compilers fall into two categories. Frontend macros, staging or partial evaluation systems can be used to programmatically remove abstraction and specialize programs before they enter the compiler. Alternatively, some compilers allow extending the internal workings by adding new transformation passes at different points in the compile chain or adding new intermediate representation (IR) types. None of these mechanisms alone is sufficient to handle the challenges posed by high level data structures. This paper shows a novel way to combine them to yield benefits that are greater than the sum of the parts. Instead of using staging merely as a front end, we implement internal compiler passes using staging as well. These internal passes delegate back to program execution to construct the transformed IR. Staging is known to simplify program generation, and in the same way it can simplify program transformation. Defining a transformation as a staged IR interpreter is simpler than implementing a low-level IR to IR transformer. With custom IR nodes, many optimizations that are expressed as rewritings from IR nodes to staged program fragments can be combined into a single pass, mitigating phase ordering problems. Speculative rewriting can preserve optimistic assumptions around loops. We demonstrate several powerful program optimizations using this architecture that are particularly geared towards data structures: a novel loop fusion and deforestation algorithm, array of struct to struct of array conversion, object flattening and code generation for heterogeneous parallel devices. We validate our approach using several non trivial case studies that exhibit order of magnitude speedups in experiments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Adams:2013:PPI, author = "Michael D. Adams", title = "Principled parsing for indentation-sensitive languages: revisiting {Landin}'s offside rule", journal = j-SIGPLAN, volume = "48", number = "1", pages = "511--522", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429129", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several popular languages, such as Haskell, Python, and F\#, use the indentation and layout of code as part of their syntax. 
Because context-free grammars cannot express the rules of indentation, parsers for these languages currently use ad hoc techniques to handle layout. These techniques tend to be low-level and operational in nature and forgo the advantages of more declarative specifications like context-free grammars. For example, they are often coded by hand instead of being generated by a parser generator. This paper presents a simple extension to context-free grammars that can express these layout rules, and derives GLR and LR(k) algorithms for parsing these grammars. These grammars are easy to write and can be parsed efficiently. Examples for several languages are presented, as are benchmarks showing the practical efficiency of these algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Hobor:2013:RSD, author = "Aquinas Hobor and Jules Villard", title = "The ramifications of sharing in data structures", journal = j-SIGPLAN, volume = "48", number = "1", pages = "523--536", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429131", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programs manipulating mutable data structures with intrinsic sharing present a challenge for modular verification. Deep aliasing inside data structures dramatically complicates reasoning in isolation over parts of these objects because changes to one part of the structure (say, the left child of a dag node) can affect other parts (the right child or some of its descendants) that may point into it. The result is that finding intuitive and compositional proofs of correctness is usually a struggle. We propose a compositional proof system that enables local reasoning in the presence of sharing. While the AI ``frame problem'' elegantly captures the reasoning required to verify programs without sharing, we contend that natural reasoning about programs with sharing instead requires an answer to a different and more challenging AI problem, the ``ramification problem'': reasoning about the indirect consequences of actions. Accordingly, we present a RAMIFY proof rule that attacks the ramification problem head-on and show how to reason with it. Our framework is valid in any separation logic and permits sound compositional and local reasoning in the context of both specified and unspecified sharing. 
We verify the correctness of a number of examples, including programs that manipulate dags, graphs, and overlaid data structures in nontrivial ways.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Totla:2013:CIB, author = "Nishant Totla and Thomas Wies", title = "Complete instantiation-based interpolation", journal = j-SIGPLAN, volume = "48", number = "1", pages = "537--548", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429132", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Craig interpolation has been a valuable tool for formal methods with interesting applications in program analysis and verification. Modern SMT solvers implement interpolation procedures for the theories that are most commonly used in these applications. However, many application-specific theories remain unsupported, which limits the class of problems to which interpolation-based techniques apply. In this paper, we present a generic framework to build new interpolation procedures via reduction to existing interpolation procedures. We consider the case where an application-specific theory can be formalized as an extension of a base theory with additional symbols and axioms. Our technique uses finite instantiation of the extension axioms to reduce an interpolation problem in the theory extension to one in the base theory. We identify a model-theoretic criterion that allows us to detect the cases where our technique is complete. We discuss specific theories that are relevant in program verification and that satisfy this criterion. In particular, we obtain complete interpolation procedures for theories of arrays and linked lists. The latter is the first complete interpolation procedure for a theory that supports reasoning about complex shape properties of heap-allocated data structures. We have implemented this procedure in a prototype on top of existing SMT solvers and used it to automatically infer loop invariants of list-manipulating programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Barr:2013:ADF, author = "Earl T. Barr and Thanh Vo and Vu Le and Zhendong Su", title = "Automatic detection of floating-point exceptions", journal = j-SIGPLAN, volume = "48", number = "1", pages = "549--560", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429133", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is well-known that floating-point exceptions can be disastrous and writing exception-free numerical programs is very difficult. Thus, it is important to automatically detect such errors. In this paper, we present Ariadne, a practical symbolic execution system specifically designed and implemented for detecting floating-point exceptions. Ariadne systematically transforms a numerical program to explicitly check each exception triggering condition. 
Ariadne symbolically executes the transformed program using real arithmetic to find candidate real-valued inputs that can reach and trigger an exception. Ariadne converts each candidate input into a floating-point number, then tests it against the original program. In general, approximating floating-point arithmetic with real arithmetic can change paths from feasible to infeasible and vice versa. The key insight of this work is that, for the problem of detecting floating-point exceptions, this approximation works well in practice because, if one input reaches an exception, many are likely to, and at least one of them will do so over both floating-point and real arithmetic. To realize Ariadne, we also devised a novel, practical linearization technique to solve nonlinear constraints. We extensively evaluated Ariadne over 467 scalar functions in the widely used GNU Scientific Library (GSL). Our results show that Ariadne is practical and identifies a large number of real runtime exceptions in GSL. The GSL developers confirmed our preliminary findings and look forward to Ariadne's public release, which we plan to do in the near future.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Ley-Wild:2013:SAS, author = "Ruy Ley-Wild and Aleksandar Nanevski", title = "Subjective auxiliary state for coarse-grained concurrency", journal = j-SIGPLAN, volume = "48", number = "1", pages = "561--574", month = jan, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480359.2429134", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:03 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "From Owicki-Gries' Resource Invariants and Jones' Rely/Guarantee to modern variants based on Separation Logic, axiomatic logics for concurrency require auxiliary state to explicitly relate the effect of all threads to the global invariant on the shared resource. Unfortunately, auxiliary state gives the proof of an individual thread access to the auxiliaries of all other threads. This makes proofs sensitive to the global context, which prevents local reasoning and compositionality. To tame this historical difficulty of auxiliary state, we propose subjective auxiliary state, whereby each thread is verified using a self view (i.e., the thread's effect on the shared resource) and an other view (i.e., the collective effect of all the other threads). Subjectivity generalizes auxiliary state from stacks and heaps to user-chosen partial commutative monoids, which can eliminate the dependence on the global thread structure. We employ subjectivity to formulate Subjective Concurrent Separation Logic as a combination of subjective auxiliary state and Concurrent Separation Logic. The logic yields simple, compositional proofs of coarse-grained concurrent programs that use auxiliary state, and scales to support higher-order recursive procedures that can themselves fork new threads. We prove the soundness of the logic with a novel denotational semantics of action trees and a definition of safety using rely/guarantee transitions over a large subjective footprint. 
We have mechanized the denotational semantics, logic, metatheory, and a number of examples by a shallow embedding in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '13 conference proceedings.", } @Article{Miller:2013:TSG, author = "Mark Miller", title = "A tested semantics for getters, setters, and eval in {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "2", pages = "1--16", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384579", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present S5, a semantics for the strict mode of the ECMAScript 5.1 (JavaScript) programming language. S5 shrinks the large source language into a manageable core through an implemented transformation. The resulting specification has been tested against real-world conformance suites for the language. This paper focuses on two aspects of S5: accessors (getters and setters) and eval. Since these features are complex and subtle in JavaScript, they warrant special study. Variations on both features are found in several other programming languages, so their study is likely to have broad applicability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Homer:2013:POG, author = "Michael Homer and James Noble and Kim B. Bruce and Andrew P. Black and David J. Pearce", title = "Patterns as objects in {Grace}", journal = j-SIGPLAN, volume = "48", number = "2", pages = "17--28", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384581", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Object orientation and pattern matching are often seen as conflicting approaches to program design. Object-oriented programs place type-dependent behavior inside objects and invoke it via dynamic dispatch, while pattern-matching programs place type-dependent behavior outside data structures and invoke it via multiway conditionals (case statements). Grace is a new, dynamic, object-oriented language designed to support teaching: to this end, Grace needs to support both styles. We explain how this conflict can be resolved gracefully: by modelling patterns and cases as partial functions, reifying those functions as objects, and then building up complex patterns from simpler ones using pattern combinators. We describe the implementation of this design as an object-oriented framework, and a case study of its effectiveness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Bloom:2013:RSP, author = "Bard Bloom and Martin J. 
Hirzel", title = "Robust scripting via patterns", journal = j-SIGPLAN, volume = "48", number = "2", pages = "29--40", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384582", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic typing in scripting languages is a two-edged sword. On the one hand, it can be more flexible and more concise than static typing. On the other hand, it can lead to less robust code. We argue that patterns can give scripts much of the robustness of static typing, without losing the flexibility and concision of dynamic typing. To make this case, we describe a rich pattern system in the dynamic language Thorn. Thorn patterns interact with its control constructs and scoping rules to support concise and robust test-and-extract idioms. Thorn patterns encompass an extensive set of features from ML-style patterns to regular expressions and beyond. And Thorn patterns can be first-class and support pattern-punning (mirror constructor syntax). Overall, this paper describes a powerful pattern system that makes scripting more robust.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Normark:2013:OOP, author = "Kurt N{\o}rmark and Lone Leth Thomsen and Bent Thomsen", title = "Object-oriented programming with gradual abstraction", journal = j-SIGPLAN, volume = "48", number = "2", pages = "41--52", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384583", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe an experimental object-oriented programming language, ASL2, that supports program development by means of a series of abstraction steps. The language allows immediate object construction, and it is possible to use the constructed objects for concrete problem solving tasks. Classes and class hierarchies can be derived from the objects --- via gradual abstraction steps. We introduce two levels of object classification, called weak and strong object classification. Strong object classification relies on conventional classes, whereas weak object classification is looser, and less restrictive. As a central mechanism, weakly classified objects are allowed to borrow methods from each other. ASL2 supports class generalization, as a counterpart to class specialization and inheritance in mainstream object-oriented programming languages. 
The final abstraction step discussed in this paper is a syntactical abstraction step that derives a source file with a syntactical class form.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Pignotti:2013:ADP, author = "Alessandro Pignotti and Adam Welc and Bernd Mathiske", title = "Adaptive data parallelism for {Internet} clients on heterogeneous platforms", journal = j-SIGPLAN, volume = "48", number = "2", pages = "53--62", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384585", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's Internet is long past static web pages filled with HTML-formatted text sprinkled with an occasional image or animation. We have entered an era of Rich Internet Applications executed locally on Internet clients such as web browsers: games, physics engines, image rendering, photo editing, etc. Yet today's languages used to program Internet clients have limited ability to tap into the computational capabilities of the underlying, often heterogeneous, platforms. In this paper we present how a Domain Specific Language (DSL) can be integrated into ActionScript, one of the most popular scripting languages used to program Internet clients and a close cousin of JavaScript. We demonstrate how our DSL, called ASDP (ActionScript Data Parallel), can be used to enable data parallelism for existing sequential programs. We also present a prototype of a system where data parallel workloads can be executed on either a CPU or a GPU, with the runtime system transparently selecting the best processing unit, depending on the type of workload as well as the architecture and current load of the execution platform. We evaluate performance of our system on a variety of benchmarks, representing different types of workloads: physics, image processing, scientific computing and cryptography.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Ardo:2013:LAO, author = "H{\aa}kan Ard{\"o} and Carl Friedrich Bolz and Maciej Fija{\l}kowski", title = "Loop-aware optimizations in {PyPy}'s tracing {JIT}", journal = j-SIGPLAN, volume = "48", number = "2", pages = "63--72", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384586", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One of the nice properties of a tracing just-in-time compiler (JIT) is that many of its optimizations are simple, requiring one forward pass only. This is not true for loop-invariant code motion, which is a very important optimization for code with tight kernels, especially for dynamic languages that typically perform quite a lot of loop-invariant type checking, boxed value unwrapping and virtual method lookups. In this paper we explain a scheme pioneered within the context of the LuaJIT project for making basic optimizations loop-aware by using a simple pre-processing step on the trace without changing the optimizations themselves. 
We have implemented the scheme in RPython's tracing JIT compiler. PyPy's Python JIT executing simple numerical kernels can become up to two times faster, bringing the performance into the ballpark of static language compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Wurthinger:2013:SOA, author = "Thomas W{\"u}rthinger and Andreas W{\"o}{\ss} and Lukas Stadler and Gilles Duboscq and Doug Simon and Christian Wimmer", title = "Self-optimizing {AST} interpreters", journal = j-SIGPLAN, volume = "48", number = "2", pages = "73--82", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384587", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An abstract syntax tree (AST) interpreter is a simple and natural way to implement a programming language. However, it is also considered the slowest approach because of the high overhead of virtual method dispatch. Language implementers therefore define bytecodes to speed up interpretation, at the cost of introducing inflexible and hard to maintain bytecode formats. We present a novel approach to implementing AST interpreters in which the AST is modified during interpretation to incorporate type feedback. This tree rewriting is a general and powerful mechanism to optimize many constructs common in dynamic programming languages. Our system is implemented in Java and uses the static typing and primitive data types of Java elegantly to avoid the cost of boxed representations of primitive values in dynamic programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Wernli:2013:OFC, author = "Erwann Wernli and Pascal Maerki and Oscar Nierstrasz", title = "Ownership, filters and crossing handlers: flexible ownership in dynamic languages", journal = j-SIGPLAN, volume = "48", number = "2", pages = "83--94", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384589", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sharing mutable objects can result in broken invariants, exposure of internal details, and other subtle bugs. To prevent such issues, it is important to control accessibility and aliasing of objects. Dynamic Ownership is an effective way to do so, but its owner-as-dominator discipline is too restrictive: objects are either accessible or not. We propose in this paper to control accessibility and aliasing with more flexibility using two mechanisms, filters and crossing handlers. We demonstrate the benefits of the flexibility offered by these mechanisms, and report on the adaptation of a Smalltalk web server with our approach. 
We conclude that our variant of dynamic ownership is flexible enough to accommodate an existing design, while at the same time constraining it enough to highlight design anomalies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Lerner:2013:DCA, author = "Benjamin S. Lerner and Dan Grossman", title = "Detecting conflicts among declarative {UI} extensions", journal = j-SIGPLAN, volume = "48", number = "2", pages = "95--106", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384590", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We examine overlays, a flexible aspect-like mechanism for third-party declarative extensions of declarative UIs. Overlays can be defined for any markup language and permit extensions to define new content that is dynamically woven into a base UI document. While powerful, overlays are inherently non-modular and may conflict with each other, by defining duplicate or contradictory UI components. We construct an abstract language to capture core overlay semantics, and design an automatic analysis to detect inter-extension conflicts. We apply the analysis to a case study of Firefox extensions, finding several real-world bugs. Our analysis provides low-level feedback to extension developers and high-level reports to end users. Finally, we show how variants of overlays more expressive than those of Firefox complicate conflict detection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Steinert:2013:COA, author = "Bastian Steinert and Damien Cassou and Robert Hirschfeld", title = "{CoExist}: overcoming aversion to change", journal = j-SIGPLAN, volume = "48", number = "2", pages = "107--118", month = feb, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480360.2384591", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:12 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers make many changes to the program to eventually find a good solution for a given task. In this course of change, every intermediate development state can be of value when, for example, a promising idea suddenly turns out to be inappropriate or the interplay of objects turns out to be more complex than initially expected before making changes. Programmers would benefit from tool support that provides immediate access to source code and run-time of previous development states of interest. We present IDE extensions, implemented for Squeak/Smalltalk, to preserve, retrieve, and work with this information. With such tool support, programmers can work without worries because they can rely on tools that help them with whatever their explorations will reveal. 
They no longer have to follow certain best practices only to avoid undesired consequences of changing code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '12 conference proceedings.", } @Article{Damiani:2013:FFD, author = "Ferruccio Damiani and Luca Padovani and Ina Schaefer", title = "A formal foundation for dynamic delta-oriented software product lines", journal = j-SIGPLAN, volume = "48", number = "3", pages = "1--10", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371403", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Delta-oriented programming (DOP) is a flexible approach for implementing software product lines (SPLs). DOP SPLs are implemented by a code base (a set of delta modules encapsulating changes to object-oriented programs) and a product line declaration (providing the connection of the delta modules with the product features). In this paper, we extend DOP by the capability to switch the implemented product configuration at runtime and present a formal foundation for dynamic DOP. A dynamic DOP SPL is a DOP SPL with a dynamic reconfiguration graph that specifies how to switch between different feature configurations. Dynamic DOP supports (unanticipated) software evolution such that at runtime, the product line declaration, the code base and the dynamic reconfiguration graph can be changed in any (unanticipated) way that preserves the currently running product. The type system of our dynamic DOP core calculus ensures that the dynamic reconfigurations lead to type safe products and do not cause runtime type errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Thum:2013:FBD, author = "Thomas Th{\"u}m and Ina Schaefer and Sven Apel and Martin Hentschel", title = "Family-based deductive verification of software product lines", journal = j-SIGPLAN, volume = "48", number = "3", pages = "11--20", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A software product line is a set of similar software products that share a common code base. While software product lines can be implemented efficiently using feature-oriented programming, verifying each product individually does not scale, especially if human effort is required (e.g., as in interactive theorem proving). We present a family-based approach of deductive verification to prove the correctness of a software product line efficiently. We illustrate and evaluate our approach for software product lines written in a feature-oriented dialect of Java and specified using the Java Modeling Language. We show that the theorem prover KeY can be used off-the-shelf for this task, without any modifications. 
Compared to the individual verification of each product, our approach reduces the verification time needed for our case study by more than 85\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Ryssel:2013:RFM, author = "Uwe Ryssel and Joern Ploennigs and Klaus Kabitzsch", title = "Reasoning of feature models from derived features", journal = j-SIGPLAN, volume = "48", number = "3", pages = "21--30", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When using product lines, whose variability models are based on derived features, e.g., Simulink variant objects, the dependencies among the features are only described implicitly. This makes it difficult to verify the mapping of the features to the solution space and to create a comprehensive overview of the feature dependencies like in a feature model. In this paper, an OWL-based approach is presented, which permits the automatic verification of the feature mapping and an automatic feature model synthesis for derived features using OWL reasoning and formal concept analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Rayside:2013:SIA, author = "Derek Rayside and Vajihollah Montaghami and Francesca Leung and Albert Yuen and Kevin Xu and Daniel Jackson", title = "Synthesizing iterators from abstraction functions", journal = j-SIGPLAN, volume = "48", number = "3", pages = "31--40", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371407", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A technique for synthesizing iterators from declarative abstraction functions written in a relational logic specification language is described. The logic includes a transitive closure operator that makes it convenient for expressing reachability queries on linked data structures. Some optimizations, including tuple elimination, iterator flattening, and traversal state reduction, are used to improve performance of the generated iterators. A case study demonstrates that most of the iterators in the widely used JDK Collections classes can be replaced with code synthesized from declarative abstraction functions. These synthesized iterators perform competitively with the hand-written originals. In a user study the synthesized iterators always passed more test cases than the hand-written ones, were almost always as efficient, usually took less programmer effort, and were the qualitative preference of all participants who provided free-form comments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Hulette:2013:CTT, author = "Geoffrey C. Hulette and Matthew Sottile and Allen D. 
Malony", title = "Composing typemaps in {Twig}", journal = j-SIGPLAN, volume = "48", number = "3", pages = "41--49", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371408", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Twig is a language for writing typemaps, programs which transform the type of a value while preserving its underlying meaning. Typemaps are typically used by tools that generate code, such as multi-language wrapper generators, to automatically convert types as needed. Twig builds on existing typemap tools in a few key ways. Twig's typemaps are composable so that complex transformations may be built from simpler ones. In addition, Twig incorporates an abstract, formal model of code generation, allowing it to output code for different target languages. We describe Twig's formal semantics and show how the language allows us to concisely express typemaps. Then, we demonstrate Twig's utility by building an example typemap.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Axelsen:2013:PTD, author = "Eyvind W. Axelsen and Stein Krogdahl", title = "{Package Templates}: a definition by semantics-preserving source-to-source transformations to efficient {Java} code", journal = j-SIGPLAN, volume = "48", number = "3", pages = "50--59", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Package Templates (PT) is a mechanism designed for writing reusable modules, called templates, each consisting of a set of classes that can be adapted to their use in a program through compile-time specialization. A template must be instantiated in a program before its classes can be used. The mechanism supports type-safe renaming, merging, type parameterization and refinement in the form of static additions and overrides that are orthogonal to the corresponding concepts of ordinary inheritance. In this paper, we consider PT as an extension to Java, and a PT program will then consist of a number of Java packages and templates, where templates are instantiated in packages or other templates. Our aim and main contribution is to define the meaning of such a program, and to show that this definition is consistent. We first show this for a core subset of PT, C-PT, and define a set of source-to-source transformations for converting C-PT programs to plain Java programs using semantics we have described informally in previous papers. We can then define the meaning of a C-PT program in terms of the resulting Java program. Thus, we have to verify that the transformations will always convert a legal C-PT program to a legal Java program. Finally, we briefly discuss how this approach can be extended to full PT. A main challenge is to preserve externally visible names (for classes, methods and fields), and at the same time prevent unwanted subsequent rebindings caused e.g. by over-load resolution in the Java compiler. 
Names that are bound to declarations in a template should not be rebound to different declarations by subsequent compositions or adaptions. In addition to defining the runtime semantics of PT constructs in terms of their translation to Java, the transformation rules can also be seen as a high-level approach to how a compiler for this language might be implemented.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Spacek:2013:ISS, author = "Petr Spacek and Christophe Dony and Chouki Tibermacine and Luc Fabresse", title = "An inheritance system for structural \& behavioral reuse in component-based software programming", journal = j-SIGPLAN, volume = "48", number = "3", pages = "60--69", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the context of Component-based Programming, which addresses the implementation stage of a component-based software engineering development process, this paper describes a specification and an operational integration of an inheritance system into a self-contained new component-based programming language named Compo. Our proposal completes and extends related works by making it possible to apply inheritance to the full description of components, i.e. both to structural (description of provisions and requirements, of component architecture) and behavioral (full implementations of services) parts in component descriptions. Inheritance in Compo is designed to be used in conjunction with composition to maximize reuse capabilities and expressive power. Compo implementation proposes a clear operational solution for inheritance and for achieving and testing substitutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Zhang:2013:TLC, author = "Huaxi (Yulin) Zhang and Lei Zhang and Christelle Urtado and Sylvain Vauttier and Marianne Huchard", title = "A three-level component model in component based software development", journal = j-SIGPLAN, volume = "48", number = "3", pages = "70--79", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Component-based development promotes a software development process that focuses on component reuse. How to describe a desired component before searching in the repository? How to find an existing component that fulfills the required functionalities? How to capture the system personalization based on its constitutive components' customization? To answer these questions, this paper claims that components should be described using three different forms at three development stages: architecture specification, configuration and assembly. However, no architecture description language proposes such a detailed description for components that supports such a three step component-based development. 
This paper proposes a three-level ADL, named Dedal, that enables the explicit and separate definitions of component roles, component classes, and component instances.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Freeman:2013:HLW, author = "John Freeman and Jaakko J{\"a}rvi and Gabriel Foust", title = "{HotDrink}: a library for {Web} user interfaces", journal = j-SIGPLAN, volume = "48", number = "3", pages = "80--83", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371413", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "HotDrink is a JavaScript library for constructing forms, dialogs, and other common user interfaces for Web applications. With HotDrink, instead of writing event handlers, developers declare a ``view-model'' in JavaScript and a set of ``bindings'' between the view-model and the HTML elements comprising the view. These specifications tend to be small, but they are enough for HotDrink to provide a fully operational GUI with multi-way dataflows, enabling/disabling of values, activation/deactivation of commands, and data validation. HotDrink implements these rich behaviors, expected of high-quality user interfaces, as generic reusable algorithms. This paper/tool demonstration introduces developers to the HotDrink library by stepping through the construction of an example web application GUI. The library is a concrete realization of our prior work on the ``property models'' approach to declarative GUI programming. To encourage adoption among developers, we have packaged the technology following established web programming conventions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Riche:2013:PSA, author = "T. L. Rich{\'e} and R. Gon{\c{c}}alves and B. Marker and D. Batory", title = "Pushouts in software architecture design", journal = j-SIGPLAN, volume = "48", number = "3", pages = "84--92", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A classical approach to program derivation is to progressively extend a simple specification and then incrementally refine it to an implementation. We claim this approach is hard or impractical when reverse engineering legacy software architectures. 
We present a case study that shows optimizations and pushouts---in addition to refinements and extensions---are essential for practical stepwise development of complex software architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Bagheri:2013:PSD, author = "Hamid Bagheri and Kevin Sullivan", title = "{Pol}: specification-driven synthesis of architectural code frameworks for platform-based applications", journal = j-SIGPLAN, volume = "48", number = "3", pages = "93--102", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371416", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing applications that use complex platforms for functionalities such as authentication and messaging is hard. Model-driven engineering promises to help, but transformation systems are themselves hard to produce. We contribute a new approach using constraint-based synthesis of partial code frameworks that developers complete by hand without the need for hand-coded transformation systems. Rather, synthesis is driven by formal, partial specifications of target platforms and application architectures, and by design (code) fragments encoding application-specific platform usage patterns. We present results of an early evaluation using the case study method to test hypotheses of feasibility and potential industrial utility, using a laboratory model of a nationwide health information network as a subject system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Bauer:2013:FPA, author = "Tim Bauer and Martin Erwig and Alan Fern and Jervis Pinto", title = "Faster program adaptation through reward attribution inference", journal = j-SIGPLAN, volume = "48", number = "3", pages = "103--111", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371417", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the adaptation-based programming (ABP) paradigm, programs may contain variable parts (function calls, parameter values, etc.) that can take a number of different values. Programs also contain reward statements with which a programmer can provide feedback about how well a program is performing with respect to achieving its goals (for example, achieving a high score on some scale). By repeatedly running the program, a machine learning component will, guided by the rewards, gradually adjust the automatic choices made in the variable program parts so that they converge toward an optimal strategy. ABP is a method for semi-automatic program generation in which the choices and rewards offered by programmers allow standard machine-learning techniques to explore a design space defined by the programmer to find an optimal instance of a program template. ABP effectively provides a DSL that allows non-machine-learning experts to exploit machine learning to generate self-optimizing programs. 
Unfortunately, in many cases the placement and structuring of choices and rewards can have a detrimental effect on how an optimal solution to a program-generation problem can be found. To address this problem, we have developed a dataflow analysis that computes influence tracks of choices and rewards. This information can be exploited by an augmented machine-learning technique to ignore misleading rewards and to generally attribute rewards better to the choices that have actually influenced them. Moreover, this technique allows us to detect errors in the adaptive program that might arise out of program maintenance. Our evaluation shows that the dataflow analysis can lead to improvements in performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Efftinge:2013:XID, author = "Sven Efftinge and Moritz Eysholdt and Jan K{\"o}hnlein and Sebastian Zarnekow and Robert von Massow and Wilhelm Hasselbring and Michael Hanus", title = "{Xbase}: implementing domain-specific languages for {Java}", journal = j-SIGPLAN, volume = "48", number = "3", pages = "112--121", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371419", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Xtext is an open-source framework for implementing external, textual domain-specific languages (DSLs). So far, most DSLs implemented with Xtext and similar tools focus on structural aspects such as service specifications and entities. Because behavioral aspects are significantly more complicated to implement, they are often delegated to general-purpose programming languages. This approach introduces complex integration patterns and the DSL's high level of abstraction is compromised. We present Xbase as part of Xtext, an expression language that can be reused via language inheritance in any DSL implementation based on Xtext. Xbase expressions provide both control structures and program expressions in a uniform way. Xbase is statically typed and tightly integrated with the Java type system. Languages extending Xbase inherit the syntax of a Java-like expression language as well as language infrastructure components, including a parser, an unparser, a linker, a compiler and an interpreter. Furthermore, the framework provides integration into the Eclipse IDE including debug and refactoring support. The application of Xbase is presented by means of a domain model language which serves as a tutorial example and by the implementation of the programming language Xtend. Xtend is a functional and object-oriented general purpose language for the Java Virtual Machine (JVM). 
It is built on top of Xbase which is the reusable expression language that is the foundation of Xtend.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Rafkind:2013:HSE, author = "Jon Rafkind and Matthew Flatt", title = "{Honu}: syntactic extension for algebraic notation through enforestation", journal = j-SIGPLAN, volume = "48", number = "3", pages = "122--131", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371420", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Honu is a new language that fuses traditional algebraic notation (e.g., infix binary operators) with Scheme-style language extensibility. A key element of Honu's design is an enforestation parsing step, which converts a flat stream of tokens into an S-expression-like tree, in addition to the initial ``read'' phase of parsing and interleaved with the ``macro-expand'' phase. We present the design of Honu, explain its parsing and macro-extension algorithm, and show example syntactic extensions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Walkingshaw:2013:CMI, author = "Eric Walkingshaw and Martin Erwig", title = "A calculus for modeling and implementing variation", journal = j-SIGPLAN, volume = "48", number = "3", pages = "132--140", month = mar, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2480361.2371421", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:18 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a formal calculus for modeling and implementing variation in software. It unifies the compositional and annotative approaches to feature implementation and supports the development of abstractions that can be used to directly relate feature models to their implementation. Since the compositional and annotative approaches are complementary, the calculus enables implementers to use the best combination of tools for the job and focus on inherent feature interactions, rather than those introduced by biases in the representation. The calculus also supports the abstraction of recurring variational patterns and provides a metaprogramming platform for organizing variation in artifacts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Bond:2013:GDG, author = "Michael Bond", title = "{GPUDet}: a deterministic {GPU} architecture", journal = j-SIGPLAN, volume = "48", number = "4", pages = "1--12", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451118", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nondeterminism is a key challenge in developing multithreaded applications. 
Even with the same input, each execution of a multithreaded program may produce a different output. This behavior complicates debugging and limits one's ability to test for correctness. This non-reproducibility situation is aggravated on massively parallel architectures like graphics processing units (GPUs) with thousands of concurrent threads. We believe providing a deterministic environment to ease debugging and testing of GPU applications is essential to enable a broader class of software to use GPUs. Many hardware and software techniques have been proposed for providing determinism on general-purpose multi-core processors. However, these techniques are designed for small numbers of threads. Scaling them to thousands of threads on a GPU is a major challenge. This paper proposes a scalable hardware mechanism, GPUDet, to provide determinism in GPU architectures. In this paper we characterize the existing deterministic and nondeterministic aspects of current GPU execution models, and we use these observations to inform GPUDet's design. For example, GPUDet leverages the inherent determinism of the SIMD hardware in GPUs to provide determinism within a wavefront at no cost. GPUDet also exploits the Z-Buffer Unit, an existing GPU hardware unit for graphics rendering, to allow parallel out-of-order memory writes to produce a deterministic output. Other optimizations in GPUDet include deterministic parallel execution of atomic operations and a workgroup-aware algorithm that eliminates unnecessary global synchronizations. Our simulation results indicate that GPUDet incurs only 2X slowdown on average over a baseline nondeterministic architecture, with runtime overheads as low as 4\% for compute-bound applications, despite running GPU kernels with thousands of threads. We also characterize the sources of overhead for deterministic execution on GPUs to provide insights for further optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Sung:2013:DEH, author = "Hyojin Sung and Rakesh Komuravelli and Sarita V. Adve", title = "{DeNovoND}: efficient hardware support for disciplined non-determinism", journal = j-SIGPLAN, volume = "48", number = "4", pages = "13--26", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451119", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent work has shown that disciplined shared-memory programming models that provide deterministic-by-default semantics can simplify both parallel software and hardware. Specifically, the DeNovo hardware system has shown that the software guarantees of such models (e.g., data-race-freedom and explicit side-effects) can enable simpler, higher performance, and more energy-efficient hardware than the current state-of-the-art for deterministic programs. Many applications, however, contain non-deterministic parts; e.g., using lock synchronization. For commercial hardware to exploit the benefits of DeNovo, it is therefore necessary to extend DeNovo to support non-deterministic applications. This paper proposes DeNovoND, a system that supports lock-based, disciplined non-determinism, with the simplicity, performance, and energy benefits of DeNovo. 
We use a combination of distributed queue-based locks and access signatures to implement simple memory consistency semantics for safe non-determinism, with a coherence protocol that does not require transient states, invalidation traffic, or directories, and does not incur false sharing. The resulting system is simpler, shows comparable or better execution time, and has 33\% less network traffic on average (translating directly into energy savings) relative to a state-of-the-art invalidation-based protocol for 8 applications designed for lock synchronization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Wester:2013:PDR, author = "Benjamin Wester and David Devecsery and Peter M. Chen and Jason Flinn and Satish Narayanasamy", title = "Parallelizing data race detection", journal = j-SIGPLAN, volume = "48", number = "4", pages = "27--38", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451120", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Detecting data races in multithreaded programs is a crucial part of debugging such programs, but traditional data race detectors are too slow to use routinely. This paper shows how to speed up race detection by spreading the work across multiple cores. Our strategy relies on uniparallelism, which executes time intervals of a program (called epochs ) in parallel to provide scalability, but executes all threads from a single epoch on a single core to eliminate locking overhead. We use several techniques to make parallelization effective: dividing race detection into three phases, predicting a subset of the analysis state, eliminating sequential work via transitive reduction, and reducing the work needed to maintain multiple versions of analysis via factorization. We demonstrate our strategy by parallelizing a happens-before detector and a lockset-based detector. We find that uniparallelism can significantly speed up data race detection. With 4x the number of cores as the original application, our strategy speeds up the median execution time by 4.4x for a happens-before detector and 3.3x for a lockset race detector. Even on the same number of cores as the conventional detectors, the ability for uniparallelism to elide analysis locks allows it to reduce the median overhead by 13\% for a happens-before detector and 8\% for a lockset detector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Lucia:2013:CEF, author = "Brandon Lucia and Luis Ceze", title = "Cooperative empirical failure avoidance for multithreaded programs", journal = j-SIGPLAN, volume = "48", number = "4", pages = "39--50", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451121", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency errors in multithreaded programs are difficult to find and fix. We propose Aviso, a system for avoiding schedule-dependent failures. 
Aviso monitors events during a program's execution and, when a failure occurs, records a history of events from the failing execution. It uses this history to generate schedule constraints that perturb the order of events in the execution and thereby avoids schedules that lead to failures in future program executions. Aviso leverages scenarios where many instances of the same software run, using a statistical model of program behavior and experimentation to determine which constraints most effectively avoid failures. After implementing Aviso, we showed that it decreased failure rates for a variety of important desktop, server, and cloud applications by orders of magnitude, with an average overhead of less than 20\% and, in some cases, as low as 5\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Goiri:2013:PGM, author = "{\'I}{\~n}igo Goiri and William Katsak and Kien Le and Thu D. Nguyen and Ricardo Bianchini", title = "{Parasol} and {GreenSwitch}: managing datacenters powered by renewable energy", journal = j-SIGPLAN, volume = "48", number = "4", pages = "51--64", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several companies have recently announced plans to build ``green'' datacenters, i.e. datacenters partially or completely powered by renewable energy. These datacenters will either generate their own renewable energy or draw it directly from an existing nearby plant. Besides reducing carbon footprints, renewable energy can potentially reduce energy costs, reduce peak power costs, or both. However, certain renewable fuels are intermittent, which requires approaches for tackling the energy supply variability. One approach is to use batteries and/or the electrical grid as a backup for the renewable energy. It may also be possible to adapt the workload to match the renewable energy supply. For highest benefits, green datacenter operators must intelligently manage their workloads and the sources of energy at their disposal. In this paper, we first discuss the tradeoffs involved in building green datacenters today and in the future. Second, we present Parasol, a prototype green datacenter that we have built as a research platform. Parasol comprises a small container, a set of solar panels, a battery bank, and a grid-tie. Third, we describe GreenSwitch, our model-based approach for dynamically scheduling the workload and selecting the source of energy to use. Our real experiments with Parasol, GreenSwitch, and MapReduce workloads demonstrate that intelligent workload and energy source management can produce significant cost reductions. Our results also isolate the cost implications of peak power management, storing energy on the grid, and the ability to delay the MapReduce jobs. 
Finally, our results demonstrate that careful workload and energy source management can minimize the negative impact of electrical grid outages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Shen:2013:PCF, author = "Kai Shen and Arrvindh Shriraman and Sandhya Dwarkadas and Xiao Zhang and Zhuan Chen", title = "Power containers: an {OS} facility for fine-grained power and energy management on multicore servers", journal = j-SIGPLAN, volume = "48", number = "4", pages = "65--76", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy efficiency and power capping are critical concerns in server and cloud computing systems. They face growing challenges due to dynamic power variations from new client-directed web applications, as well as complex behaviors due to multicore resource sharing and hardware heterogeneity. This paper presents a new operating system facility called ``power containers'' that accounts for and controls the power and energy usage of individual fine-grained requests in multicore servers. This facility relies on three key techniques --- (1) online model that attributes multicore power (including shared maintenance power) to concurrently running tasks, (2) alignment of actual power measurements and model estimates to enable online model recalibration, and (3) on-the-fly application-transparent request tracking in multi-stage servers to isolate the power and energy contributions and customize per-request control. Our mechanisms enable new multicore server management capabilities including fair power capping that only penalizes power-hungry requests, and energy-aware request distribution between heterogeneous servers. Our evaluation uses three multicore processors (Intel Woodcrest, Westmere, and SandyBridge) and a variety of server and cloud computing (Google App Engine) workloads. Our results demonstrate the high accuracy of our request power accounting (no more than 11\% errors) and the effectiveness of container-enabled power virus isolation and throttling. Our request distribution case study shows up to 25\% energy saving compared to an alternative approach that recognizes machine heterogeneity but not fine-grained workload affinity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Delimitrou:2013:PQA, author = "Christina Delimitrou and Christos Kozyrakis", title = "{Paragon}: {QoS}-aware scheduling for heterogeneous datacenters", journal = j-SIGPLAN, volume = "48", number = "4", pages = "77--88", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451125", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale datacenters (DCs) host tens of thousands of diverse applications each day. 
However, interference between colocated workloads and the difficulty to match applications to one of the many hardware platforms available can degrade performance, violating the quality of service (QoS) guarantees that many cloud workloads require. While previous work has identified the impact of heterogeneity and interference, existing solutions are computationally intensive, cannot be applied online and do not scale beyond few applications. We present Paragon, an online and scalable DC scheduler that is heterogeneity and interference-aware. Paragon is derived from robust analytical methods and instead of profiling each application in detail, it leverages information the system already has about applications it has previously seen. It uses collaborative filtering techniques to quickly and accurately classify an unknown, incoming workload with respect to heterogeneity and interference in multiple shared resources, by identifying similarities to previously scheduled applications. The classification allows Paragon to greedily schedule applications in a manner that minimizes interference and maximizes server utilization. Paragon scales to tens of thousands of servers with marginal scheduling overheads in terms of time or state. We evaluate Paragon with a wide range of workload scenarios, on both small and large-scale systems, including 1,000 servers on EC2. For a 2,500-workload scenario, Paragon enforces performance guarantees for 91\% of applications, while significantly improving utilization. In comparison, heterogeneity-oblivious, interference-oblivious and least-loaded schedulers only provide similar guarantees for 14\%, 11\% and 3\% of workloads. The differences are more striking in oversubscribed scenarios where resource efficiency is more critical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Tang:2013:RRS, author = "Lingjia Tang and Jason Mars and Wei Wang and Tanima Dey and Mary Lou Soffa", title = "{ReQoS}: reactive static\slash dynamic compilation for {QoS} in warehouse scale computers", journal = j-SIGPLAN, volume = "48", number = "4", pages = "89--100", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451126", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As multicore processors with expanding core counts continue to dominate the server market, the overall utilization of the class of datacenters known as warehouse scale computers (WSCs) depends heavily on colocation of multiple workloads on each server to take advantage of the computational power provided by modern processors. However, many of the applications running in WSCs, such as Web search, are user-facing and have quality of service (QoS) requirements. When multiple applications are co-located on a multicore machine, contention for shared memory resources threatens application QoS as severe cross-core performance interference may occur. WSC operators are left with two options: either disregard QoS to maximize WSC utilization, or disallow the co-location of high-priority user-facing applications with other applications, resulting in low machine utilization and millions of dollars wasted. 
This paper presents ReQoS, a static/dynamic compilation approach that enables low-priority applications to adaptively manipulate their own contentiousness to ensure the QoS of high-priority co-runners. ReQoS is composed of a profile guided compilation technique that identifies and inserts markers in contentious code regions in low-priority applications, and a lightweight runtime that monitors the QoS of high-priority applications and reactively reduces the pressure low-priority applications generate to the memory subsystem when cross-core interference is detected. In this work, we show that ReQoS can accurately diagnose contention and significantly reduce performance interference to ensure application QoS. Applying ReQoS to SPEC2006 and SmashBench workloads on real multicore machines, we are able to improve machine utilization by more than 70\% in many cases, and more than 50\% on average, while enforcing a 90\% QoS threshold. We are also able to improve the energy efficiency of modern multicore machines by 47\% on average over a policy of disallowing co-locations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Arulraj:2013:PRS, author = "Joy Arulraj and Po-Chun Chang and Guoliang Jin and Shan Lu", title = "Production-run software failure diagnosis via hardware performance counters", journal = j-SIGPLAN, volume = "48", number = "4", pages = "101--112", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sequential and concurrency bugs are widespread in deployed software. They cause severe failures and huge financial loss during production runs. Tools that diagnose production-run failures with low overhead are needed. The state-of-the-art diagnosis techniques use software instrumentation to sample program properties at run time and use off-line statistical analysis to identify properties most correlated with failures. Although promising, these techniques suffer from high run-time overhead, which is sometimes over 100\%, for concurrency-bug failure diagnosis and hence are not suitable for production-run usage. We present PBI, a system that uses existing hardware performance counters to diagnose production-run failures caused by sequential and concurrency bugs with low overhead. PBI is designed based on several key observations. First, a few widely supported performance counter events can reflect a wide variety of common software bugs and can be monitored by hardware with almost no overhead. Second, the counter overflow interrupt supported by existing hardware and operating systems provides a natural and effective mechanism to conduct event sampling at user level. Third, the noise and non-determinism in interrupt delivery complements well with statistical processing. We evaluate PBI using 13 real-world concurrency and sequential bugs from representative open-source server, client, and utility programs, and 10 bugs from a widely used software-testing benchmark. Quantitatively, PBI can effectively diagnose failures caused by these bugs with a small overhead that is never higher than 10\%. 
Qualitatively, PBI does not require any change to software and presents a novel use of existing hardware performance counters.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Zhang:2013:CFC, author = "Wei Zhang and Marc de Kruijf and Ang Li and Shan Lu and Karthikeyan Sankaralingam", title = "{ConAir}: featherweight concurrency bug recovery via single-threaded idempotent execution", journal = j-SIGPLAN, volume = "48", number = "4", pages = "113--126", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451129", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many concurrency bugs are hidden in deployed software and cause severe failures for end-users. When they finally manifest and become known by developers, they are difficult to fix correctly. To support end-users, we need techniques that help software survive hidden concurrency bugs during production runs. To help developers, we need techniques that fix exposed concurrency bugs. The state-of-the-art techniques on concurrency-bug fixing and survival only satisfy a subset of four important properties: compatibility, correctness, generality, and performance. We aim to develop a system that satisfies all of these four properties. To achieve this goal, we leverage two observations: (1) rolling back a single thread is sufficient to recover from most concurrency-bug failures; (2) reexecuting an idempotent region, which requires no memory-state checkpoint, is sufficient to recover from many concurrency-bug failures. Our system ConAir includes a static analysis component that automatically identifies potential failure sites, a static analysis component that automatically identifies the idempotent code regions around every failure site, and a code-transformation component that inserts rollback-recovery code around the identified idempotent regions. We evaluated ConAir on 10 real-world concurrency bugs in widely used C/C++ open-source applications. These bugs cover different types of failure symptoms and root causes. Quantitatively, ConAir helps software survive failures caused by all of these bugs with negligible run-time overhead ($< 1\%$) and short recovery time. Qualitatively, ConAir can help recover from failures caused by unknown bugs.
It guarantees that program semantics remain unchanged and requires no change to operating systems or hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Viennot:2013:TMR, author = "Nicolas Viennot and Siddharth Nair and Jason Nieh", title = "Transparent mutable replay for multicore debugging and patch validation", journal = j-SIGPLAN, volume = "48", number = "4", pages = "127--138", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451130", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Dora, a mutable record-replay system which allows a recorded execution of an application to be replayed with a modified version of the application. This feature, not available in previous record-replay systems, enables powerful new functionality. In particular, Dora can help reproduce, diagnose, and fix software bugs by replaying a version of a recorded application that is recompiled with debugging information, reconfigured to produce verbose log output, modified to include additional print statements, or patched to fix a bug. Dora uses lightweight operating system mechanisms to record an application execution by capturing nondeterministic events to a log without imposing unnecessary timing and ordering constraints. It replays the log using a modified version of the application even in the presence of added, deleted, or modified operations that do not match events in the log. Dora searches for a replay that minimizes differences between the log and the replayed execution of the modified program. If there are no modifications, Dora provides deterministic replay of the unmodified program. We have implemented a Linux prototype which provides transparent mutable replay without recompiling or relinking applications. We show that Dora is useful for reproducing, diagnosing, and fixing software bugs in real-world applications, including Apache and MySQL. 
Our results show that Dora (1) captures bugs and replays them with applications modified or reconfigured to produce additional debugging output for root cause diagnosis, (2) captures exploits and replays them with patched applications to validate that the patches successfully eliminate vulnerabilities, (3) records production workloads and replays them with patched applications to validate patches with realistic workloads, and (4) maintains low recording overhead on commodity multicore hardware, making it suitable for production systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Sahoo:2013:ULI, author = "Swarup Kumar Sahoo and John Criswell and Chase Geigle and Vikram Adve", title = "Using likely invariants for automated software fault localization", journal = j-SIGPLAN, volume = "48", number = "4", pages = "139--152", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451131", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose an automatic diagnosis technique for isolating the root cause(s) of software failures. We use likely program invariants, automatically generated using correct inputs that are close to the fault-triggering input, to select a set of candidate program locations which are possible root causes. We then trim the set of candidate root causes using software-implemented dynamic backwards slicing, plus two new filtering heuristics: dependence filtering, and filtering via multiple failing inputs that are also close to the failing input. Experimental results on reported software bugs of three large open-source servers show that we are able to narrow down the number of candidate bug locations to between 5 and 17 program expressions, even in programs that are hundreds of thousands of lines long.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Paulos:2013:REA, author = "Eric Paulos", title = "The rise of the expert amateur: {DIY} culture and the evolution of computer science", journal = j-SIGPLAN, volume = "48", number = "4", pages = "153--154", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451133", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We are at an important technological inflection point. Most of our computing systems have been designed and built by professionally trained experts (i.e. us --- computer scientists, engineers, and designers) for use in specific domains and to solve explicit problems. Artifacts often called ``user manuals'' traditionally prescribed the appropriate usage of these tools and implied an acceptable etiquette for interaction and experience.
A fringe group of individuals usually labeled ``hackers'' or ``amateurs'' or ``makers'' have challenged this producer-consumer model of technology by creating novel hardware and software features to ``improve'' our research and products while a similar creative group of technicians called ``artists'' have redirected the techniques, tools, and tenets of accepted technological usage away from their typical manifestations in practicality and product. Over time the technological artifacts of these fringe groups and the support for their rhetoric have gained them a foothold into computing culture and eroded the established power discontinuities within the practice of computing research. We now expect our computing tools to be driven by an architecture of open participation and democracy that encourages users to add value to their tools and applications as they use them. Similarly, the bar for enabling the design of novel, personal computing systems and ``hardware remixes'' has fallen to the point where many non-experts and novices are readily embracing and creating fascinating and ingenious computing artifacts outside of our official and traditionally sanctioned academic and industrial research communities. But how have we as ``expert'' practitioners been influencing this discussion? By constructing a practice around the design and development of technology for task based and problem solving applications, we have unintentionally established such work as the status quo for the human computing experience. We have failed in our duty to open up alternate forums for technology to express itself and touch our lives beyond productivity and efficiency. Blinded by our quest for ``smart technologies'' we have forgotten to contemplate the design of technologies to inspire us to be smarter, more curious, and more inquisitive. We owe it to ourselves to rethink the impact we desire to have on this historic moment in computing culture. We must choose to participate in and perhaps lead a dialogue that heralds an expansive new acceptable practice of designing to enable participation by experts and non-experts alike. We are in the milieu of the rise of the ``expert amateur''. We must change our mantra --- not just performance, completeness, and usability but openness, usefulness and relevancy to our world, its citizens, and our environment. This talk will explore elements of the DIY and maker culture and its relevancy to research questions across computational hardware, languages, and systems. Ultimately, this talk will outline and argue for expanding the design territory and potential opportunities for all of us to collaborate and benefit as a society from this cultural movement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Raghavan:2013:CSH, author = "Arun Raghavan and Laurel Emurian and Lei Shao and Marios Papaefthymiou and Kevin P. Pipe and Thomas F. Wenisch and Milo M. K. 
Martin", title = "Computational sprinting on a hardware\slash software testbed", journal = j-SIGPLAN, volume = "48", number = "4", pages = "155--166", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451135", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "CMOS scaling trends have led to an inflection point where thermal constraints (especially in mobile devices that employ only passive cooling) preclude sustained operation of all transistors on a chip --- a phenomenon called ``dark silicon.'' Recent research proposed computational sprinting --- exceeding sustainable thermal limits for short intervals --- to improve responsiveness in light of the bursty computation demands of many media-rich interactive mobile applications. Computational sprinting improves responsiveness by activating reserve cores (parallel sprinting) and/or boosting frequency/voltage (frequency sprinting) to power levels that far exceed the system's sustainable cooling capabilities, relying on thermal capacitance to buffer heat. Prior work analyzed the feasibility of sprinting through modeling and simulation. In this work, we investigate sprinting using a hardware/software testbed. First, we study unabridged sprints, wherein the computation completes before temperature becomes critical, demonstrating a 6.3x responsiveness gain, and a 6\% energy efficiency improvement by racing to idle. We then analyze truncated sprints, wherein our software runtime system must intervene to prevent overheating by throttling parallelism and frequency before the computation is complete. To avoid oversubscription penalties (context switching inefficiencies after a truncated parallel sprint), we develop a sprint-aware task-based parallel runtime. We find that maximal-intensity sprinting is not always best, introduce the concept of sprint pacing, and evaluate an adaptive policy for selecting sprint intensity. We report initial results using a phase change heat sink to extend maximum sprint duration. Finally, we demonstrate that a sprint-and-rest operating regime can actually outperform thermally-limited sustained execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Ahn:2013:DAS, author = "Wonsun Ahn and Yuelu Duan and Josep Torrellas", title = "{DeAliaser}: alias speculation using atomic region support", journal = j-SIGPLAN, volume = "48", number = "4", pages = "167--180", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451136", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Alias analysis is a critical component in many compiler optimizations. A promising approach to reduce the complexity of alias analysis is to use speculation. The approach consists of performing optimizations assuming the alias relationships that are true most of the time, and repairing the code when such relationships are found not to hold through runtime checks. 
This paper proposes a general alias speculation scheme that leverages upcoming hardware support for transactions with the help of some ISA extensions. The ability of transactions to checkpoint and roll back frees the compiler to pursue aggressive optimizations without having to worry about recovery code. Also, exposing the memory conflict detection hardware in transactions to software allows runtime checking of aliases with little or no overhead. We test the potential of the novel alias speculation approach with Loop Invariant Code Motion (LICM), Global Value Numbering (GVN), and Partial Redundancy Elimination (PRE) optimization passes. On average, they are shown to reduce program execution time by 9\% in SPEC FP2006 applications and 3\% in SPEC INT2006 applications over the alias analysis of a state-of-the-art compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Park:2013:RCH, author = "Heekwon Park and Seungjae Baek and Jongmoo Choi and Donghee Lee and Sam H. Noh", title = "Regularities considered harmful: forcing randomness to memory accesses to reduce row buffer conflicts for multi-core, multi-bank systems", journal = j-SIGPLAN, volume = "48", number = "4", pages = "181--192", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451137", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a novel kernel-level memory allocator, called M$^3$ (M-cube, Multi-core Multi-bank Memory allocator), that has the following two features. First, it introduces and makes use of a notion of a memory container, which is defined as a unit of memory that comprises the minimum number of page frames that can cover all the banks of the memory organization, by exclusively assigning a container to a core so that each core achieves bank parallelism as much as possible. Second, it orchestrates page frame allocation so that pages that threads access are dispersed randomly across multiple banks so that each thread's access pattern is randomized. The development of M$^3$ is based on a tool that we develop to fully understand the architectural characteristics of the underlying memory organization. Using an extension of this tool, we observe that the same application that accesses pages in a random manner outperforms one that accesses pages in a regular pattern such as sequential or same ordered accesses. This is because such randomized accesses reduces inter-thread access interference on the row-buffer in memory banks. We implement M$^3$ in the Linux kernel version 2.6.32 on the Intel Xeon system that has 16 cores and 32GB DRAM. Performance evaluation with various workloads show that M$^3$ improves the overall performance for memory intensive benchmarks by up to 85\% with an average of about 40\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Honarmand:2013:CUA, author = "Nima Honarmand and Nathan Dautenhahn and Josep Torrellas and Samuel T. 
King and Gilles Pokam and Cristiano Pereira", title = "{Cyrus}: unintrusive application-level record-replay for replay parallelism", journal = j-SIGPLAN, volume = "48", number = "4", pages = "193--206", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451138", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Architectures for deterministic record-replay (R\&R) of multithreaded code are attractive for program debugging, intrusion analysis, and fault-tolerance uses. However, very few of the proposed designs have focused on maximizing replay speed --- a key enabling property of these systems. The few efforts that focus on replay speed require intrusive hardware or software modifications, or target whole-system R\&R rather than the more useful application-level R\&R. This paper presents the first hardware-based scheme for unintrusive, application-level R\&R that explicitly targets high replay speed. Our scheme, called Cyrus, requires no modification to commodity snoopy cache coherence. It introduces the concept of an on-the-fly software Backend Pass during recording which, as the log is being generated, transforms it for high replay parallelism. This pass also fixes-up the log, and can flexibly trade-off replay parallelism for log size. We analyze the performance of Cyrus using full system (OS plus hardware) simulation. Our results show that Cyrus has negligible recording overhead. In addition, for 8-processor runs of SPLASH-2, Cyrus attains an average replay parallelism of 5, and a replay speed that is, on average, only about 50\% lower than the recording speed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{deOliveira:2013:WYS, author = "Augusto Born de Oliveira and Sebastian Fischmeister and Amer Diwan and Matthias Hauswirth and Peter F. Sweeney", title = "Why you should care about quantile regression", journal = j-SIGPLAN, volume = "48", number = "4", pages = "207--218", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451140", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Research has shown that correctly conducting and analysing computer performance experiments is difficult. This paper investigates what is necessary to conduct successful computer performance evaluation by attempting to repeat a prior experiment: the comparison between two Linux schedulers. In our efforts, we found that exploring an experimental space through a series of incremental experiments can be inconclusive, and there may be no indication of how much experimentation will be enough. Analysis of variance (ANOVA), a traditional analysis method, is able to partly solve the problems with the previous approach, but we demonstrate that ANOVA can be insufficient for proper analysis due to the requirements it imposes on the data. Finally, we demonstrate the successful application of quantile regression, a recent development in statistics, to computer performance experiments. 
Quantile regression can provide more insight into the experiment than ANOVA, with the additional benefit of being applicable to data from any distribution. This property makes it especially useful in our field, since non-normally distributed data is common in computer experiments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Curtsinger:2013:SSS, author = "Charlie Curtsinger and Emery D. Berger", title = "{STABILIZER}: statistically sound performance evaluation", journal = j-SIGPLAN, volume = "48", number = "4", pages = "219--228", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451141", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Researchers and software developers require effective performance evaluation. Researchers must evaluate optimizations or measure overhead. Software developers use automatic performance regression tests to discover when changes improve or degrade performance. The standard methodology is to compare execution times before and after applying changes. Unfortunately, modern architectural features make this approach unsound. Statistically sound evaluation requires multiple samples to test whether one can or cannot (with high confidence) reject the null hypothesis that results are the same before and after. However, caches and branch predictors make performance dependent on machine-specific parameters and the exact layout of code, stack frames, and heap objects. A single binary constitutes just one sample from the space of program layouts, regardless of the number of runs. Since compiler optimizations and code changes also alter layout, it is currently impossible to distinguish the impact of an optimization from that of its layout effects. This paper presents Stabilizer, a system that enables the use of the powerful statistical techniques required for sound performance evaluation on modern architectures. Stabilizer forces executions to sample the space of memory configurations by repeatedly re-randomizing layouts of code, stack, and heap objects at runtime. Stabilizer thus makes it possible to control for layout effects. Re-randomization also ensures that layout effects follow a Gaussian distribution, enabling the use of statistical tests like ANOVA. We demonstrate Stabilizer's efficiency ($< 7\%$ median overhead) and its effectiveness by evaluating the impact of LLVM's optimizations on the SPEC CPU2006 benchmark suite. 
We find that, while -O2 has a significant impact relative to -O1, the performance impact of -O3 over -O2 optimizations is indistinguishable from random noise.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Gidra:2013:SSS, author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and Marc Shapiro", title = "A study of the scalability of stop-the-world garbage collectors on multicores", journal = j-SIGPLAN, volume = "48", number = "4", pages = "229--240", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451142", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale multicore architectures create new challenges for garbage collectors (GCs). In particular, throughput-oriented stop-the-world algorithms demonstrate good performance with a small number of cores, but have been shown to degrade badly beyond approximately 8 cores on a 48-core machine with OpenJDK 7. This negative result raises the question whether the stop-the-world design has intrinsic limitations that would require a radically different approach. Our study suggests that the answer is no, and that there is no compelling scalability reason to discard the existing highly-optimised throughput-oriented GC code on contemporary hardware. This paper studies the default throughput-oriented garbage collector of OpenJDK 7, called Parallel Scavenge. We identify its bottlenecks, and show how to eliminate them using well-established parallel programming techniques. On the SPECjbb2005, SPECjvm2008 and DaCapo 9.12 benchmarks, the improved GC matches the performance of Parallel Scavenge at low core count, but scales well, up to 48 cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{McFarlin:2013:DDO, author = "Daniel S. McFarlin and Charles Tucker and Craig Zilles", title = "Discerning the dominant out-of-order performance advantage: is it speculation or dynamism?", journal = j-SIGPLAN, volume = "48", number = "4", pages = "241--252", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451143", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we set out to study the performance advantages of an Out-of-Order (OOO) processor relative to in-order processors with similar execution resources. In particular, we try to tease apart the performance contributions from two sources: the improved schedules enabled by OOO hardware speculation support and its ability to generate different schedules on different occurrences of the same instructions based on operand and functional unit availability. We find that the ability to express good static schedules achieves the bulk of the speedup resulting from OOO. Specifically, of the 53\% speedup achieved by OOO relative to a similarly provisioned in-order machine, we find that 88\% of that speedup can be achieved by using a single ``best'' static schedule as suggested by observing an OOO schedule of the code.
We discuss the ISA mechanisms that would be required to express these static schedules. Furthermore, we find that the benefits of dynamism largely come from two kinds of events that influence the application's critical path: load instructions that miss in the cache only part of the time and branch mispredictions. We find that much of the benefit of OOO dynamism can be achieved by the potentially simpler task of addressing these two behaviors directly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Checkoway:2013:IAW, author = "Stephen Checkoway and Hovav Shacham", title = "{Iago} attacks: why the system call {API} is a bad untrusted {RPC} interface", journal = j-SIGPLAN, volume = "48", number = "4", pages = "253--264", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451145", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years, researchers have proposed systems for running trusted code on an untrusted operating system. Protection mechanisms deployed by such systems keep a malicious kernel from directly manipulating a trusted application's state. Under such systems, the application and kernel are, conceptually, peers, and the system call API defines an RPC interface between them. We introduce Iago attacks, attacks that a malicious kernel can mount in this model. We show how a carefully chosen sequence of integer return values to Linux system calls can lead a supposedly protected process to act against its interests, and even to undertake arbitrary computation at the malicious kernel's behest. Iago attacks are evidence that protecting applications from malicious kernels is more difficult than previously realized.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Hofmann:2013:ISA, author = "Owen S. Hofmann and Sangman Kim and Alan M. Dunn and Michael Z. Lee and Emmett Witchel", title = "{InkTag}: secure applications on an untrusted operating system", journal = j-SIGPLAN, volume = "48", number = "4", pages = "265--278", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451146", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "InkTag is a virtualization-based architecture that gives strong safety guarantees to high-assurance processes even in the presence of a malicious operating system. InkTag advances the state of the art in untrusted operating systems in both the design of its hypervisor and in the ability to run useful applications without trusting the operating system. We introduce paraverification, a technique that simplifies the InkTag hypervisor by forcing the untrusted operating system to participate in its own verification. Attribute-based access control allows trusted applications to create decentralized access control policies. 
InkTag is also the first system of its kind to ensure consistency between secure data and metadata, ensuring recoverability in the face of system crashes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Giuffrida:2013:SAL, author = "Cristiano Giuffrida and Anton Kuijsten and Andrew S. Tanenbaum", title = "Safe and automatic live update for operating systems", journal = j-SIGPLAN, volume = "48", number = "4", pages = "279--292", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451147", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increasingly many systems have to run all the time with no downtime allowed. Consider, for example, systems controlling electric power plants and e-banking servers. Nevertheless, security patches and a constant stream of new operating system versions need to be deployed without stopping running programs. These factors naturally lead to a pressing demand for live update---upgrading all or parts of the operating system without rebooting. Unfortunately, existing solutions require significant manual intervention and thus work reliably only for small operating system patches. In this paper, we describe an automated system for live update that can safely and automatically handle major upgrades without rebooting. We have implemented our ideas in Proteos, a new research OS designed with live update in mind. Proteos relies on system support and nonintrusive instrumentation to handle even very complex updates with minimal manual effort. The key novelty is the idea of state quiescence, which allows updates to happen only in safe and predictable system states. A second novelty is the ability to automatically perform transactional live updates at the process level, ensuring a safe and stable update process. Unlike prior solutions, Proteos supports automated state transfer, state checking, and hot rollback. We have evaluated Proteos on 50 real updates and on novel live update scenarios. The results show that our techniques can effectively support both simple and complex updates, while outperforming prior solutions in terms of flexibility, security, reliability, and stability of the update process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Mai:2013:VSI, author = "Haohui Mai and Edgar Pek and Hui Xue and Samuel Talmadge King and Parthasarathy Madhusudan", title = "Verifying security invariants in {ExpressOS}", journal = j-SIGPLAN, volume = "48", number = "4", pages = "293--304", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451148", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Security for applications running on mobile devices is important. In this paper we present ExpressOS, a new OS for enabling high-assurance applications to run on commodity mobile devices securely. 
Our main contributions are a new OS architecture and our use of formal methods for proving key security invariants about our implementation. In our use of formal methods, we focus solely on proving that our OS implements our security invariants correctly, rather than striving for full functional correctness, requiring significantly less verification effort while still proving the security relevant aspects of our system. We built ExpressOS, analyzed its security, and tested its performance. Our evaluation shows that the performance of ExpressOS is comparable to an Android-based system. In one test, we ran the same web browser on ExpressOS and on an Android-based system, and found that ExpressOS adds 16\% overhead on average to the page load latency time for nine popular web sites.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Schkufza:2013:SS, author = "Eric Schkufza and Rahul Sharma and Alex Aiken", title = "Stochastic superoptimization", journal = j-SIGPLAN, volume = "48", number = "4", pages = "305--316", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451150", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We formulate the loop-free binary superoptimization task as a stochastic search problem. The competing constraints of transformation correctness and performance improvement are encoded as terms in a cost function, and a Markov Chain Monte Carlo sampler is used to rapidly explore the space of all possible programs to find one that is an optimization of a given target program. Although our method sacrifices completeness, the scope of programs we are able to consider, and the resulting quality of the programs that we produce, far exceed those of existing superoptimizers. Beginning from binaries compiled by llvm -O0 for 64-bit x86, our prototype implementation, STOKE, is able to produce programs which either match or outperform the code produced by gcc -O3, icc -O3, and in some cases, expert handwritten assembly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Schulte:2013:ARB, author = "Eric Schulte and Jonathan DiLorenzo and Westley Weimer and Stephanie Forrest", title = "Automated repair of binary and assembly programs for cooperating embedded devices", journal = j-SIGPLAN, volume = "48", number = "4", pages = "317--328", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451151", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method for automatically repairing arbitrary software defects in embedded systems, which have limited memory, disk and CPU capacities, but exist in great numbers. We extend evolutionary computation (EC) algorithms that search for valid repairs at the source code level to assembly and ELF format binaries, compensating for limited system resources with several algorithmic innovations. 
Our method does not require access to the source code or build toolchain of the software under repair, does not require program instrumentation, specialized execution environments, or virtual machines, or prior knowledge of the bug type. We repair defects in ARM and x86 assembly as well as ELF binaries, observing decreases of 86\% in memory and 95\% in disk requirements, with 62\% decrease in repair time, compared to similar source-level techniques. These advances allow repairs previously possible only with C source code to be applied to any ARM or x86 assembly or ELF executable. Efficiency gains are achieved by introducing stochastic fault localization, with much lower overhead than comparable deterministic methods, and low-level program representations. When distributed over multiple devices, our algorithm finds repairs faster than predicted by naive parallelism. Four devices using our approach are five times more efficient than a single device because of our collaboration model. The algorithm is implemented on Nokia N900 smartphones, with inter-phone communication fitting in 900 bytes sent in 7 SMS text messages per device per repair on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Cui:2013:VSR, author = "Heming Cui and Gang Hu and Jingyue Wu and Junfeng Yang", title = "Verifying systems rules using rule-directed symbolic execution", journal = j-SIGPLAN, volume = "48", number = "4", pages = "329--342", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451152", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Systems code must obey many rules, such as ``opened files must be closed.'' One approach to verifying rules is static analysis, but this technique cannot infer precise runtime effects of code, often emitting many false positives. An alternative is symbolic execution, a technique that verifies program paths over all inputs up to a bounded size. However, when applied to verify rules, existing symbolic execution systems often blindly explore many redundant program paths while missing relevant ones that may contain bugs. Our key insight is that only a small portion of paths are relevant to rules, and the rest (majority) of paths are irrelevant and do not need to be verified. Based on this insight, we create WOODPECKER, a new symbolic execution system for effectively checking rules on systems programs. It provides a set of builtin checkers for common rules, and an interface for users to easily check new rules. It directs symbolic execution toward the program paths relevant to a checked rule, and soundly prunes redundant paths, exponentially speeding up symbolic execution. It is designed to be heuristic-agnostic, enabling users to leverage existing powerful search heuristics. Evaluation on 136 systems programs totaling 545K lines of code, including some of the most widely used programs, shows that, with a time limit of typically just one hour for each verification run, WOODPECKER effectively verifies 28.7\% of the program and rule combinations over bounded input, whereas an existing symbolic execution system KLEE verifies only 8.5\%. For the remaining combinations, WOODPECKER verifies 4.6 times as many relevant paths as KLEE. 
With a longer time limit, WOODPECKER verifies many more paths than KLEE, e.g., 17 times as many with a four-hour limit. WOODPECKER detects 113 rule violations, including 10 serious data loss errors with the 2 most serious ones already confirmed by the corresponding developers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Xiang:2013:HHO, author = "Xiaoya Xiang and Chen Ding and Hao Luo and Bin Bao", title = "{HOTL}: a higher order theory of locality", journal = j-SIGPLAN, volume = "48", number = "4", pages = "343--356", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451153", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The locality metrics are many, for example, miss ratio to test performance, data footprint to manage cache sharing, and reuse distance to analyze and optimize a program. It is unclear how different metrics are related, whether one subsumes another, and what combination may represent locality completely. This paper first derives a set of formulas to convert between five locality metrics and gives the condition for correctness. The transformation is analogous to differentiation and integration used to convert between higher order polynomials. As a result, these metrics can be assigned an order and organized into a hierarchy. Using the new theory, the paper then develops two techniques: one measures the locality in real time without special hardware support, and the other predicts multicore cache interference without parallel testing. The paper evaluates them using sequential and parallel programs as well as for a parallel mix of sequential programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Kang:2013:HPP, author = "Hui Kang and Jennifer L. Wong", title = "To hardware prefetch or not to prefetch?: a virtualized environment study and core binding approach", journal = j-SIGPLAN, volume = "48", number = "4", pages = "357--368", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451155", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most hardware and software vendors suggest disabling hardware prefetching in virtualized environments. They claim that prefetching is detrimental to application performance due to inaccurate prediction caused by workload diversity and VM interference on shared cache. However, no comprehensive or quantitative measurements to support this belief have been performed. This paper is the first to systematically measure the influence of hardware prefetching in virtualized environments. We examine a wide variety of benchmarks on three types of chip-multiprocessors (CMPs) to analyze the hardware prefetching performance. We conduct extensive experiments by taking into account a number of important virtualization factors. We find that hardware prefetching has minimal destructive influence under most configurations.
Only with certain application combinations does prefetching influence the overall performance. To leverage these findings and make hardware prefetching effective across a diversity of virtualized environments, we propose a dynamic prefetching-aware VCPU-core binding approach (PAVCB), which includes two phases --- classifying and binding. The workload of each VM is classified into different cache sharing constraint categories based upon its cache access characteristics, considering both prefetch requests and demand requests. Then following heuristic rules, the VCPUs of each VM are scheduled onto appropriate cores subject to cache sharing constraints. We show that the proposed approach can improve performance by 12\% on average over the default scheduler and 46\% over manual system administrator bindings across different workload combinations in the presence of hardware prefetching.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Kim:2013:DBC, author = "Hwanju Kim and Sangwook Kim and Jinkyu Jeong and Joonwon Lee and Seungryoul Maeng", title = "Demand-based coordinated scheduling for {SMP VMs}", journal = j-SIGPLAN, volume = "48", number = "4", pages = "369--380", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451156", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As processor architectures have been enhancing their computing capacity by increasing core counts, independent workloads can be consolidated on a single node for the sake of high resource efficiency in data centers. With the prevalence of virtualization technology, each individual workload can be hosted on a virtual machine for strong isolation between co-located workloads. Along with this trend, hosted applications have increasingly been multithreaded to take advantage of improved hardware parallelism. Although the performance of many multithreaded applications highly depends on communication (or synchronization) latency, existing schemes of virtual machine scheduling do not explicitly coordinate virtual CPUs based on their communication behaviors. This paper presents a demand-based coordinated scheduling scheme for consolidated virtual machines that host multithreaded workloads. To this end, we propose communication-driven scheduling that controls time-sharing in response to inter-processor interrupts (IPIs) between virtual CPUs. On the basis of in-depth analysis on the relationship between IPI communications and coordination demands, we devise IPI-driven coscheduling and delayed preemption schemes, which effectively reduce synchronization latency and unnecessary CPU consumption. In addition, we introduce a load-conscious CPU allocation policy in order to address load imbalance in heterogeneously consolidated environments. The proposed schemes are evaluated with respect to various scenarios of mixed workloads using the PARSEC multithreaded applications. 
In the evaluation, our scheme improves the overall performance of consolidated workloads, especially communication-intensive applications, by reducing inefficient synchronization latency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Dashti:2013:TMH, author = "Mohammad Dashti and Alexandra Fedorova and Justin Funston and Fabien Gaud and Renaud Lachaize and Baptiste Lepers and Vivien Quema and Mark Roth", title = "Traffic management: a holistic approach to memory placement on {NUMA} systems", journal = j-SIGPLAN, volume = "48", number = "4", pages = "381--394", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451157", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "NUMA systems are characterized by Non-Uniform Memory Access times, where accessing data in a remote node takes longer than a local access. NUMA hardware has been built since the late 80's, and the operating systems designed for it were optimized for access locality. They co-located memory pages with the threads that accessed them, so as to avoid the cost of remote accesses. Contrary to older systems, modern NUMA hardware has much smaller remote wire delays, and so remote access costs per se are not the main concern for performance, as we discovered in this work. Instead, congestion on memory controllers and interconnects, caused by memory traffic from data-intensive applications, hurts performance a lot more. Because of that, memory placement algorithms must be redesigned to target traffic congestion. This requires an arsenal of techniques that go beyond optimizing locality. In this paper we describe Carrefour, an algorithm that addresses this goal. We implemented Carrefour in Linux and obtained performance improvements of up to 3.6 times relative to the default kernel, as well as significant improvements compared to NUMA-aware patch sets available for Linux. Carrefour never hurts performance by more than 4\% when memory placement cannot be improved. We present the design of Carrefour, the challenges of implementing it on modern hardware, and draw insights about hardware support that would help optimize system software on future NUMA systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Jog:2013:OCT, author = "Adwait Jog and Onur Kayiran and Nachiappan Chidambaram Nachiappan and Asit K. Mishra and Mahmut T. Kandemir and Onur Mutlu and Ravishankar Iyer and Chita R.
Das", title = "{OWL}: cooperative thread array aware scheduling techniques for improving {GPGPU} performance", journal = j-SIGPLAN, volume = "48", number = "4", pages = "395--406", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging GPGPU architectures, along with programming models like CUDA and OpenCL, offer a cost-effective platform for many applications by providing high thread level parallelism at lower energy budgets. Unfortunately, for many general-purpose applications, available hardware resources of a GPGPU are not efficiently utilized, leading to lost opportunity in improving performance. A major cause of this is the inefficiency of current warp scheduling policies in tolerating long memory latencies. In this paper, we identify that the scheduling decisions made by such policies are agnostic to thread-block, or cooperative thread array (CTA), behavior, and as a result inefficient. We present a coordinated CTA-aware scheduling policy that utilizes four schemes to minimize the impact of long memory latencies. The first two schemes, CTA-aware two-level warp scheduling and locality aware warp scheduling, enhance per-core performance by effectively reducing cache contention and improving latency hiding capability. The third scheme, bank-level parallelism aware warp scheduling, improves overall GPGPU performance by enhancing DRAM bank-level parallelism. The fourth scheme employs opportunistic memory-side prefetching to further enhance performance by taking advantage of open DRAM rows. Evaluations on a 28-core GPGPU platform with highly memory-intensive applications indicate that our proposed mechanism can provide 33\% average performance improvement compared to the commonly-employed round-robin warp scheduling policy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Pai:2013:IGC, author = "Sreepathi Pai and Matthew J. Thazhuthaveetil and R. Govindarajan", title = "Improving {GPGPU} concurrency with elastic kernels", journal = j-SIGPLAN, volume = "48", number = "4", pages = "407--418", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Each new generation of GPUs vastly increases the resources available to GPGPU programs. GPU programming models (like CUDA) were designed to scale to use these resources. However, we find that CUDA programs actually do not scale to utilize all available resources, with over 30\% of resources going unused on average for programs of the Parboil2 suite that we used in our work. Current GPUs therefore allow concurrent execution of kernels to improve utilization. In this work, we study concurrent execution of GPU kernels using multiprogram workloads on current NVIDIA Fermi GPUs. On two-program workloads from the Parboil2 benchmark suite we find concurrent execution is often no better than serialized execution. 
We identify that the lack of control over resource allocation to kernels is a major serialization bottleneck. We propose transformations that convert CUDA kernels into elastic kernels which permit fine-grained control over their resource usage. We then propose several elastic-kernel aware concurrency policies that offer significantly better performance and concurrency compared to the current CUDA policy. We evaluate our proposals on real hardware using multiprogrammed workloads constructed from benchmarks in the Parboil 2 suite. On average, our proposals increase system throughput (STP) by 1.21x and improve the average normalized turnaround time (ANTT) by 3.73x for two-program workloads when compared to the current CUDA concurrency implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Oh:2013:PAL, author = "Taewook Oh and Hanjun Kim and Nick P. Johnson and Jae W. Lee and David I. August", title = "Practical automatic loop specialization", journal = j-SIGPLAN, volume = "48", number = "4", pages = "419--430", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program specialization optimizes a program with respect to program invariants, including known, fixed inputs. These invariants can be used to enable optimizations that are otherwise unsound. In many applications, a program input induces predictable patterns of values across loop iterations, yet existing specializers cannot fully capitalize on this opportunity. To address this limitation, we present Invariant-induced Pattern based Loop Specialization (IPLS), the first fully-automatic specialization technique designed for everyday use on real applications. Using dynamic information-flow tracking, IPLS profiles the values of instructions that depend solely on invariants and recognizes repeating patterns across multiple iterations of hot loops. IPLS then specializes these loops, using those patterns to predict values across a large window of loop iterations. This enables aggressive optimization of the loop; conceptually, this optimization reconstructs recurring patterns induced by the input as concrete loops in the specialized binary. IPLS specializes real-world programs that prior techniques fail to specialize without requiring hints from the user. 
Experiments demonstrate a geomean speedup of 14.1\% with a maximum speedup of 138\% over the original codes when evaluated on three script interpreters and eleven scripts each.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Phothilimthana:2013:PPH, author = "Phitchaya Mangpo Phothilimthana and Jason Ansel and Jonathan Ragan-Kelley and Saman Amarasinghe", title = "Portable performance on heterogeneous architectures", journal = j-SIGPLAN, volume = "48", number = "4", pages = "431--444", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451162", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Trends in both consumer and high performance computing are bringing not only more cores, but also increased heterogeneity among the computational resources within a single machine. In many machines, one of the greatest computational resources is now their graphics coprocessors (GPUs), not just their primary CPUs. But GPU programming and memory models differ dramatically from conventional CPUs, and the relative performance characteristics of the different processors vary widely between machines. Different processors within a system often perform best with different algorithms and memory usage patterns, and achieving the best overall performance may require mapping portions of programs across all types of resources in the machine. To address the problem of efficiently programming machines with increasingly heterogeneous computational resources, we propose a programming model in which the best mapping of programs to processors and memories is determined empirically. Programs define choices in how their individual algorithms may work, and the compiler generates further choices in how they can map to CPU and GPU processors and memory systems. These choices are given to an empirical autotuning framework that allows the space of possible implementations to be searched at installation time. The rich choice space allows the autotuner to construct poly-algorithms that combine many different algorithmic techniques, using both the CPU and the GPU, to obtain better performance than any one technique alone. 
Experimental results show that algorithmic changes, and the varied use of both CPUs and GPUs, are necessary to obtain up to a 16.5x speedup over using a single program configuration for all architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Mittal:2013:EVE, author = "Aashish Mittal and Dushyant Bansal and Sorav Bansal and Varun Sethi", title = "Efficient virtualization on embedded {Power Architecture\reg} platforms", journal = j-SIGPLAN, volume = "48", number = "4", pages = "445--458", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451163", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Power Architecture\reg{} processors are popular and widespread on embedded systems, and such platforms are increasingly being used to run virtual machines. While the Power Architecture meets the Popek-and-Goldberg virtualization requirements for traditional trap-and-emulate style virtualization, the performance overhead of virtualization remains high. For example, workloads exhibiting a large amount of kernel activity typically show 3-5x slowdowns over bare-metal. Recent additions to the Linux kernel contain guest and host side paravirtual extensions for Power Architecture platforms. While these extensions improve performance significantly, they are guest-specific, guest-intrusive, and cover only a subset of all possible virtualization optimizations. We present a set of host-side optimizations that achieve comparable performance to the aforementioned paravirtual extensions, on an unmodified guest. Our optimizations are based on adaptive in-place binary translation. Unlike the paravirtual approach, our solution is guest neutral. We implement our ideas in a prototype based on Qemu/KVM. After our modifications, KVM can boot an unmodified Linux guest around 2.5x faster. We contrast our optimization approach with previous similar binary translation based approaches for the x86 architecture; in our experience, each architecture presents a unique set of challenges and optimization opportunities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Hill:2013:RDC, author = "Mark D. Hill", title = "Research directions for {21st Century} computer systems: {ASPLOS 2013} panel", journal = j-SIGPLAN, volume = "48", number = "4", pages = "459--460", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451165", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Four recent efforts call out architectural challenges and opportunities up and down the software/hardware stack. This panel will discuss, ``What should the community do to facilitate, transcend, or refute these partially overlapping visions?'' The panel is chaired by Mark D. 
Hill with other panel members not finalized for the ASPLOS'13 proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Madhavapeddy:2013:ULO, author = "Anil Madhavapeddy and Richard Mortier and Charalampos Rotsos and David Scott and Balraj Singh and Thomas Gazagnaire and Steven Smith and Steven Hand and Jon Crowcroft", title = "Unikernels: library operating systems for the cloud", journal = j-SIGPLAN, volume = "48", number = "4", pages = "461--472", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451167", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present unikernels, a new approach to deploying cloud services via applications written in high-level source code. Unikernels are single-purpose appliances that are compile-time specialised into standalone kernels, and sealed against modification when deployed to a cloud platform. In return they offer significant reduction in image sizes, improved efficiency and security, and should reduce operational costs. Our Mirage prototype compiles OCaml code into unikernels that run on commodity clouds and offer an order of magnitude reduction in code size without significant performance penalty. The architecture combines static type-safety with a single address-space layout that can be made immutable via a hypervisor extension. Mirage contributes a suite of type-safe protocol libraries, and our results demonstrate that the hypervisor is a platform that overcomes the hardware compatibility issues that have made past library operating systems impractical to deploy in the real-world.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Kadav:2013:FGF, author = "Asim Kadav and Matthew J. Renzelmann and Michael M. Swift", title = "Fine-grained fault tolerance using device checkpoints", journal = j-SIGPLAN, volume = "48", number = "4", pages = "473--484", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451168", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recovering faults in drivers is difficult compared to other code because their state is spread across both memory and a device. Existing driver fault-tolerance mechanisms either restart the driver and discard its state, which can break applications, or require an extensive logging mechanism to replay requests and recreate driver state. Even logging may be insufficient, though, if the semantics of requests are ambiguous. In addition, these systems either require large subsystems that must be kept up-to-date as the kernel changes, or require substantial rewriting of drivers. We present a new driver fault-tolerance mechanism that provides fine-grained control over the code protected. Fine-Grained Fault Tolerance (FGFT) isolates driver code at the granularity of a single entry point. It executes driver code as a transaction, allowing roll back if the driver fails. 
We develop a novel checkpointing mechanism to save and restore device state using existing power management code. Unlike past systems, FGFT can be incrementally deployed in a single driver without the need for a large kernel subsystem, but at the cost of small modifications to the driver. In the evaluation, we show that FGFT can have almost zero runtime cost in many cases, and that checkpoint-based recovery can reduce the duration of a failure by 79\% compared to restarting the driver. Finally, we show that applying FGFT to a driver requires little effort, and the majority of drivers in common classes already contain the power-management code needed for checkpoint/restore.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Silberstein:2013:GIF, author = "Mark Silberstein and Bryan Ford and Idit Keidar and Emmett Witchel", title = "{GPUfs}: integrating a file system with {GPUs}", journal = j-SIGPLAN, volume = "48", number = "4", pages = "485--498", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451169", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "GPU hardware is becoming increasingly general purpose, quickly outgrowing the traditional but constrained GPU-as-coprocessor programming model. To make GPUs easier to program and easier to integrate with existing systems, we propose making the host's file system directly accessible from GPU code. GPUfs provides a POSIX-like API for GPU programs, exploits GPU parallelism for efficiency, and optimizes GPU file access by extending the buffer cache into GPU memory. Our experiments, based on a set of real benchmarks adapted to use our file system, demonstrate the feasibility and benefits of our approach. For example, we demonstrate a simple self-contained GPU program which searches for a set of strings in the entire tree of Linux kernel source files over seven times faster than an eight-core CPU run.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Hunt:2013:DTN, author = "Nicholas Hunt and Tom Bergan and Luis Ceze and Steven D. Gribble", title = "{DDOS}: taming nondeterminism in distributed systems", journal = j-SIGPLAN, volume = "48", number = "4", pages = "499--508", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451170", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nondeterminism complicates the development and management of distributed systems, and arises from two main sources: the local behavior of each individual node as well as the behavior of the network connecting them. Taming nondeterminism effectively requires dealing with both sources. This paper proposes DDOS, a system that leverages prior work on deterministic multithreading to offer: (1) space-efficient record/replay of distributed systems; and (2) fully deterministic distributed behavior.
Leveraging deterministic behavior at each node makes outgoing messages strictly a function of explicit inputs. This allows us to record the system by logging just each message's arrival time, not the contents. Going further, we propose and implement an algorithm that makes all communication between nodes deterministic by scheduling communication onto a global logical timeline. We implement both algorithms in a system called DDOS and evaluate our system with parallel scientific applications, an HTTP/memcached system and a distributed microbenchmark with a high volume of peer-to-peer communication. Our results show up to two orders of magnitude reduction in log size of record/replay, and that distributed systems can be made deterministic with an order of magnitude of overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Wang:2013:TEH, author = "Cheng Wang and Youfeng Wu", title = "{TSO\_ATOMICITY}: efficient hardware primitive for {TSO}-preserving region optimizations", journal = j-SIGPLAN, volume = "48", number = "4", pages = "509--520", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451172", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program optimizations based on data dependences may not preserve the memory consistency in the programs. Previous works leverage a hardware ATOMICITY primitive to restrict the thread interleaving for preserving sequential consistency in region optimizations. However, ATOMICITY primitive is over restrictive on the thread interleaving for optimizing real-world applications developed with the popular Total-Store-Ordering (TSO) memory consistency, which is weaker than sequential consistency. In this paper, we present a novel hardware TSO\_ATOMICITY primitive, which has less restriction on the thread interleaving than ATOMICITY primitive to permit more efficient program execution than ATOMICITY primitive, but can still preserve TSO memory consistency in all region optimizations. Furthermore, TSO\_ATOMICITY primitive requires similar architecture support as ATOMICITY primitive and can be implemented with only slight change to the existing ATOMICITY primitive implementation. Our experimental results show that in a state-of-the-art dynamic binary optimization system on a large set of workloads, ATOMICITY primitive can only improve the performance by 4\% on average. TSO\_ATOMICITY primitive can reduce the overhead associated with ATOMICITY primitive and improve the performance by 12\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Jafri:2013:WGI, author = "Syed Ali Raza Jafri and Gwendolyn Voskuilen and T. N.
Vijaykumar", title = "{Wait-n-GoTM}: improving {HTM} performance by serializing cyclic dependencies", journal = j-SIGPLAN, volume = "48", number = "4", pages = "521--534", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451173", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory (TM) has been proposed to alleviate some key programmability problems in chip multiprocessors. Most TMs optimistically allow concurrent transactions, detecting read-write or write-write conflicts. Upon conflicts, existing hardware TMs (HTMs) use one of three conflict-resolution policies: (1) always-abort, (2) always-wait for some conflicting transactions to complete, or (3) always-go past conflicts and resolve acyclic conflicts at commit or abort upon cyclic dependencies. While each policy has advantages, the policies degrade performance under contention by limiting concurrency (always-abort, always-wait) or incurring late aborts due to cyclic dependencies (always-go). Thus, while always-go avoids acyclic aborts, no policy avoids cyclic aborts. We propose Wait-n-GoTM (WnGTM) to increase concurrency while avoiding cyclic aborts. We observe that most cyclic dependencies are caused by threads interleaving multiple accesses to a few heavily-read-write-shared delinquent data cache blocks. These accesses occur in code sections called cycle inducer sections (CISTs). Accordingly, we propose Wait-n-Go (WnG) conflict-resolution to avoid many cyclic aborts by predicting and serializing the CISTs. To support the WnG policy, we extend previous HTMs to (1) allow multiple readers and writers, (2) scalably identify dependencies, and (3) detect cyclic dependencies via new mechanisms, namely, conflict transactional state, order-capture, and hardware timestamps, respectively. In 16-core simulations of STAMP, WnGTM achieves average speedups of 46\% for higher-contention benchmarks and 28\% for all benchmarks over always-abort (TokenTM) with low-contention benchmarks remaining unchanged, compared to always-go (DATM) and always-wait (LogTM-SE), which perform worse than and 6\% better than TokenTM, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Qian:2013:VSP, author = "Xuehai Qian and Josep Torrellas and Benjamin Sahelices and Depei Qian", title = "{Volition}: scalable and precise sequential consistency violation detection", journal = j-SIGPLAN, volume = "48", number = "4", pages = "535--548", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451174", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sequential Consistency (SC) is the most intuitive memory model, and SC Violations (SCVs) produce unintuitive, typically incorrect executions. Most prior SCV detection schemes have used data races as proxies for SCVs, which is highly imprecise. Other schemes that have targeted data-race cycles are either too conservative or are designed only for two-processor cycles and snoopy-based systems. 
This paper presents Volition, the first hardware scheme that detects SCVs in a relaxed-consistency machine precisely, in a scalable manner, and for an arbitrary number of processors in the cycle. Volition leverages cache coherence protocol transactions to dynamically detect cycles in memory-access orders across threads. When a cycle is about to occur, an exception is triggered. Volition can be used in both directory- and snoopy-based coherence protocols. Our simulations of Volition in a 64-processor multicore with directory-based coherence running SPLASH-2 and Parsec programs show that Volition induces negligible traffic and execution overhead. In addition, it can detect SCVs with several processors. Volition is suitable for on-the-fly use.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Grossman:2013:HSF, author = "J. P. Grossman and Jeffrey S. Kuskin and Joseph A. Bank and Michael Theobald and Ron O. Dror and Douglas J. Ierardi and Richard H. Larson and U. Ben Schafer and Brian Towles and Cliff Young and David E. Shaw", title = "Hardware support for fine-grained event-driven computation in {Anton 2}", journal = j-SIGPLAN, volume = "48", number = "4", pages = "549--560", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451175", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Exploiting parallelism to accelerate a computation typically involves dividing it into many small tasks that can be assigned to different processing elements. An efficient execution schedule for these tasks can be difficult or impossible to determine in advance, however, if there is uncertainty as to when each task's input data will be available. Ideally, each task would run in direct response to the arrival of its input data, thus allowing the computation to proceed in a fine-grained event-driven manner. Realizing this ideal is difficult in practice, and typically requires sacrificing flexibility for performance. In Anton 2, a massively parallel special-purpose supercomputer for molecular dynamics simulations, we addressed this challenge by including a hardware block, called the dispatch unit, that provides flexible and efficient support for fine-grained event-driven computation. Its novel features include a many-to-many mapping from input data to a set of synchronization counters, and the ability to prioritize tasks based on their type. To solve the additional problem of using a fixed set of synchronization counters to track input data for a potentially large number of tasks, we created a software library that allows programmers to treat Anton 2 as an idealized machine with infinitely many synchronization counters.
The dispatch unit, together with this library, made it possible to simplify our molecular dynamics software by expressing it as a collection of independent tasks, and the resulting fine-grained execution schedule improved overall performance by up to 16\% relative to a coarse-grained schedule for precisely the same computation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '13 conference proceedings.", } @Article{Vitek:2013:SCR, author = "Jan Vitek", title = "{SIGPLAN Chair}'s report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "1--2", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gibbons:2013:ASV, author = "Jeremy Gibbons", title = "{ACM SIGPLAN Vice-Chair}'s report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "3--3", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Black:2013:SSR, author = "Andrew P. Black", title = "{SIGPLAN Secretary}'s report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "4--5", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lopes:2013:STR, author = "Cristina V. Lopes", title = "{SIGPLAN Treasurer}'s report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "6--6", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dreyer:2013:SMI, author = "Derek Dreyer", title = "{SIGPLAN} most influential paper awards", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "7--8", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lawall:2013:SPA, author = "Julia Lawall and Cristina V. 
Lopes", title = "{SIGPLAN Professional Activities Committee} report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "9--9", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hind:2013:CRH, author = "Michael Hind", title = "{CACM} research highlights annual report", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "10--11", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dreyer:2013:PP, author = "Derek Dreyer and John Field and Roberto Giacobazzi and Michael Hicks and Suresh Jagannathan and Mooly Sagiv and Peter Sewell and Phil Wadler", title = "Principles of {POPL}", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "12--16", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Krishnamurthi:2013:AES, author = "Shriram Krishnamurthi", title = "Artifact evaluation for software conferences", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "17--21", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software and other digital artifacts are amongst the most valuable contributions of computer science. Yet our conferences treat these mostly as second-class artifacts---especially conferences in the software sciences, which ought to know better. This article argues for elevating these other artifacts by making them part of the evaluation process for papers, and reports on experience from an iteration of an Artifact Evaluation Committee for ESEC/FSE 2011.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Flanagan:2013:PES, author = "Cormac Flanagan and K. Rustan M. Leino and Mark Lillibridge and Greg Nelson and James B. 
Saxe and Raymie Stata", title = "{PLDI 2002}: Extended static checking for {Java}", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "22--33", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software development and maintenance are costly endeavors. The cost can be reduced if more software defects are detected earlier in the development cycle. This paper introduces the Extended Static Checker for Java (ESC/Java), an experimental compile-time program checker that finds common programming errors. The checker is powered by verification-condition generation and automatic theorem-proving techniques. It provides programmers with a simple annotation language with which programmer design decisions can be expressed formally. ESC/Java examines the annotated software and warns of inconsistencies between the design decisions recorded in the annotations and the actual code, and also warns of potential runtime errors in the code. This paper gives an overview of the checker architecture and annotation language and describes our experience applying the checker to tens of thousands of lines of Java programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Findler:2013:ICH, author = "Robert Bruce Findler and Matthias Felleisen", title = "{ICFP 2002}: Contracts for higher-order functions", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "34--45", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Assertions play an important role in the construction of robust software. Their use in programming languages dates back to the 1970s. Eiffel, an object-oriented programming language, wholeheartedly adopted assertions and developed the ``Design by Contract'' philosophy. Indeed, the entire object-oriented community recognizes the value of assertion-based contracts on methods. In contrast, languages with higher-order functions do not support assertion-based contracts. Because predicates on functions are, in general, undecidable, specifying such predicates appears to be meaningless. Instead, the functional languages community developed type systems that statically approximate interesting predicates. In this paper, we show how to support higher-order function contracts in a theoretically well-founded and practically viable manner. Specifically, we introduce {$\lambda^{CON}$}, a typed lambda calculus with assertions for higher-order functions. The calculus models the assertion monitoring system that we employ in DrScheme. We establish basic properties of the model (type soundness, etc.) and illustrate the usefulness of contract checking with examples from DrScheme's code base. We believe that the development of an assertion system for higher-order functions serves two purposes.
On one hand, the system has strong practical potential because existing type systems simply cannot express many assertions that programmers would like to state. On the other hand, an inspection of a large base of invariants may provide inspiration for the direction of practical future type system research.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Berger:2013:ORC, author = "Emery D. Berger and Benjamin G. Zorn and Kathryn S. McKinley", title = "{OOPSLA 2002}: Reconsidering custom memory allocation", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "46--57", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers hoping to achieve performance improvements often use custom memory allocators. This in-depth study examines eight applications that use custom allocators. Surprisingly, for six of these applications, a state-of-the-art general-purpose allocator (the Lea allocator) performs as well as or better than the custom allocators. The two exceptions use regions, which deliver higher performance (improvements of up to 44\%). Regions also reduce programmer burden and eliminate a source of memory leaks. However, we show that the inability of programmers to free individual objects within regions can lead to a substantial increase in memory consumption. Worse, this limitation precludes the use of regions for common programming idioms, reducing their usefulness. We present a generalization of general-purpose and region-based allocators that we call reaps. Reaps are a combination of regions and heaps, providing a full range of region semantics with the addition of individual object deletion. We show that our implementation of reaps provides high performance, outperforming other allocators with region-like semantics. We then use a case study to demonstrate the space advantages and software engineering benefits of reaps in practice. Our results indicate that programmers needing fast regions should use reaps, and that most programmers considering custom allocators should instead use the Lea allocator.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bacon:2013:PRT, author = "David F. Bacon and Perry Cheng and V. T. Rajan", title = "{POPL 2003}: a real-time garbage collector with low overhead and consistent utilization", journal = j-SIGPLAN, volume = "48", number = "4S", pages = "58--71", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2502508.2502523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 15 15:53:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Now that the use of garbage collection in languages like Java is becoming widely accepted due to the safety and software engineering benefits it provides, there is significant interest in applying garbage collection to hard real-time systems. 
Past approaches have generally suffered from one of two major flaws: either they were not provably real-time, or they imposed large space overheads to meet the real-time bounds. We present a mostly non-moving, dynamically defragmenting collector that overcomes both of these limitations: by avoiding copying in most cases, space requirements are kept low; and by fully incrementalizing the collector we are able to meet real-time bounds. We implemented our algorithm in the Jikes RVM and show that at real-time resolution we are able to obtain mutator utilization rates of 45\% with only 1.6--2.5 times the actual space required by the application, a factor of 4 improvement in utilization over the best previously published results. Defragmentation causes no more than 4\% of the traced data to be copied.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wu:2013:HSC, author = "Youfeng Wu", title = "{HW\slash SW} co-designed acceleration of dynamic languages", journal = j-SIGPLAN, volume = "48", number = "5", pages = "1--2", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic Programming Languages, such as Java, JavaScript, PHP, Perl, Python, Ruby, etc., are dominating languages for programming the web. HW/SW co-designed virtual machine can significantly accelerate their executions by transparently leveraging internal HW features via an internal compiler. We also argue for a common API to interface dynamic languages with the HW/SW co-designed virtual machine, so that a single internal compiler can accelerate all major dynamic languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Khudia:2013:LCC, author = "Daya Shanker Khudia and Scott Mahlke", title = "Low cost control flow protection using abstract control signatures", journal = j-SIGPLAN, volume = "48", number = "5", pages = "3--12", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The continual trend of shrinking feature sizes and reducing voltage levels makes transistors faster and more efficient. However, it also makes them more susceptible to transient hardware faults. Transient faults due to high energy particle strikes or circuit crosstalk can corrupt the output of a program or cause it to crash. Previous studies have reported that as much as 70\% of the transient faults disturb program control flow, making it critical to protect control flow. Traditional approaches employ signatures to check that every control flow transfer in a program is valid. While having high fault coverage, large performance overheads are introduced by such detailed checking. We propose a coarse-grain control flow checking method to detect transient faults in a cost effective way. 
Our software-only approach is centered on the principle of abstraction: control flow that exhibits simple run-time properties (e.g., proper path length) is almost always completely correct. Our solution targets off-the-shelf commodity embedded systems to provide a low cost protection against transient faults. The proposed technique achieves its efficiency by simplifying signature calculations in each basic block and by performing checking at a coarse-grain level. The coarse-grain signature comparison points are obtained by the use of a region based analysis. In addition, we propose a technique to protect control flow transfers via call and return instructions to ensure all control flow is covered by our technique. Overall, our proposed technique has an average of 11\% performance overhead in comparison to 75\% performance overhead of previously proposed signature based techniques while maintaining approximately the same degree of fault coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Chen:2013:BEF, author = "Hao Chen and Chengmo Yang", title = "Boosting efficiency of fault detection and recovery through application-specific comparison and checkpointing", journal = j-SIGPLAN, volume = "48", number = "5", pages = "13--20", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465562", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While the unending technology scaling has brought reliability to the forefront of concerns of semiconductor industry, fault tolerance techniques are still rarely incorporated into existing designs due to their high overhead. One fault tolerance scheme that receives a lot of research attention is duplication and checkpointing. However, most of the techniques in the category employ a blind strategy to compare instruction results, therefore not only generating large overhead in buffering and verifying these values, but also inducing unnecessary rollbacks to recover faults that will never influence subsequent execution. To tackle these issues, we introduce in this paper an approach that identifies the minimum set of instruction results for fault detection and checkpointing. For a given application, the proposed technique first identifies the control and data flow information of each execution hotspot, and then selects only the instruction results that either influence the final program results or are needed during re-execution as the comparison set. 
Our experimental studies demonstrate that the proposed hotspot-targeting technique is able to reduce nearly 88\% of the comparison overhead and mask over 38\% of the total injected faults while at the same time delivering full fault coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Stilkerich:2013:JSE, author = "Isabella Stilkerich and Michael Strotz and Christoph Erhardt and Martin Hoffmann and Daniel Lohmann and Fabian Scheler and Wolfgang Schr{\"o}der-Preikschat", title = "A {JVM} for soft-error-prone embedded systems", journal = j-SIGPLAN, volume = "48", number = "5", pages = "21--32", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465571", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The reduction of structure sizes in microcontrollers, environmental conditions or low supply voltages increase the susceptibility of embedded systems to soft errors. As a result, the employment of fault-detection and fault-tolerance measures is becoming a mandatory task even for moderately critical applications. Accordingly, software-based techniques have recently gained in popularity, and a multitude of approaches that differ in the number and frequency of tolerated errors as well as their associated overhead have been proposed. Using type-safe programming languages to isolate critical software components is very popular among those techniques. An automated application of fault-detection and fault-tolerance measures based on the type system of the programming language and static code analyses is possible. It facilitates an easy evaluation of the protection characteristics and costs, as well as the migration of software to new hardware platforms with different failure rates. Transient faults, however, are not bound to the application code secured by the type system, but can also affect the correctness of the type system itself. Thereby, the type system might lose its ability to isolate critical components. As a consequence, it is essential to also protect the type system itself against soft errors. In this paper, we show how soft errors can affect the integrity of the type system. Furthermore, we provide means to secure it against these faults, thus preserving its isolating character.
These measures can be applied selectively to achieve a suitable tradeoff between level of protection and resource consumption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Finlayson:2013:IPE, author = "Ian Finlayson and Brandon Davis and Peter Gavin and Gang-Ryung Uh and David Whalley and Magnus Sj{\"a}lander and Gary Tyson", title = "Improving processor efficiency by statically pipelining instructions", journal = j-SIGPLAN, volume = "48", number = "5", pages = "33--44", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465559", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A new generation of applications requires reduced power consumption without sacrificing performance. Instruction pipelining is commonly used to meet application performance requirements, but some implementation aspects of pipelining are inefficient with respect to energy usage. We propose static pipelining as a new instruction set architecture to enable more efficient instruction flow through the pipeline, which is accomplished by exposing the pipeline structure to the compiler. While this approach simplifies hardware pipeline requirements, significant modifications to the compiler are required. This paper describes the code generation and compiler optimizations we implemented to exploit the features of this architecture. We show that we can achieve performance and code size improvements despite a very low-level instruction representation. We also demonstrate that static pipelining of instructions reduces energy usage by simplifying hardware, avoiding many unnecessary operations, and allowing the compiler to perform optimizations that are not possible on traditional architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Porpodas:2013:LLA, author = "Vasileios Porpodas and Marcelo Cintra", title = "{LUCAS}: latency-adaptive unified cluster assignment and instruction scheduling", journal = j-SIGPLAN, volume = "48", number = "5", pages = "45--54", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465565", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Clustered VLIW architectures are statically scheduled wide-issue architectures that combine the advantages of wide-issue processors along with the power and frequency scalability of clustered designs. Being statically scheduled, they require that the decision of mapping instructions to clusters be done by the compiler. State-of-the-art code generation for such architectures combines cluster-assignment and instruction scheduling in a single unified pass. The performance of the generated code, however, is very susceptible to the inter-cluster communication latency. This is due to the nature of the two clustering heuristics used. One is aggressive and works well for low inter-cluster latencies, while the other is more conservative and works well only for high latencies. 
In this paper we propose LUCAS, a novel unified cluster-assignment and instruction-scheduling algorithm that adapts to the inter-cluster latency better than the existing state-of-the-art schemes. LUCAS is a hybrid scheme that performs fine-grain switching between the two state-of-the art clustering heuristics, leading to better scheduling than either of them. It generates better performing code for a wide range of inter-cluster latency values.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Jang:2013:PSP, author = "Hakbeom Jang and Channoh Kim and Jae W. Lee", title = "Practical speculative parallelization of variable-length decompression algorithms", journal = j-SIGPLAN, volume = "48", number = "5", pages = "55--64", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Variable-length coding is widely used for efficient data compression. Typically, the compressor splits the original data into blocks and compresses each block with variable-length codes, hence producing variable-length compressed blocks. Although the compressor can easily exploit ample block-level parallelism, it is much more difficult to extract such coarse-grain parallelism from the decompressor because a block boundary cannot be located until decompression of the previous block is completed. This paper presents novel algorithms to efficiently predict block boundaries and a runtime system that enables efficient block-level parallel decompression, called SDM. The SDM execution model features speculative pipelining with three stages: Scanner, Decompressor, and Merger. The scanner stage employs a high-confidence prediction algorithm that finds compressed block boundaries without fully decompressing individual blocks. This information is communicated to the parallel decompressor stage in which multiple blocks are decompressed in parallel. The decompressed blocks are merged in order by the merger stage to produce the final output. The SDM runtime is specialized to execute this pipeline correctly and efficiently on resource-constrained embedded platforms. 
With SDM we effectively parallelize three production-grade variable-length decompression algorithms --- zlib, bzip2, and H.264 --- with maximum speedups of $ 2.50 \times $ and $ 8.53 \times $ (and geometric mean speedups of $ 1.96 \times $ and $ 4.04 \times $ ) on 4-core and 36-core embedded platforms, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Chattopadhyay:2013:PPS, author = "Sudipta Chattopadhyay and Lee Kee Chong and Abhik Roychoudhury", title = "Program performance spectrum", journal = j-SIGPLAN, volume = "48", number = "5", pages = "65--76", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Real-time and embedded applications often need to satisfy several non-functional properties such as timing. Consequently, performance validation is a crucial stage before the deployment of real-time and embedded software. Cache memories are often used to bridge the performance gap between a processor and memory subsystems. As a result, the analysis of caches plays a key role in the performance validation of real-time, embedded software. In this paper, we propose a novel approach to compute the cache performance signature of an entire program. Our technique is based on exploring the input domain through different path programs. Two paths belong to the same path program if they follow the same set of control flow edges but may vary in the iterations of loops encountered. Our experiments with several subject programs show that the different paths grouped into a path program have very similar and often exactly the same cache performance. Our path program exploration can be viewed as partitioning the input domain of the program. Each partition is associated with its cache performance and a symbolic formula capturing the set of program inputs which constitutes the partition. We show that such a partitioning technique has widespread usage in performance prediction, testing, debugging and design space exploration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Moreno:2013:NIP, author = "Carlos Moreno and Sebastian Fischmeister and M. Anwar Hasan", title = "Non-intrusive program tracing and debugging of deployed embedded systems through side-channel analysis", journal = j-SIGPLAN, volume = "48", number = "5", pages = "77--88", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465570", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One of the hardest aspects of embedded software development is that of debugging, especially when faulty behavior is observed at the production or deployment stage. Non-intrusive observation of the system's behavior is often insufficient to infer the cause of the problem and identify and fix the bug.
In this work, we present a novel approach for non-intrusive program tracing aimed at assisting developers in the task of debugging embedded systems at deployment or production stage, where standard debugging tools are usually no longer available. The technique is rooted in cryptography, in particular the area of side-channel attacks. Our proposed technique expands the scope of these cryptographic techniques so that we recover the sequence of operations from power consumption observations (power traces). To this end, we use digital signal processing techniques (in particular, spectral analysis) combined with pattern recognition techniques to determine blocks of source code being executed given the observed power trace. One of the important highlights of our contribution is the fact that the system works on a standard PC, capturing the power traces through the recording input of the sound card. Experimental results are presented and confirm that the approach is viable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Beemster:2013:RCD, author = "Marcel Beemster", title = "The role of {C} in the dark ages of multi-core", journal = j-SIGPLAN, volume = "48", number = "5", pages = "89--90", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465556", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Contrary to predictions of its demise, C remains a dominant programming language, especially in embedded systems. Speed and transparency dictate that it will be so for the next decade, despite its supposed unsuitability for programming parallel architectures. A flexible compiler development system is a unique vehicle to bend the C language and its implementation to the developers' will. Using hard-won experience in applying extended versions of C to diverse parallel architectures, C's potential in the dark ages of multi-core programming is examined.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '12 conference proceedings.", } @Article{Wang:2013:FHF, author = "Tianzheng Wang and Duo Liu and Yi Wang and Zili Shao", title = "{FTL 2}: a hybrid {\em f\/}lash {\em t\/}ranslation {\em l\/}ayer with logging for write reduction in flash memory", journal = j-SIGPLAN, volume = "48", number = "5", pages = "91--100", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "NAND flash memory has been widely used to build embedded devices such as smartphones and solid state drives (SSD) because of its high performance, low power consumption, great shock resistance and small form factor. However, its lifetime and performance are greatly constrained by partial page updates, which will lead to early depletion of free pages and frequent garbage collections. On the one hand, partial page updates are prevalent as a large portion of I/O does not modify file contents drastically. 
On the other hand, general-purpose cache usually does not specifically consider and eliminate duplicated contents, despite its popularity. In this paper, we propose a hybrid approach called FTL$^2$, which employs both logging and mapping techniques in flash translation layer (FTL), to tackle the endurance problem and performance degradation caused by partial page updates in flash memory. FTL$^2$ logs the latest contents in a high-speed temporary storage, called Content Cache, to handle partial page updates. Experimental results show that FTL$^2$ can greatly reduce page writes and postpone garbage collections with a small overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Li:2013:CDW, author = "Qingan Li and Lei Jiang and Youtao Zhang and Yanxiang He and Chun Jason Xue", title = "Compiler directed write-mode selection for high performance low power volatile {PCM}", journal = j-SIGPLAN, volume = "48", number = "5", pages = "101--110", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465564", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Micro-Controller Units (MCUs) are widely adopted ubiquitous computing devices. Due to tight cost and energy constraints, MCUs often integrate very limited internal RAM memory on top of Flash storage, which exposes Flash to heavy write traffic and results in short system lifetime. Architecting emerging Phase Change Memory (PCM) is a promising approach for MCUs due to its fast read speed and long write endurance. However, PCM, especially multi-level cell (MLC) PCM, has long write latency and requires large write energy, which diminishes the benefits of its replacement of traditional Flash. By studying MLC PCM write operations, we observe that writing MLC PCM can take advantage of two write modes --- fast write leaves cells in volatile state, and slow write leaves cells in non-volatile state. In this paper, we propose a compiler directed dual-write (CDDW) scheme that selects the best write mode for each write operation to maximize the overall performance and energy efficiency. Our experimental results show that CDDW reduces dynamic energy by 32.4\%(33.8\%) and improves performance by 6.3\%(35.9\%) compared with an all fast(slow) write approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Guan:2013:BBL, author = "Yong Guan and Guohui Wang and Yi Wang and Renhai Chen and Zili Shao", title = "{BLog}: block-level log-block management for {NAND} flash memory storage systems", journal = j-SIGPLAN, volume = "48", number = "5", pages = "111--120", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465560", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Log-block-based FTL (Flash Translation Layer) schemes have been widely used to manage NAND flash memory storage systems in industry.
In log-block-based FTLs, a few physical blocks called log blocks are used to hold all page updates from a large number of data blocks. Frequent page updates in log blocks introduce significant overhead, so log blocks become the system bottleneck. To address this problem, this paper presents a block-level log-block management scheme called BLog (Block-level Log-Block Management). In BLog, with the block level management, the update pages of a data block can be collected together and put into the same log block as much as possible; therefore, we can effectively reduce the associativities of log blocks so as to reduce the garbage collection overhead. We also propose a novel partial merge operation called reduced-order merge by which we can effectively postpone the garbage collection of log blocks so as to maximally utilize valid pages and reduce unnecessary erase operations in log blocks. Based on BLog, we design an FTL called BLogFTL for MLC NAND flash. We conduct experiments on a mixture of real-world and synthetic traces. The experimental results show that our scheme outperforms the previous log-block-based FTLs for MLC NAND flash.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Mehiaoui:2013:TSO, author = "Asma Mehiaoui and Ernest Wozniak and Sara Tucci-Piergiovanni and Chokri Mraidha and Marco {Di Natale} and Haibo Zeng and Jean-Philippe Babau and Laurent Lemarchand and S{\'e}bastien Gerard", title = "A two-step optimization technique for functions placement, partitioning, and priority assignment in distributed systems", journal = j-SIGPLAN, volume = "48", number = "5", pages = "121--132", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern development methodologies from the industry and the academia for complex real-time systems define a stage in which application functions are deployed onto an execution platform. The deployment consists of the placement of functions on a distributed network of nodes, the partitioning of functions in tasks and the scheduling of tasks and messages. None of the existing optimization techniques deal with the three stages of the deployment problem at the same time. In this paper, we present a staged approach towards the efficient deployment of real-time functions based on genetic algorithms and mixed integer linear programming techniques.
Application to case studies shows the applicability of the method to industry-size systems and the quality of the obtained solutions when compared to the true optimum for small size examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Bouakaz:2013:BME, author = "Adnan Bouakaz and Jean-Pierre Talpin", title = "Buffer minimization in earliest-deadline first scheduling of dataflow graphs", journal = j-SIGPLAN, volume = "48", number = "5", pages = "133--142", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465558", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Symbolic schedulability analysis of dataflow graphs is the process of synthesizing the timing parameters (i.e. periods, phases, and deadlines) of actors so that the task system is schedulable and achieves a high throughput when using a specific scheduling policy. Furthermore, the resulting schedule must ensure that communication buffers are underflow- and overflow-free. This paper describes a (partitioned) earliest-deadline first symbolic schedulability analysis of dataflow graphs that minimizes the buffering requirements. Our scheduling analysis consists of three major steps. (1) The construction of an abstract affine schedule of the graph that excludes overflow and underflow exceptions and minimizes the buffering requirements assuming some precedences between jobs. (2) Symbolic deadlines adjustment that guarantees precedences without the need for lock-based synchronizations. (3) The concretization of the affine schedule using a symbolic, fast-converging, processor-demand analysis for both uniprocessor and multiprocessor systems. Experimental results show that our technique improves the buffering requirements in many cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Geuns:2013:ADM, author = "Stefan J. Geuns and Joost P. H. M. Hausmans and Marco J. G. Bekooij", title = "Automatic dataflow model extraction from modal real-time stream processing applications", journal = j-SIGPLAN, volume = "48", number = "5", pages = "143--152", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465561", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many real-time stream processing applications are initially described as a sequential application containing while-loops, which execute for an unknown number of iterations. These modal applications have to be executed in parallel on an MPSoC system in order to meet their real-time throughput constraints. However, no suitable approach exists that can automatically derive a temporal analysis model from a sequential specification containing while-loops with an unknown number of iterations. This paper introduces an approach to the automatic generation of a Structured Variable-rate Phased Dataflow (SVPDF) model from a sequential specification of a modal application.
The real-time requirements of an application can be analyzed despite the presence of while-loops with an unknown number of iterations. It is shown that an algorithm that has a polynomial time computational complexity can be applied on the generated SVPDF model to determine whether a throughput constraint can be met. The enabler for the automatic generation of an SVPDF model is the decoupling of synchronization between tasks that contain different while-loops. A DVB-T radio transceiver illustrates the derivation of the SVPDF model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Wang:2013:PMO, author = "Cheng Wang and Sunita Chandrasekaran and Peng Sun and Barbara Chapman and Jim Holt", title = "Portable mapping of {OpenMP} to multicore embedded systems using {MCA APIs}", journal = j-SIGPLAN, volume = "48", number = "5", pages = "153--162", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multicore embedded systems are being widely used in telecommunication systems, robotics, medical applications and more. While they offer a high-performance, low-power solution, programming in an efficient way is still a challenge. In order to exploit the capabilities that the hardware offers, software developers are expected to handle many of the low-level details of programming including utilizing DMA, ensuring cache coherency, and inserting synchronization primitives explicitly. The state-of-the-art involves solutions where the software toolchain is too vendor-specific, thus tying the software to particular hardware and leaving no room for portability. In this paper we present a runtime system to explore mapping a high-level programming model, OpenMP, onto multicore embedded systems. A key feature of our scheme is that unlike the existing approaches that largely rely on POSIX threads, our approach leverages the Multicore Association (MCA) APIs as an OpenMP translation layer. The MCA APIs are a set of low-level APIs handling resource management, inter-process communications and task scheduling for multicore embedded systems. By deploying the MCA APIs, our runtime is able to effectively capture the characteristics of multicore embedded systems compared with the POSIX threads. Furthermore, the MCA layer enables our runtime implementation to be portable across various architectures. Thus programmers only need to maintain a single OpenMP code base which is compatible with various compilers, while on the other hand, the code is portable across different possible types of platforms. We have evaluated our runtime system using several embedded benchmarks.
The experiments demonstrate promising and competitive performance compared to the native approach for the platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Huber:2013:CWA, author = "Benedikt Huber and Daniel Prokesch and Peter Puschner", title = "Combined {WCET} analysis of bitcode and machine code using control-flow relation graphs", journal = j-SIGPLAN, volume = "48", number = "5", pages = "163--172", month = may, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499369.2465567", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static program analyses like stack usage analysis and worst-case execution time (WCET) analysis depend on the actual machine code generated by the compiler for the target system. As the analysis of binary code is costly, hard to diagnose and platform dependent, it is preferable to carry out parts of these analyses on a higher-level program representation. To this end, the higher-level code and the machine code need to be related, a difficult task due to the complexity of modern optimizing compilers. In this article, we present a novel representation called control-flow relation graphs, which provide an accurate model of the control-flow relation between machine code and the compiler's intermediate representation. In order to facilitate the integration of our approach in existing compiler frameworks, we develop a construction algorithm that builds the control-flow relation graph from partial mappings provided by the compiler. The WCET calculation method for control-flow relation graphs processes flow information from both the intermediate representation and machine code. Furthermore, we demonstrate the transformation of flow information from the IR to the machine code level, in order to use existing industrial-strength WCET analysis tools operating on machine code. We implemented the construction algorithm within the LLVM compiler framework, along with an implementation of the combined WCET calculation method. The evaluation demonstrates that the approach is able to relate bitcode (LLVM's intermediate representation) and machine code in a precise way, with a WCET increase of at most 2\% when using flow facts on the bitcode level, compared to equivalent ones on the machine-code level.
As the methods presented in this article provide a cost-effective way to reuse platform independent flow information, they have the potential to simplify WCET analysis, and popularize its use in the development process of real-time systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '13 conference proceedings.", } @Article{Smaragdakis:2013:LYF, author = "Yannis Smaragdakis", title = "Look up!: your future is in the cloud", journal = j-SIGPLAN, volume = "48", number = "6", pages = "1--2", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462157", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ``Cloud'' is a wonderfully expansive phrase used to denote computation and data storage centralized in a large datacenter and elastically accessed across a network. The concept is not new; web sites and business servers have run in datacenters for a long time. These, however, were specialized applications, outside of the mainstream of desktop programs. The past few years have seen enormous change as the mainstream shifts from a single computer to mobile devices and clusters of computers. Three factors are driving this change. (1) Mobile computing, where apps run on a size- and power-constrained device and would be far less interesting without backend systems to augment computation and storage capacity. (2) Big data, which uses clusters of computers to extract valuable information from vast amounts of unstructured data. (3) Inexpensive, elastic computing, pioneered by Amazon Web Services, which enables everyone to rapidly obtain and use many servers. As a researcher from the language and compiler community, I firmly believe this sea change is at heart a programming problem. Cloud computing is far different from the environment in which most of today's languages and tools were developed, and few programmers have mastered its complexity. New challenges include pervasive parallelism, partial failure, high and variable communication latency, and replication for reliability and throughput.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Cheung:2013:ODB, author = "Alvin Cheung and Armando Solar-Lezama and Samuel Madden", title = "Optimizing database-backed applications with query synthesis", journal = j-SIGPLAN, volume = "48", number = "6", pages = "3--14", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462180", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Object-relational mapping libraries are a popular way for applications to interact with databases because they provide transparent access to the database using the same language as the application. Unfortunately, using such frameworks often leads to poor performance, as modularity concerns encourage developers to implement relational operations in application code.
Such application code does not take advantage of the optimized relational implementations that database systems provide, such as efficient implementations of joins or push down of selection predicates. In this paper we present QBS, a system that automatically transforms fragments of application logic into SQL queries. QBS differs from traditional compiler optimizations as it relies on synthesis technology to generate invariants and postconditions for a code fragment. The postconditions and invariants are expressed using a new theory of ordered relations that allows us to reason precisely about both the contents and order of the records produced by complex code fragments that compute joins and aggregates. The theory is close in expressiveness to SQL, so the synthesized postconditions can be readily translated to SQL queries. Using 75 code fragments automatically extracted from over 120k lines of open-source code written using the Java Hibernate ORM, we demonstrate that our approach can convert a variety of imperative constructs into relational specifications and significantly improve application performance asymptotically by orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Singh:2013:AFG, author = "Rishabh Singh and Sumit Gulwani and Armando Solar-Lezama", title = "Automated feedback generation for introductory programming assignments", journal = j-SIGPLAN, volume = "48", number = "6", pages = "15--26", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462195", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new method for automatically providing feedback for introductory programming problems. In order to use this method, we need a reference implementation of the assignment, and an error model consisting of potential corrections to errors that students might make. Using this information, the system automatically derives minimal corrections to students' incorrect solutions, providing them with a measure of exactly how incorrect a given solution was, as well as feedback about what they did wrong. We introduce a simple language for describing error models in terms of correction rules, and formally define a rule-directed translation strategy that reduces the problem of finding minimal corrections in an incorrect program to the problem of synthesizing a correct program from a sketch. We have evaluated our system on thousands of real student attempts obtained from the Introduction to Programming course at MIT (6.00) and MITx (6.00x).
Our results show that relatively simple error models can correct on average 64\% of all incorrect submissions in our benchmark set.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Gvero:2013:CCU, author = "Tihomir Gvero and Viktor Kuncak and Ivan Kuraj and Ruzica Piskac", title = "Complete completion using types and weights", journal = j-SIGPLAN, volume = "48", number = "6", pages = "27--38", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462192", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing modern software typically involves composing functionality from existing libraries. This task is difficult because libraries may expose many methods to the developer. To help developers in such scenarios, we present a technique that synthesizes and suggests valid expressions of a given type at a given program point. As the basis of our technique we use type inhabitation for lambda calculus terms in long normal form. We introduce a succinct representation for type judgements that merges types into equivalence classes to reduce the search space, then reconstructs any desired number of solutions on demand. Furthermore, we introduce a method to rank solutions based on weights derived from a corpus of code. We implemented the algorithm and deployed it as a plugin for the Eclipse IDE for Scala. We show that the techniques we incorporated greatly increase the effectiveness of the approach. Our evaluation benchmarks are code examples from programming practice; we make them available for future comparisons.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Johnson:2013:FCP, author = "Nick P. Johnson and Taewook Oh and Ayal Zaks and David I. August", title = "Fast condensation of the program dependence graph", journal = j-SIGPLAN, volume = "48", number = "6", pages = "39--50", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aggressive compiler optimizations are formulated around the Program Dependence Graph (PDG). Many techniques, including loop fission and parallelization are concerned primarily with dependence cycles in the PDG. The Directed Acyclic Graph of Strongly Connected Components (DAGSCC) represents these cycles directly. The naive method to construct the DAGSCC first computes the full PDG. This approach limits adoption of aggressive optimizations because the number of analysis queries grows quadratically with program size, making DAGSCC construction expensive. Consequently, compilers optimize small scopes with weaker but faster analyses. We observe that many PDG edges do not affect the DAGSCC and that ignoring them cannot affect clients of the DAGSCC. Exploiting this insight, we present an algorithm to omit those analysis queries to compute the DAGSCC efficiently. 
Across 366 hot loops from 20 SPEC2006 benchmarks, this method computes the DAGSCC in half of the time using half as many queries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{ElWazeer:2013:SVD, author = "Khaled ElWazeer and Kapil Anand and Aparna Kotha and Matthew Smithson and Rajeev Barua", title = "Scalable variable and data type detection in a binary rewriter", journal = j-SIGPLAN, volume = "48", number = "6", pages = "51--60", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462165", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present scalable static analyses to recover variables, data types, and function prototypes from stripped x86 executables (without symbol or debug information) and obtain a functional intermediate representation (IR) for analysis and rewriting purposes. Our techniques on average run $ 352 \times $ faster than current techniques and still have the same precision. This enables analyzing executables as large as millions of instructions in minutes which is not possible using existing techniques. Our techniques can recover variables allocated to the floating point stack, unlike current techniques. We have integrated our techniques to obtain a compiler level IR that works correctly if recompiled and produces the same output as the input executable. We demonstrate scalability, precision and correctness of our proposed techniques by evaluating them on the complete SPEC2006 benchmarks suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Rajaram:2013:FRT, author = "Bharghava Rajaram and Vijay Nagarajan and Susmit Sarkar and Marco Elver", title = "Fast {RMWs} for {TSO}: semantics and implementation", journal = j-SIGPLAN, volume = "48", number = "6", pages = "61--72", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Read-Modify-Write (RMW) instructions are widely used as the building blocks of a variety of higher level synchronization constructs, including locks, barriers, and lock-free data structures. Unfortunately, they are expensive in architectures such as x86 and SPARC which enforce (variants of) Total-Store-Order (TSO). A key reason is that RMWs in these architectures are ordered like a memory barrier, incurring the cost of a write-buffer drain in the critical path. Such strong ordering semantics are dictated by the requirements of the strict atomicity definition (type-1) that existing TSO RMWs use. Programmers often do not need such strong semantics. Besides, weakening the atomicity definition of TSO RMWs, would also weaken their ordering --- thereby leading to more efficient hardware implementations. 
In this paper we argue for TSO RMWs to use weaker atomicity definitions --- we consider two weaker definitions: type-2 and type-3, with different relaxed ordering differences. We formally specify how such weaker RMWs would be ordered, and show that type-2 RMWs, in particular, can seamlessly replace existing type-1 RMWs in common synchronization idioms --- except in situations where a type-1 RMW is used as a memory barrier. Recent work has shown that the new C/C++11 concurrency model can be realized by generating conventional (type-1) RMWs for C/C++11 SC-atomic-writes and/or SC-atomic-reads. We formally prove that this is equally valid using the proposed type-2 RMWs; type-3 RMWs, on the other hand, could be used for SC-atomic-reads (and optionally SC-atomic-writes). We further propose efficient microarchitectural implementations for type-2 (type-3) RMWs --- simulation results show that our implementation reduces the cost of an RMW by up to 58.9\% (64.3\%), which translates into an overall performance improvement of up to 9.0\% (9.2\%) on a set of parallel programs, including those from the SPLASH-2, PARSEC, and STAMP benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Gordon:2013:RGR, author = "Colin S. Gordon and Michael D. Ernst and Dan Grossman", title = "Rely-guarantee references for refinement types over aliased mutable data", journal = j-SIGPLAN, volume = "48", number = "6", pages = "73--84", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reasoning about side effects and aliasing is the heart of verifying imperative programs. Unrestricted side effects through one reference can invalidate assumptions about an alias. We present a new type system approach to reasoning about safe assumptions in the presence of aliasing and side effects, unifying ideas from reference immutability type systems and rely-guarantee program logics. Our approach, rely-guarantee references, treats multiple references to shared objects similarly to multiple threads in rely-guarantee program logics. We propose statically associating rely and guarantee conditions with individual references to shared objects. Multiple aliases to a given object may coexist only if the guarantee condition of each alias implies the rely condition for all other aliases. We demonstrate that existing reference immutability type systems are special cases of rely-guarantee references. In addition to allowing precise control over state modification, rely-guarantee references allow types to depend on mutable data while still permitting flexible aliasing. Dependent types whose denotation is stable over the actions of the rely and guarantee conditions for a reference and its data will not be invalidated by any action through any alias. We demonstrate this with refinement (subset) types that may depend on mutable data. As a special case, we derive the first reference immutability type system with dependent types over immutable data. 
We show soundness for our approach and describe experience using rely-guarantee references in a dependently-typed monadic DSL in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Titzer:2013:HCF, author = "Ben L. Titzer", title = "Harmonizing classes, functions, tuples, and type parameters in {Virgil III}", journal = j-SIGPLAN, volume = "48", number = "6", pages = "85--94", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Languages are becoming increasingly multi-paradigm. Subtype polymorphism in statically-typed object-oriented languages is being supplemented with parametric polymorphism in the form of generics. Features like first-class functions and lambdas are appearing everywhere. Yet existing languages like Java, C\#, C++, D, and Scala seem to accrete ever more complexity when they reach beyond their original paradigm into another; inevitably older features have some rough edges that lead to nonuniformity and pitfalls. Given a fresh start, a new language designer is faced with a daunting array of potential features. Where to start? What is important to get right first, and what can be added later? What features must work together, and what features are orthogonal? We report on our experience with Virgil III, a practical language with a careful balance of classes, functions, tuples and type parameters. Virgil intentionally lacks many advanced features, yet we find its core feature set enables new species of design patterns that bridge multiple paradigms and emulate features not directly supported such as interfaces, abstract data types, ad hoc polymorphism, and variant types. Surprisingly, we find variance for function types and tuple types often replaces the need for other kinds of type variance when libraries are designed in a more functional style.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Burckhardt:2013:ACF, author = "Sebastian Burckhardt and Manuel Fahndrich and Peli de Halleux and Sean McDirmid and Michal Moskal and Nikolai Tillmann and Jun Kato", title = "{It}'s alive! {Continuous} feedback in {UI} programming", journal = j-SIGPLAN, volume = "48", number = "6", pages = "95--104", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462170", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Live programming allows programmers to edit the code of a running program and immediately see the effect of the code changes. This tightening of the traditional edit-compile-run cycle reduces the cognitive gap between program code and execution, improving the learning experience of beginning programmers while boosting the productivity of seasoned ones. 
Unfortunately, live programming is difficult to realize in practice as imperative languages lack well-defined abstraction boundaries that make live programming responsive or its feedback comprehensible. This paper enables live programming for user interface programming by cleanly separating the rendering and non-rendering aspects of a UI program, allowing the display to be refreshed on a code change without restarting the program. A type and effect system formalizes this separation and provides an evaluation model that incorporates the code update step. By putting live programming on a more formal footing, we hope to enable critical and technical discussion of live programming systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{DeVito:2013:TMS, author = "Zachary DeVito and James Hegarty and Alex Aiken and Pat Hanrahan and Jan Vitek", title = "{Terra}: a multi-stage language for high-performance computing", journal = j-SIGPLAN, volume = "48", number = "6", pages = "105--116", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462166", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-performance computing applications, such as auto-tuners and domain-specific languages, rely on generative programming techniques to achieve high performance and portability. However, these systems are often implemented in multiple disparate languages and perform code generation in a separate process from program execution, making certain optimizations difficult to engineer. We leverage a popular scripting language, Lua, to stage the execution of a novel low-level language, Terra. Users can implement optimizations in the high-level language, and use built-in constructs to generate and execute high-performance Terra code. To simplify meta-programming, Lua and Terra share the same lexical environment, but, to ensure performance, Terra code can execute independently of Lua's runtime. We evaluate our design by reimplementing existing multi-language systems entirely in Terra. Our Terra-based auto-tuner for BLAS routines performs within 20\% of ATLAS, and our DSL for stencil computations runs 2.3x faster than hand-written C.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Li:2013:SIA, author = "Jiajia Li and Guangming Tan and Mingyu Chen and Ninghui Sun", title = "{SMAT}: an input adaptive auto-tuner for sparse matrix-vector multiplication", journal = j-SIGPLAN, volume = "48", number = "6", pages = "117--126", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462181", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sparse Matrix Vector multiplication (SpMV) is an important kernel in both traditional high performance computing and emerging data-intensive applications. 
By far, SpMV libraries are optimized by either application-specific or architecture-specific approaches, making the libraries become too complicated to be used extensively in real applications. In this work we develop a Sparse Matrix-vector multiplication Auto-Tuning system (SMAT) to bridge the gap between specific optimizations and general-purpose usage. SMAT provides users with a unified programming interface in compressed sparse row (CSR) format and automatically determines the optimal format and implementation for any input sparse matrix at runtime. For this purpose, SMAT leverages a learning model, which is generated in an off-line stage by a machine learning method with a training set of more than 2000 matrices from the UF sparse matrix collection, to quickly predict the best combination of the matrix feature parameters. Our experiments show that SMAT achieves impressive performance of up to 51GFLOPS in single-precision and 37GFLOPS in double-precision on mainstream x86 multi-core processors, which are both more than 3 times faster than the Intel MKL library. We also demonstrate its adaptability in an algebraic multigrid solver from Hypre library with above 20\% performance improvement reported.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Kong:2013:WPT, author = "Martin Kong and Richard Veras and Kevin Stock and Franz Franchetti and Louis-No{\"e}l Pouchet and P. Sadayappan", title = "When polyhedral transformations meet {SIMD} code generation", journal = j-SIGPLAN, volume = "48", number = "6", pages = "127--138", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462187", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data locality and parallelism are critical optimization objectives for performance on modern multi-core machines. Both coarse-grain parallelism (e.g., multi-core) and fine-grain parallelism (e.g., vector SIMD) must be effectively exploited, but despite decades of progress at both ends, current compiler optimization schemes that attempt to address data locality and both kinds of parallelism often fail at one of the three objectives. We address this problem by proposing a 3-step framework, which aims for integrated data locality, multi-core parallelism and SIMD execution of programs. We define the concept of vectorizable codelets, with properties tailored to achieve effective SIMD code generation for the codelets. We leverage the power of a modern high-level transformation framework to restructure a program to expose good ISA-independent vectorizable codelets, exploiting multi-dimensional data reuse. Then, we generate ISA-specific customized code for the codelets, using a collection of lower-level SIMD-focused optimizations. We demonstrate our approach on a collection of numerical kernels that we automatically tile, parallelize and vectorize, exhibiting significant performance improvements over existing compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Schneider:2013:PLS, author = "Fred B. 
Schneider", title = "Programming languages in security: keynote", journal = j-SIGPLAN, volume = "48", number = "6", pages = "139--140", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Huang:2013:CRL, author = "Jeff Huang and Charles Zhang and Julian Dolby", title = "{CLAP}: recording local executions to reproduce concurrency failures", journal = j-SIGPLAN, volume = "48", number = "6", pages = "141--152", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462167", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present CLAP, a new technique to reproduce concurrency bugs. CLAP has two key steps. First, it logs thread local execution paths at runtime. Second, offline, it computes memory dependencies that accord with the logged execution and are able to reproduce the observed bug. The second step works by combining constraints from the thread paths and constraints based on a memory model, and computing an execution with a constraint solver. CLAP has four major advantages. First, logging purely local execution of each thread is substantially cheaper than logging memory interactions, which enables CLAP to be efficient compared to previous approaches. Second, our logging does not require any synchronization and hence with no added memory barriers or fences; this minimizes perturbation and missed bugs due to extra synchronizations foreclosing certain racy behaviors. Third, since it uses no synchronization, we extend CLAP to work on a range of relaxed memory models, such as TSO and PSO, in addition to sequential consistency. Fourth, CLAP can compute a much simpler execution than the original one, that reveals the bug with minimal thread context switches. To mitigate the scalability issues, we also present an approach to parallelize constraint solving, which theoretically scales our technique to programs with arbitrary execution length. 
Experimental results on a variety of multithreaded benchmarks and real world concurrent applications validate these advantages by showing that our technique is effective in reproducing concurrency bugs even under relaxed memory models; furthermore, it is significantly more efficient than a state-of-the-art technique that records shared memory dependencies, reducing execution time overhead by 45\% and log size by 88\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Elmas:2013:CDS, author = "Tayfun Elmas and Jacob Burnim and George Necula and Koushik Sen", title = "{CONCURRIT}: a domain specific language for reproducing concurrency bugs", journal = j-SIGPLAN, volume = "48", number = "6", pages = "153--164", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462162", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present CONCURRIT, a domain-specific language (DSL) for reproducing concurrency bugs. Given some partial information about the nature of a bug in an application, a programmer can write a CONCURRIT script to formally and concisely specify a set of thread schedules to explore in order to find a schedule exhibiting the bug. Further, the programmer can specify how these thread schedules should be searched to find a schedule that reproduces the bug. We implemented CONCURRIT as an embedded DSL in C++, which uses manual or automatic source instrumentation to partially control the scheduling of the software under test. Using CONCURRIT, we were able to write concise tests to reproduce concurrency bugs in a variety of benchmarks, including the Mozilla's SpiderMonkey JavaScript engine, Memcached, Apache's HTTP server, and MySQL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Schaefer:2013:DDA, author = "Max Sch{\"a}efer and Manu Sridharan and Julian Dolby and Frank Tip", title = "Dynamic determinacy analysis", journal = j-SIGPLAN, volume = "48", number = "6", pages = "165--174", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462168", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an analysis for identifying determinate variables and expressions that always have the same value at a given program point. This information can be exploited by client analyses and tools to, e.g., identify dead code or specialize uses of dynamic language constructs such as eval, replacing them with equivalent static constructs. Our analysis is completely dynamic and only needs to observe a single execution of the program, yet the determinacy facts it infers hold for any execution. We present a formal soundness proof of the analysis for a simple imperative language, and a prototype implementation that handles full JavaScript. Finally, we report on two case studies that explored how static analysis for JavaScript could leverage the information gathered by dynamic determinacy analysis. 
We found that in some cases scalability of static pointer analysis was improved dramatically, and that many uses of runtime code generation could be eliminated.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Zhao:2013:FVS, author = "Jianzhou Zhao and Santosh Nagarakatte and Milo M. K. Martin and Steve Zdancewic", title = "Formal verification of {SSA}-based optimizations for {LLVM}", journal = j-SIGPLAN, volume = "48", number = "6", pages = "175--186", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462164", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern compilers, such as LLVM and GCC, use a static single assignment (SSA) intermediate representation (IR) to simplify and enable many advanced optimizations. However, formally verifying the correctness of SSA-based optimizations is challenging because SSA properties depend on a function's entire control-flow graph. This paper addresses this challenge by developing a proof technique for proving SSA-based program invariants and compiler optimizations. We use this technique in the Coq proof assistant to create mechanized correctness proofs of several ``micro'' transformations that form the building blocks for larger SSA optimizations. To demonstrate the utility of this approach, we formally verify a variant of LLVM's mem2reg transformation in Vellvm, a Coq-based formal semantics of the LLVM IR. The extracted implementation generates code with performance comparable to that of LLVM's unverified implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Morisset:2013:CTT, author = "Robin Morisset and Pankaj Pawan and Francesco Zappa Nardelli", title = "Compiler testing via a theory of sound optimisations in the {C11\slash C++11} memory model", journal = j-SIGPLAN, volume = "48", number = "6", pages = "187--196", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compilers sometimes generate correct sequential code but break the concurrency memory model of the programming language: these subtle compiler bugs are observable only when the miscompiled functions interact with concurrent contexts, making them particularly hard to detect. In this work we design a strategy to reduce the hard problem of hunting concurrency compiler bugs to differential testing of sequential code and build a tool that puts this strategy to work. Our first contribution is a theory of sound optimisations in the C11/C++11 memory model, covering most of the optimisations we have observed in real compilers and validating the claim that common compiler optimisations are sound in the C11/C++11 memory model. Our second contribution is to show how, building on this theory, concurrency compiler bugs can be identified by comparing the memory trace of compiled code against a reference memory trace for the source code. 
Our tool identified several mistaken write introductions and other unexpected behaviours in the latest release of the gcc compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Chen:2013:TCF, author = "Yang Chen and Alex Groce and Chaoqiang Zhang and Weng-Keen Wong and Xiaoli Fern and Eric Eide and John Regehr", title = "Taming compiler fuzzers", journal = j-SIGPLAN, volume = "48", number = "6", pages = "197--208", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462173", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aggressive random testing tools (``fuzzers'') are impressively effective at finding compiler bugs. For example, a single test-case generator has resulted in more than 1,700 bugs reported for a single JavaScript engine. However, fuzzers can be frustrating to use: they indiscriminately and repeatedly find bugs that may not be severe enough to fix right away. Currently, users filter out undesirable test cases using ad hoc methods such as disallowing problematic features in tests and grepping test results. This paper formulates and addresses the fuzzer taming problem: given a potentially large number of random test cases that trigger failures, order them such that diverse, interesting test cases are highly ranked. Our evaluation shows our ability to solve the fuzzer taming problem for 3,799 test cases triggering 46 bugs in a C compiler and 2,603 test cases triggering 28 bugs in a JavaScript engine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Blackshear:2013:ACS, author = "Sam Blackshear and Shuvendu K. Lahiri", title = "Almost-correct specifications: a modular semantic framework for assigning confidence to warnings", journal = j-SIGPLAN, volume = "48", number = "6", pages = "209--218", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462188", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modular assertion checkers are plagued with false alarms due to the need for precise environment specifications (preconditions and callee postconditions). Even the fully precise checkers report assertion failures under the most demonic environments allowed by unconstrained or partial specifications. The inability to preclude overly adversarial environments makes such checkers less attractive to developers and severely limits the adoption of such tools in the development cycle. In this work, we propose a parameterized framework for prioritizing the assertion failures reported by a modular verifier, with the goal of suppressing warnings from overly demonic environments. We formalize almost-correct specifications as the minimal weakening of an angelic specification (over a set of predicates) that precludes any dead code intraprocedurally. Our work is inspired by and generalizes some aspects of semantic inconsistency detection. Our formulation allows us to lift this idea to a general class of warnings.
We have developed a prototype {\tt acspec}, which we use to explore a few instantiations of the framework and report preliminary findings on a diverse set of C benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Cook:2013:RAN, author = "Byron Cook and Eric Koskinen", title = "Reasoning about nondeterminism in programs", journal = j-SIGPLAN, volume = "48", number = "6", pages = "219--230", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491969", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Branching-time temporal logics (e.g. CTL, CTL*, modal mu-calculus) allow us to ask sophisticated questions about the nondeterminism that appears in systems. Applications of this type of reasoning include planning, games, security analysis, disproving, precondition synthesis, environment synthesis, etc. Unfortunately, existing automatic branching-time verification tools have limitations that have traditionally restricted their applicability (e.g. push-down systems only, universal path quantifiers only, etc). In this paper we introduce an automation strategy that lifts many of these previous restrictions. Our method works reliably for properties with non-trivial mixtures of universal and existential modal operators. Furthermore, our approach is designed to support (possibly infinite-state) programs. The basis of our approach is the observation that existential reasoning can be reduced to universal reasoning if the system's state-space is appropriately restricted. This restriction on the state-space must meet a constraint derived from recent work on proving non-termination. The observation leads to a new route for implementation based on existing tools. To demonstrate the practical viability of our approach, we report on the results applying our preliminary implementation to a set of benchmarks drawn from the Windows operating system, the PostgreSQL database server, SoftUpdates patching system, as well as other hand-crafted examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Qiu:2013:NPS, author = "Xiaokang Qiu and Pranav Garg and Andrei Stefanescu and Parthasarathy Madhusudan", title = "Natural proofs for structure, data, and separation", journal = j-SIGPLAN, volume = "48", number = "6", pages = "231--242", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462169", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose natural proofs for reasoning with programs that manipulate data-structures against specifications that describe the structure of the heap, the data stored within it, and separation and framing of sub-structures. Natural proofs are a subclass of proofs that are amenable to completely automated reasoning, that provide sound but incomplete procedures, and that capture common reasoning tactics in program verification. 
We develop a dialect of separation logic over heaps, called Dryad, with recursive definitions that avoids explicit quantification. We develop ways to reason with heaplets using classical logic over the theory of sets, and develop natural proofs for reasoning using proof tactics involving disciplined unfoldings and formula abstractions. Natural proofs are encoded into decidable theories of first-order logic so as to be discharged using SMT solvers. We also implement the technique and show that a large class of more than 100 correct programs that manipulate data-structures are amenable to full functional correctness using the proposed natural proof method. These programs are drawn from a variety of sources including standard data-structures, the Schorr--Waite algorithm for garbage collection, a large number of low-level C routines from the Glib library and OpenBSD library, the Linux kernel, and routines from a secure verified OS-browser project. Our work is the first that we know of that can handle such a wide range of full functional verification properties of heaps automatically, given pre/post and loop invariant annotations. We believe that this work paves the way for deductive verification technology to be used by programmers who do not (and need not) understand the internals of the underlying logic solvers, significantly increasing their applicability in building reliable systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Yu:2013:GDS, author = "Hongtao Yu and Hou-Jen Ko and Zhiyuan Li", title = "General data structure expansion for multi-threading", journal = j-SIGPLAN, volume = "48", number = "6", pages = "243--252", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462182", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Among techniques for parallelizing sequential codes, privatization is a common and significant transformation performed by both compilers and runtime parallelizing systems. Without privatization, repetitive updates to the same data structures often introduce spurious data dependencies that hide the inherent parallelism. Unfortunately, it remains a significant challenge to compilers to automatically privatize dynamic and recursive data structures which appear frequently in real applications written in languages such as C/C++. This is because such languages lack a naming mechanism to define the address range of a pointer-based data structure, in contrast to arrays with explicitly declared bounds. In this paper we present a novel solution to this difficult problem by expanding general data structures such that memory accesses issued from different threads to contentious data structures are directed to different data fields. Based on compile-time type checking and a data dependence graph, this aggressive extension to the traditional scalar and array expansion isolates the address ranges among different threads, without struggling with privatization based on thread-private stacks, such that the targeted loop can be effectively parallelized. With this method fully implemented in GCC, experiments are conducted on a set of programs from well-known benchmark suites such as Mibench, MediaBench II and SPECint. 
Results show that the new approach can lead to a high speedup when executing the transformed code on multiple cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Hung:2013:AAS, author = "Wei-Lun Hung and Vijay K. Garg", title = "{AutoSynch}: an automatic-signal monitor based on predicate tagging", journal = j-SIGPLAN, volume = "48", number = "6", pages = "253--262", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462175", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "{Most programming languages use monitors with explicit signals for synchronization in shared-memory programs. Requiring programmers to signal threads explicitly results in many concurrency bugs due to missed notifications, or notifications on wrong condition variables. In this paper, we describe an implementation of an automatic signaling monitor in Java called AutoSynch that eliminates such concurrency bugs by removing the burden of signaling from the programmer. We show that the belief that automatic signaling monitors are prohibitively expensive is wrong. For most problems, programs based on AutoSynch are almost as fast as those based on explicit signaling. For some, AutoSynch is even faster than explicit signaling because it never uses signalAll, whereas the programmers end up using signalAll with the explicit signal mechanism. AutoSynch} achieves efficiency in synchronization based on three novel ideas. We introduce an operation called closure that enables the predicate evaluation in every thread, thereby reducing context switches during the execution of the program. Secondly, AutoSynch avoids signalAll by using a property called relay invariance that guarantees that whenever possible there is always at least one thread whose condition is true which has been signaled. Finally, AutoSynch uses a technique called predicate tagging to efficiently determine a thread that should be signaled. To evaluate the efficiency of AutoSynch, we have implemented many different well-known synchronization problems such as the producers/consumers problem, the readers/writers problems, and the dining philosophers problem. The results show that AutoSynch is almost as efficient as the explicit-signal monitor and even more efficient for some cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Golan-Gueta:2013:CLF, author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and Eran Yahav", title = "Concurrent libraries with foresight", journal = j-SIGPLAN, volume = "48", number = "6", pages = "263--274", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462172", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Linearizable libraries provide operations that appear to execute atomically. Clients, however, may need to execute a sequence of operations (a composite operation) atomically. 
We consider the problem of extending a linearizable library to support arbitrary atomic composite operations by clients. We introduce a novel approach in which the concurrent library ensures atomicity of composite operations by exploiting information (foresight) provided by its clients. We use a correctness condition, based on a notion of dynamic right-movers, that guarantees that composite operations execute atomically without deadlocks, and without using rollbacks. We present a static analysis to infer the foresight information required by our approach, allowing a compiler to automatically insert the foresight information into the client. This relieves the client programmer of this burden and simplifies writing client code. We present a generic technique for extending the library implementation to realize foresight-based synchronization. This technique is used to implement a general-purpose Java library for Map data structures --- the library permits composite operations to simultaneously work with multiple instances of Map data structures. We use the Maps library and the static analysis to enforce atomicity of a wide selection of real-life Java composite operations. Our experiments indicate that our approach enables realizing efficient and scalable synchronization for real-life composite operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Blackshear:2013:TPR, author = "Sam Blackshear and Bor-Yuh Evan Chang and Manu Sridharan", title = "{Thresher}: precise refutations for heap reachability", journal = j-SIGPLAN, volume = "48", number = "6", pages = "275--286", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462186", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a precise, path-sensitive static analysis for reasoning about heap reachability, that is, whether an object can be reached from another variable or object via pointer dereferences. Precise reachability information is useful for a number of clients, including static detection of a class of Android memory leaks. For this client, we found the heap reachability information computed by a state-of-the-art points-to analysis was too imprecise, leading to numerous false-positive leak reports. Our analysis combines a symbolic execution capable of path-sensitivity and strong updates with abstract heap information computed by an initial flow-insensitive points-to analysis. This novel mixed representation allows us to achieve both precision and scalability by leveraging the pre-computed points-to facts to guide execution and prune infeasible paths. We have evaluated our techniques in the Thresher tool, which we used to find several developer-confirmed leaks in Android applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Udupa:2013:TSP, author = "Abhishek Udupa and Arun Raghavan and Jyotirmoy V. Deshmukh and Sela Mador-Haim and Milo M. K. 
Martin and Rajeev Alur", title = "{TRANSIT}: specifying protocols with concolic snippets", journal = j-SIGPLAN, volume = "48", number = "6", pages = "287--296", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462174", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the maturing of technology for model checking and constraint solving, there is an emerging opportunity to develop programming tools that can transform the way systems are specified. In this paper, we propose a new way to program distributed protocols using concolic snippets. Concolic snippets are sample execution fragments that contain both concrete and symbolic values. The proposed approach allows the programmer to describe the desired system partially using the traditional model of communicating extended finite-state-machines (EFSM), along with high-level invariants and concrete execution fragments. Our synthesis engine completes an EFSM skeleton by inferring guards and updates from the given fragments which is then automatically analyzed using a model checker with respect to the desired invariants. The counterexamples produced by the model checker can then be used by the programmer to add new concrete execution fragments that describe the correct behavior in the specific scenario corresponding to the counterexample. We describe TRANSIT, a language and prototype implementation of the proposed specification methodology for distributed protocols. Experimental evaluations of TRANSIT to specify cache coherence protocols show that (1) the algorithm for expression inference from concolic snippets can synthesize expressions of size 15 involving typical operators over commonly occurring types, (2) for a classical directory-based protocol, TRANSIT automatically generates, in a few seconds, a complete implementation from a specification consisting of the EFSM structure and a few concrete examples for every transition, and (3) a published partial description of the SGI Origin cache coherence protocol maps directly to symbolic examples and leads to a complete implementation in a few iterations, with the programmer correcting counterexamples resulting from underspecified transitions by adding concrete examples in each iteration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Gao:2013:UMR, author = "Tiejun Gao and Karin Strauss and Stephen M. Blackburn and Kathryn S. McKinley and Doug Burger and James Larus", title = "Using managed runtime systems to tolerate holes in wearable memories", journal = j-SIGPLAN, volume = "48", number = "6", pages = "297--308", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462171", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "{New memory technologies, such as phase-change memory (PCM), promise denser and cheaper main memory, and are expected to displace DRAM. However, many of them experience permanent failures far more quickly than DRAM. 
DRAM mechanisms that handle permanent failures rely on very low failure rates and, if directly applied to PCM, are extremely inefficient: Discarding a page when the first line fails wastes 98\% of the memory. This paper proposes low complexity cooperative software and hardware that handle failure rates as high as 50\%. Our approach makes error handling transparent to the application by using the memory abstraction offered by managed languages. Once hardware error correction for a memory line is exhausted, rather than discarding the entire page, the hardware communicates the failed line to a failure-aware OS and runtime. The runtime ensures memory allocations never use failed lines and moves data when lines fail during program execution. This paper describes minimal extensions to an Immix mark-region garbage collector, which correctly utilizes pages with failed physical lines by skipping over failures. This paper also proposes hardware support that clusters failed lines at one end of a memory region to reduce fragmentation and improve performance under failures. Contrary to accepted hardware wisdom that advocates for wear-leveling, we show that with software support non-uniform failures delay the impact of memory failure. Together, these mechanisms incur no performance overhead when there are no failures and at failure levels of 10\% to 50\% suffer only an average overhead of 4\% and 12\%}, respectively. These results indicate that hardware and software cooperation can greatly extend the life of wearable memories.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Cohen:2013:LPC, author = "Nachshon Cohen and Erez Petrank", title = "Limitations of partial compaction: towards practical bounds", journal = j-SIGPLAN, volume = "48", number = "6", pages = "309--320", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491973", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compaction of a managed heap is considered a costly operation, and is avoided as much as possible in commercial runtimes. Instead, partial compaction is often used to defragment parts of the heap and avoid space blow up. Previous study of compaction limitation provided some initial asymptotic bounds but no implications for practical systems. 
In this work, we extend the theory to obtain better bounds and make them strong enough to become meaningful for modern systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Desai:2013:PSA, author = "Ankush Desai and Vivek Gupta and Ethan Jackson and Shaz Qadeer and Sriram Rajamani and Damien Zufferey", title = "{P}: safe asynchronous event-driven programming", journal = j-SIGPLAN, volume = "48", number = "6", pages = "321--332", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462184", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe the design and implementation of P, a domain-specific language to write asynchronous event driven code. P allows the programmer to specify the system as a collection of interacting state machines, which communicate with each other using events. P unifies modeling and programming into one activity for the programmer. Not only can a P program be compiled into executable code, but it can also be tested using model checking techniques. P allows the programmer to specify the environment, used to ``close'' the system during testing, as nondeterministic ghost machines. Ghost machines are erased during compilation to executable code; a type system ensures that the erasure is semantics preserving. The P language is designed so that a P program can be checked for responsiveness---the ability to handle every event in a timely manner. By default, a machine needs to handle every event that arrives in every state. But handling every event in every state is impractical. The language provides a notion of deferred events where the programmer can annotate when she wants to delay processing an event. The default safety checker looks for presence of unhandled events. The language also provides default liveness checks that an event cannot be potentially deferred forever. P was used to implement and verify the core of the USB device driver stack that ships with Microsoft Windows 8. The resulting driver is more reliable and performs better than its prior incarnation (which did not use P); we have more confidence in the robustness of its design due to the language abstractions and verification provided by P.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Green:2013:QSQ, author = "Alexander S. Green and Peter LeFanu Lumsdaine and Neil J. Ross and Peter Selinger and Beno{\^\i}t Valiron", title = "{Quipper}: a scalable quantum programming language", journal = j-SIGPLAN, volume = "48", number = "6", pages = "333--342", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462177", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The field of quantum algorithms is vibrant. Still, there is currently a lack of programming languages for describing quantum computation on a practical scale, i.e., not just at the level of toy problems. 
We address this issue by introducing Quipper, a scalable, expressive, functional, higher-order quantum programming language. Quipper has been used to program a diverse set of non-trivial quantum algorithms, and can generate quantum gate representations using trillions of gates. It is geared towards a model of computation that uses a classical computer to control a quantum device, but is not dependent on any particular model of quantum hardware. Quipper has proven effective and easy to use, and opens the door towards using formal methods to analyze quantum algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Isradisaikul:2013:REP, author = "Chinawat Isradisaikul and Andrew C. Myers", title = "Reconciling exhaustive pattern matching with objects", journal = j-SIGPLAN, volume = "48", number = "6", pages = "343--354", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Pattern matching, an important feature of functional languages, is in conflict with data abstraction and extensibility, which are central to object-oriented languages. Modal abstraction offers an integration of deep pattern matching and convenient iteration abstractions into an object-oriented setting; however, because of data abstraction, it is challenging for a compiler to statically verify properties such as exhaustiveness. In this work, we extend modal abstraction in the JMatch language to support static, modular reasoning about exhaustiveness and redundancy. New matching specifications allow these properties to be checked using an SMT solver. We also introduce expressive pattern-matching constructs. Our evaluation shows that these new features enable more concise code and that the performance of checking exhaustiveness and redundancy is acceptable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Bodden:2013:SLS, author = "Eric Bodden and T{\'a}rsis Tol{\^e}do and M{\'a}rcio Ribeiro and Claus Brabrand and Paulo Borba and Mira Mezini", title = "{SPL LIFT}: statically analyzing software product lines in minutes instead of years", journal = j-SIGPLAN, volume = "48", number = "6", pages = "355--364", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491976", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A software product line (SPL) encodes a potentially large variety of software products as variants of some common code base. Up until now, re-using traditional static analyses for SPLs was virtually intractable, as it required programmers to generate and analyze all products individually. In this work, however, we show how an important class of existing inter-procedural static analyses can be transparently lifted to SPLs. 
Without requiring programmers to change a single line of code, our approach SPLLIFT automatically converts any analysis formulated for traditional programs within the popular IFDS framework for inter-procedural, finite, distributive, subset problems to an SPL-aware analysis formulated in the IDE framework, a well-known extension to IFDS. Using a full implementation based on Heros, Soot, CIDE and JavaBDD, we show that with SPLLIFT one can reuse IFDS-based analyses without changing a single line of code. Through experiments using three static analyses applied to four Java-based product lines, we were able to show that our approach produces correct results and outperforms the traditional approach by several orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Zhang:2013:FOA, author = "Xin Zhang and Mayur Naik and Hongseok Yang", title = "Finding optimum abstractions in parametric dataflow analysis", journal = j-SIGPLAN, volume = "48", number = "6", pages = "365--376", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462185", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a technique to efficiently search a large family of abstractions in order to prove a query using a parametric dataflow analysis. Our technique either finds the cheapest such abstraction or shows that none exists. It is based on counterexample-guided abstraction refinement but applies a novel meta-analysis on abstract counterexample traces to efficiently find abstractions that are incapable of proving the query. We formalize the technique in a generic framework and apply it to two analyses: a type-state analysis and a thread-escape analysis. We demonstrate the effectiveness of the technique on a suite of Java benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Apinis:2013:HCW, author = "Kalmer Apinis and Helmut Seidl and Vesal Vojdani", title = "How to combine widening and narrowing for non-monotonic systems of equations", journal = j-SIGPLAN, volume = "48", number = "6", pages = "377--386", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462190", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-trivial analysis problems require complete lattices with infinite ascending and descending chains. In order to compute reasonably precise post-fixpoints of the resulting systems of equations, Cousot and Cousot have suggested accelerated fixpoint iteration by means of widening and narrowing. The strict separation into phases, however, may unnecessarily give up precision that cannot be recovered later. While widening is also applicable if equations are non-monotonic, this is no longer the case for narrowing. A narrowing iteration to improve a given post-fixpoint, additionally, must assume that all right-hand sides are monotonic. The latter assumption, though, is not met in presence of widening. 
It is also not met by equation systems corresponding to context-sensitive interprocedural analysis, possibly combining context-sensitive analysis of local information with flow-insensitive analysis of globals. As a remedy, we present a novel operator that combines a given widening operator with a given narrowing operator. We present adapted versions of round-robin as well as of worklist iteration, local, and side-effecting solving algorithms for the combined operator and prove that the resulting solvers always return sound results and are guaranteed to terminate for monotonic systems whenever only finitely many unknowns (constraint variables) are encountered.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Swamy:2013:VHO, author = "Nikhil Swamy and Joel Weinberger and Cole Schlesinger and Juan Chen and Benjamin Livshits", title = "Verifying higher-order programs with the {Dijkstra} monad", journal = j-SIGPLAN, volume = "48", number = "6", pages = "387--398", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491978", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern programming languages, ranging from Haskell and ML, to JavaScript, C\# and Java, all make extensive use of higher-order state. This paper advocates a new verification methodology for higher-order stateful programs, based on a new monad of predicate transformers called the Dijkstra monad. Using the Dijkstra monad has a number of benefits. First, the monad naturally yields a weakest pre-condition calculus. Second, the computed specifications are structurally simpler in several ways, e.g., single-state post-conditions are sufficient (rather than the more complex two-state post-conditions). Finally, the monad can easily be varied to handle features like exceptions and heap invariants, while retaining the same type inference algorithm. We implement the Dijkstra monad and its type inference algorithm for the F* programming language. Our most extensive case study evaluates the Dijkstra monad and its F* implementation by using it to verify JavaScript programs. Specifically, we describe a tool chain that translates programs in a subset of JavaScript decorated with assertions and loop invariants to F*. Once in F*, our type inference algorithm computes verification conditions and automatically discharges their proofs using an SMT solver. 
We use our tools to prove that a core model of the JavaScript runtime in F* respects various invariants and that a suite of JavaScript source programs are free of runtime errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Sergey:2013:MAI, author = "Ilya Sergey and Dominique Devriese and Matthew Might and Jan Midtgaard and David Darais and Dave Clarke and Frank Piessens", title = "Monadic abstract interpreters", journal = j-SIGPLAN, volume = "48", number = "6", pages = "399--410", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2491979", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent developments in the systematic construction of abstract interpreters hinted at the possibility of a broad unification of concepts in static analysis. We deliver that unification by showing context-sensitivity, polyvariance, flow-sensitivity, reachability-pruning, heap-cloning and cardinality-bounding to be independent of any particular semantics. Monads become the unifying agent between these concepts and between semantics. For instance, by plugging the same ``context-insensitivity monad'' into a monadically-parameterized semantics for Java or for the lambda calculus, it yields the expected context-insensitive analysis. To achieve this unification, we develop a systematic method for transforming a concrete semantics into a monadically-parameterized abstract machine. Changing the monad changes the behavior of the machine. By changing the monad, we recover a spectrum of machines---from the original concrete semantics to a monovariant, flow- and context-insensitive static analysis with a singly-threaded heap and weak updates. The monadic parameterization also suggests an abstraction over the ubiquitous monotone fixed-point computation found in static analysis. This abstraction makes it straightforward to instrument an analysis with high-level strategies for improving precision and performance, such as abstract garbage collection and widening. While the paper itself runs the development for continuation-passing style, our generic implementation replays it for direct-style lambda-calculus and Featherweight Java to support generality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Czaplicki:2013:AFR, author = "Evan Czaplicki and Stephen Chong", title = "Asynchronous functional reactive programming for {GUIs}", journal = j-SIGPLAN, volume = "48", number = "6", pages = "411--422", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphical user interfaces (GUIs) mediate many of our interactions with computers. Functional Reactive Programming (FRP) is a promising approach to GUI design, providing high-level, declarative, compositional abstractions to describe user interactions and time-dependent computations. 
We present Elm, a practical FRP language focused on easy creation of responsive GUIs. Elm has two major features: simple declarative support for Asynchronous FRP; and purely functional graphical layout. Asynchronous FRP allows the programmer to specify when the global ordering of event processing can be violated, and thus enables efficient concurrent execution of FRP programs; long-running computation can be executed asynchronously and not adversely affect the responsiveness of the user interface. Layout in Elm is achieved using a purely functional declarative framework that makes it simple to create and combine text, images, and video into rich multimedia displays. Together, Elm's two major features simplify the complicated task of creating responsive and usable GUIs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Kastrinis:2013:HCS, author = "George Kastrinis and Yannis Smaragdakis", title = "Hybrid context-sensitivity for points-to analysis", journal = j-SIGPLAN, volume = "48", number = "6", pages = "423--434", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462191", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Context-sensitive points-to analysis is valuable for achieving high precision with good performance. The standard flavors of context-sensitivity are call-site-sensitivity (kCFA) and object-sensitivity. Combining both flavors of context-sensitivity increases precision but at an infeasibly high cost. We show that a selective combination of call-site- and object-sensitivity for Java points-to analysis is highly profitable. Namely, by keeping a combined context only when analyzing selected language features, we can closely approximate the precision of an analysis that keeps both contexts at all times. In terms of speed, the selective combination of both kinds of context not only vastly outperforms non-selective combinations but is also faster than a mere object-sensitive analysis. This result holds for a large array of analyses (e.g., 1-object-sensitive, 2-object-sensitive with a context-sensitive heap, type-sensitive) establishing a new set of performance/precision sweet spots.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Zhang:2013:FAD, author = "Qirun Zhang and Michael R. Lyu and Hao Yuan and Zhendong Su", title = "Fast algorithms for {Dyck--CFL}-reachability with applications to alias analysis", journal = j-SIGPLAN, volume = "48", number = "6", pages = "435--446", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462159", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The context-free language (CFL) reachability problem is a well-known fundamental formulation in program analysis. In practice, many program analyses, especially pointer analyses, adopt a restricted version of CFL-reachability, Dyck-CFL-reachability, and compute on edge-labeled bidirected graphs. 
Solving the all-pairs Dyck-CFL-reachability on such bidirected graphs is expensive. For a bidirected graph with n nodes and m edges, the traditional dynamic programming style algorithm exhibits a subcubic time complexity for the Dyck language with k kinds of parentheses. When the underlying graphs are restricted to bidirected trees, an algorithm with O(n log n log k) time complexity was proposed recently. This paper studies the Dyck-CFL-reachability problems on bidirected trees and graphs. In particular, it presents two fast algorithms with O(n) and O(n + m log m) time complexities on trees and graphs respectively. We have implemented and evaluated our algorithms on a state-of-the-art alias analysis for Java. Results on standard benchmarks show that our algorithms achieve orders of magnitude speedup and consume less memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Sankaranarayanan:2013:SAP, author = "Sriram Sankaranarayanan and Aleksandar Chakarov and Sumit Gulwani", title = "Static analysis for probabilistic programs: inferring whole program properties from finitely many paths", journal = j-SIGPLAN, volume = "48", number = "6", pages = "447--458", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462179", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose an approach for the static analysis of probabilistic programs that sense, manipulate, and control based on uncertain data. Examples include programs used in risk analysis, medical decision making and cyber-physical systems. Correctness properties of such programs take the form of queries that seek the probabilities of assertions over program variables. We present a static analysis approach that provides guaranteed interval bounds on the values (assertion probabilities) of such queries. First, we observe that for probabilistic programs, it is possible to conclude facts about the behavior of the entire program by choosing a finite, adequate set of its paths. We provide strategies for choosing such a set of paths and verifying its adequacy. The queries are evaluated over each path by a combination of symbolic execution and probabilistic volume-bound computations. Each path yields interval bounds that can be summed up with a ``coverage'' bound to yield an interval that encloses the probability of assertion for the program as a whole. 
We demonstrate promising results on a suite of benchmarks from many different sources including robotic manipulators and medical decision making programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Liang:2013:MVL, author = "Hongjin Liang and Xinyu Feng", title = "Modular verification of linearizability with non-fixed linearization points", journal = j-SIGPLAN, volume = "48", number = "6", pages = "459--470", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462189", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Locating linearization points (LPs) is an intuitive approach for proving linearizability, but it is difficult to apply the idea in Hoare-style logic for formal program verification, especially for verifying algorithms whose LPs cannot be statically located in the code. In this paper, we propose a program logic with a lightweight instrumentation mechanism which can verify algorithms with non-fixed LPs, including the most challenging ones that use the helping mechanism to achieve lock-freedom (as in HSY elimination-based stack), or have LPs depending on unpredictable future executions (as in the lazy set algorithm), or involve both features. We also develop a thread-local simulation as the meta-theory of our logic, and show it implies contextual refinement, which is equivalent to linearizability. Using our logic we have successfully verified various classic algorithms, some of which are used in the java.util.concurrent package.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Sewell:2013:TVV, author = "Thomas Arthur Leck Sewell and Magnus O. Myreen and Gerwin Klein", title = "Translation validation for a verified {OS} kernel", journal = j-SIGPLAN, volume = "48", number = "6", pages = "471--482", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462183", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We extend the existing formal verification of the seL4 operating system microkernel from 9500 lines of C source code to the binary level. We handle all functions that were part of the previous verification. Like the original verification, we currently omit the assembly routines and volatile accesses used to control system hardware. More generally, we present an approach for proving refinement between the formal semantics of a program on the C source level and its formal semantics on the binary level, thus checking the validity of compilation, including some optimisations, and linking, and extending static properties proved of the source code to the executable. We make use of recent improvements in SMT solvers to almost fully automate this process. 
We handle binaries generated by unmodified gcc 4.5.1 at optimisation level 1, and can handle most of seL4 even at optimisation level 2.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Guha:2013:MVN, author = "Arjun Guha and Mark Reitblatt and Nate Foster", title = "Machine-verified network controllers", journal = j-SIGPLAN, volume = "48", number = "6", pages = "483--494", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462178", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In many areas of computing, techniques ranging from testing to formal modeling to full-blown verification have been successfully used to help programmers build reliable systems. But although networks are critical infrastructure, they have largely resisted analysis using formal techniques. Software-defined networking (SDN) is a new network architecture that has the potential to provide a foundation for network reasoning, by standardizing the interfaces used to express network programs and giving them a precise semantics. This paper describes the design and implementation of the first machine-verified SDN controller. Starting from the foundations, we develop a detailed operational model for OpenFlow (the most popular SDN platform) and formalize it in the Coq proof assistant. We then use this model to develop a verified compiler and run-time system for a high-level network programming language. We identify bugs in existing languages and tools built without formal foundations, and prove that these bugs are absent from our system. Finally, we describe our prototype implementation and our experiences using it to build practical applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Nowatzki:2013:GCC, author = "Tony Nowatzki and Michael Sartin-Tarm and Lorenzo {De Carli} and Karthikeyan Sankaralingam and Cristian Estan and Behnam Robatmili", title = "A general constraint-centric scheduling framework for spatial architectures", journal = j-SIGPLAN, volume = "48", number = "6", pages = "495--506", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462163", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Specialized execution using spatial architectures provides energy efficient computation, but requires effective algorithms for spatially scheduling the computation. Generally, this has been solved with architecture-specific heuristics, an approach which suffers from poor compiler/architect productivity, lack of insight on optimality, and inhibits migration of techniques between architectures. Our goal is to develop a scheduling framework usable for all spatial architectures. To this end, we express spatial scheduling as a constraint satisfaction problem using Integer Linear Programming (ILP). 
We observe that architecture primitives and scheduler responsibilities can be related through five abstractions: placement of computation, routing of data, managing event timing, managing resource utilization, and forming the optimization objectives. We encode these responsibilities as 20 general ILP constraints, which are used to create schedulers for the disparate TRIPS, DySER, and PLUG architectures. Our results show that a general declarative approach using ILP is implementable, practical, and typically matches or outperforms specialized schedulers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Lifflander:2013:STL, author = "Jonathan Lifflander and Sriram Krishnamoorthy and Laxmikant V. Kale", title = "{Steal Tree}: low-overhead tracing of work stealing schedulers", journal = j-SIGPLAN, volume = "48", number = "6", pages = "507--518", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462193", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Work stealing is a popular approach to scheduling task-parallel programs. The flexibility inherent in work stealing when dealing with load imbalance results in seemingly irregular computation structures, complicating the study of its runtime behavior. In this paper, we present an approach to efficiently trace async-finish parallel programs scheduled using work stealing. We identify key properties that allow us to trace the execution of tasks with low time and space overheads. We also study the usefulness of the proposed schemes in supporting algorithms for data-race detection and retentive stealing presented in the literature. We demonstrate that the perturbation due to tracing is within the variation in the execution time with 99\% confidence and the traces are concise, amounting to a few tens of kilobytes per thread in most cases. We also demonstrate that the traces enable significant reductions in the cost of detecting data races and result in low, stable space overheads in supporting retentive stealing for async-finish programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Ragan-Kelley:2013:HLC, author = "Jonathan Ragan-Kelley and Connelly Barnes and Andrew Adams and Sylvain Paris and Fr{\'e}do Durand and Saman Amarasinghe", title = "{Halide}: a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines", journal = j-SIGPLAN, volume = "48", number = "6", pages = "519--530", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Image processing pipelines combine the challenges of stencil computations and stream programs. They are composed of large graphs of different stencil stages, as well as complex reductions, and stages with global or data-dependent access patterns. 
Because of their complex structure, the performance difference between a naive implementation of a pipeline and an optimized one is often an order of magnitude. Efficient implementations require optimization of both parallelism and locality, but due to the nature of stencils, there is a fundamental tension between parallelism, locality, and introducing redundant recomputation of shared values. We present a systematic model of the tradeoff space fundamental to stencil pipelines, a schedule representation which describes concrete points in this space for each stage in an image processing pipeline, and an optimizing compiler for the Halide image processing language that synthesizes high performance implementations from a Halide algorithm and a schedule. Combining this compiler with stochastic search over the space of schedules enables terse, composable programs to achieve state-of-the-art performance on a wide range of real image processing pipelines, and across different hardware architectures, including multicores with SIMD, and heterogeneous CPU+GPU execution. From simple Halide programs written in a few hours, we demonstrate performance up to 5x faster than hand-tuned C, intrinsics, and CUDA implementations optimized by experts over weeks or months, for image processing applications beyond the reach of past automatic compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '13 conference proceedings.", } @Article{Jia:2013:SID, author = "Ning Jia and Chun Yang and Jing Wang and Dong Tong and Keyi Wang", title = "{SPIRE}: improving dynamic binary translation through {SPC}-indexed indirect branch redirecting", journal = j-SIGPLAN, volume = "48", number = "7", pages = "1--12", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Dynamic binary translation systems must perform an address translation for every execution of indirect branch instructions. The procedure to convert a Source binary Program Counter (SPC) address to a Translated Program Counter (TPC) address always takes more than 10 instructions, becoming a major source of performance overhead. This paper proposes a novel mechanism called SPc-Indexed REdirecting (SPIRE), which can significantly reduce the indirect branch handling overhead. SPIRE doesn't rely on hash lookup and address mapping table to perform address translation. It reuses the source binary code space to build an SPC-indexed redirecting table. This table can be indexed directly by the SPC address without hashing. With SPIRE, the indirect branch can jump to the original SPC address without address translation. The trampoline residing in the SPC address will redirect the control flow to the related code cache. Only 2-6 instructions are needed to handle an indirect branch execution. As part of the source binary would be overwritten, a shadow page mechanism is explored to keep the transparency of the corrupted source binary code page. Online profiling is adopted to reduce the memory overhead. We have implemented SPIRE on an x86 to x86 DBT system, and discussed the implementation issues on different guest and host architectures. 
The experiments show that, compared with hash lookup mechanism, SPIRE can reduce the performance overhead by 36.2\% on average, up to 51.4\%, while only 5.6\% extra memory is needed. SPIRE can cooperate with other indirect branch handling mechanisms easily, and we believe the idea of SPIRE can also be applied on other occasions that need address translation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{vonKoch:2013:LRB, author = "Tobias J. K. Edler von Koch and Bj{\"o}rn Franke", title = "Limits of region-based dynamic binary parallelization", journal = j-SIGPLAN, volume = "48", number = "7", pages = "13--22", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Efficiently executing sequential legacy binaries on chip multi-processors (CMPs) composed of many, small cores is one of today's most pressing problems. Single-threaded execution is a suboptimal option due to CMPs' lower single-core performance, while multi-threaded execution relies on prior parallelization, which is severely hampered by the low-level binary representation of applications compiled and optimized for a single-core target. A recent technology to address this problem is Dynamic Binary Parallelization (DBP), which creates a Virtual Execution Environment (VEE) taking advantage of the underlying multicore host to transparently parallelize the sequential binary executable. While still in its infancy, DBP has received broad interest within the research community. The combined use of DBP and thread-level speculation (TLS) has been proposed as a technique to accelerate legacy uniprocessor code on modern CMPs. In this paper, we investigate the limits of DBP and seek to gain an understanding of the factors contributing to these limits and the costs and overheads of its implementation. We have performed an extensive evaluation using a parameterizable DBP system targeting a CMP with light-weight architectural TLS support. We demonstrate that there is room for a significant reduction of up to 54\% in the number of instructions on the critical paths of legacy SPEC CPU2006 benchmarks. 
However, we show that it is much harder to translate these savings into actual performance improvements, with a realistic hardware-supported implementation achieving a speedup of 1.09 on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hsu:2013:IDB, author = "Chun-Chen Hsu and Pangfeng Liu and Jan-Jan Wu and Pen-Chung Yew and Ding-Yong Hong and Wei-Chung Hsu and Chien-Min Wang", title = "Improving dynamic binary optimization through early-exit guided code region formation", journal = j-SIGPLAN, volume = "48", number = "7", pages = "23--32", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Most dynamic binary translators (DBT) and optimizers (DBO) target binary traces, i.e. frequently executed paths, as code regions to be translated and optimized. Code region formation is the most important first step in all DBTs and DBOs. The quality of the dynamically formed code regions determines the extent and the types of optimization opportunities that can be exposed to DBTs and DBOs, and thus, determines the ultimate quality of the final optimized code. The Next-Executing-Tail (NET) trace formation method used in HP Dynamo is an early example of such techniques. Many existing trace formation schemes are variants of NET. They work very well for most binary traces, but they also suffer a major problem: the formed traces may contain a large number of early exits that could be branched out during the execution. If this happens frequently, the program execution will spend more time in the slow binary interpreter or in the unoptimized code regions than in the optimized traces in code cache. The benefit of the trace optimization is thus lost. Traces/regions with frequently taken early-exits are called delinquent traces/regions. Our empirical study shows that at least 8 of the 12 SPEC CPU2006 integer benchmarks have delinquent traces. In this paper, we propose a light-weight region formation technique called Early-Exit Guided Region Formation (EEG) to improve the quality of the formed traces/regions. It iteratively identifies and merges delinquent regions into larger code regions. We have implemented our EEG algorithm in two LLVM-based multi-threaded DBTs targeting ARM and IA32 instruction set architecture (ISA), respectively. Using SPEC CPU2006 benchmark suite with reference inputs, our results show that compared to an NET-variant currently used in QEMU, a state-of-the-art retargetable DBT, EEG can achieve a significant performance improvement of up to 72\% (27\% on average), and to 49\% (23\% on average) for IA32 and ARM, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kaufmann:2013:SCO, author = "Marco Kaufmann and Rainer G. 
Spallek", title = "Superblock compilation and other optimization techniques for a {Java}-based {DBT} machine emulator", journal = j-SIGPLAN, volume = "48", number = "7", pages = "33--40", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Superblock compilation techniques such as control flow graph (CFG) or trace compilation have become a widely adopted approach to increase the performance of dynamically compiling virtual machines even further. While this was shown to be successful for many conventional virtual machines, it did not result in a higher performance for Java-based DBT machine emulators so far. These emulators dynamically translate application binaries of a target machine into Java bytecode, which is then eventually compiled into the native code of the emulating host by the Java Virtual Machine (JVM). Successful superblock compilation techniques for this class of emulators must consider the special requirements that result from the two-stage translation as well as the characteristics of the JVM, such as the inability of most Java JIT compilers to handle large bytecode methods efficiently. In this paper, we present a superblock compilation approach for a Java-based DBT machine emulator that generates a performance increase of up to 90 percent and of 32 percent on average. The key idea of our design is to provide a large scope over the control flow of target applications across basic block boundaries for the JVM, while still keeping small bytecode methods for the execution units. In addition, we also present two further optimizations --- interpreter context elimination and program counter elimination --- which increase the emulation performance by 16 percent again. In total, the optimization techniques discussed in this paper provide an average performance gain of 48 percent for the surveyed emulator.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jo:2013:ELM, author = "Changyeon Jo and Erik Gustafsson and Jeongseok Son and Bernhard Egger", title = "Efficient live migration of virtual machines using shared storage", journal = j-SIGPLAN, volume = "48", number = "7", pages = "41--50", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Live migration of virtual machines (VM) across distinct physical hosts is an important feature of virtualization technology for maintenance, load-balancing and energy reduction, especially so for data centers operators and cluster service providers. Several techniques have been proposed to reduce the downtime of the VM being transferred, often at the expense of the total migration time. 
In this work, we present a technique to reduce the total time required to migrate a running VM from one host to another while keeping the downtime to a minimum. Based on the observation that modern operating systems use the better part of the physical memory to cache data from secondary storage, our technique tracks the VM's I/O operations to the network-attached storage device and maintains an updated mapping of memory pages that currently reside in identical form on the storage device. During the iterative pre-copy live migration process, instead of transferring those pages from the source to the target host, the memory-to-disk mapping is sent to the target host which then fetches the contents directly from the network-attached storage device. We have implemented our approach in the Xen hypervisor and ran a series of experiments with Linux HVM guests. On average, the presented technique reduces the total transfer time by over 30\% for a series of benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chiang:2013:IBM, author = "Jui-Hao Chiang and Han-Lin Li and Tzi-cker Chiueh", title = "Introspection-based memory de-duplication and migration", journal = j-SIGPLAN, volume = "48", number = "7", pages = "51--62", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Memory virtualization abstracts a physical machine's memory resource and presents to the virtual machines running on it a piece of physical memory that could be shared, compressed and moved. To optimize the memory resource utilization by fully leveraging the flexibility afforded by memory virtualization, it is essential that the hypervisor have some sense of how the guest VMs use their allocated physical memory. One way to do this is virtual machine introspection (VMI), which interprets byte values in a guest memory space into semantically meaningful data structures. However, identifying a guest VM's memory usage information such as the free memory pool is non-trivial.
This paper describes a bootstrapping VM introspection technique that can accurately extract free memory pool information from multiple versions of Windows and Linux without kernel version-specific hard-coding, shows how to apply this technique to improve the efficiency of memory de-duplication and memory state migration, and reports the resulting improvement in memory de-duplication speed, the gain in additional memory pages de-duplicated, and the reduction in traffic loads associated with memory state migration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cui:2013:VMV, author = "Lei Cui and Jianxin Li and Bo Li and Jinpeng Huai and Chunming Hu and Tianyu Wo and Hussain Al-Aqrabi and Lu Liu", title = "{VMScatter}: migrate virtual machines to many hosts", journal = j-SIGPLAN, volume = "48", number = "7", pages = "63--72", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Live virtual machine migration is a technique often used to migrate an entire OS with running applications in a non-disruptive fashion. Prior work has concentrated on one-to-one live migration, for which many techniques have been proposed, such as pre-copy, post-copy and log/replay. In contrast, we propose VMScatter, a one-to-many migration method to migrate virtual machines from one host to many other hosts simultaneously. First, by merging the identical pages within or across virtual machines, VMScatter multicasts only a single copy of these pages to the associated target hosts, avoiding redundant transmission. This is especially effective in practice when the virtual machines run the same OS and similar applications, so that there are plenty of identical pages. Second, we introduce a novel grouping algorithm to decide the placement of virtual machines; unlike previous scheduling algorithms, which focus on workload for load balancing or power saving, it also takes network traffic into account, a critical metric in data-intensive data centers. Third, we schedule the multicast sequence of packets to reduce the network overhead introduced by target hosts joining or leaving multicast groups.
Compared to the traditional live migration technique in QEMU/KVM, VMScatter reduces the total transferred data by 74.2\% and the total migration time by 69.1\%, and achieves network traffic reductions of 50.1\% to 70.3\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhou:2013:OVM, author = "Ruijin Zhou and Fang Liu and Chao Li and Tao Li", title = "Optimizing virtual machine live storage migration in heterogeneous storage environment", journal = j-SIGPLAN, volume = "48", number = "7", pages = "73--84", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Virtual machine (VM) live storage migration techniques significantly increase the mobility and manageability of virtual machines in the era of cloud computing. On the other hand, as solid state drives (SSDs) become increasingly popular in data centers, VM live storage migration will inevitably encounter heterogeneous storage environments. Nevertheless, conventional migration mechanisms do not consider the speed discrepancy and the SSD's wear-out issue, which not only causes significant performance degradation but also shortens the SSD's lifetime. This paper, for the first time, addresses the efficiency of VM live storage migration in heterogeneous storage environments from a multi-dimensional perspective, i.e., user experience, device wearing, and manageability. We derive a flexible metric (migration cost), which captures various design preferences. Based on that, we propose and prototype three new storage migration strategies, namely: (1) Low Redundancy (LR), which generates the least amount of redundant writes; (2) Source-based Low Redundancy (SLR), which keeps the balance between IO performance and write redundancy; and (3) Asynchronous IO Mirroring, which seeks the highest IO performance. The evaluation of our prototyped system shows that our techniques outperform existing live storage migration by a significant margin. Furthermore, by adaptively mixing our proposed schemes, the cost of massive VM live storage migration can be even lower than that of using only the best individual mechanism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Song:2013:PLM, author = "Xiang Song and Jicheng Shi and Ran Liu and Jian Yang and Haibo Chen", title = "Parallelizing live migration of virtual machines", journal = j-SIGPLAN, volume = "48", number = "7", pages = "85--96", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Live VM migration is one of the major primitive operations used to manage virtualized cloud platforms. Such operations are usually mission-critical and disruptive to the running services, and thus should be completed as fast as possible.
Unfortunately, with the increasing amount of resources configured for a VM, such operations are becoming increasingly time-consuming. In this paper, we make a comprehensive analysis of the parallelization opportunities of live VM migration on two popular open-source VMMs (i.e., Xen and KVM). By leveraging abundant resources like CPU cores and NICs in contemporary server platforms, we design and implement a system called PMigrate that leverages data parallelism and pipeline parallelism to parallelize the operation. As the parallelization framework requires intensive mmap/munmap operations that tax the address space management system in an operating system, we further propose an abstraction called range lock, which improves the scalability of concurrent mutation of the address space of an operating system (i.e., Linux) by selectively replacing the per-process address space lock inside the kernel with dynamic and fine-grained range locks that exclude costly operations on the requesting address range from using the per-process lock. Evaluation with our working prototype on Xen and KVM shows that PMigrate accelerates live VM migration by 2.49X to 9.88X and decreases the downtime by 1.9X to 279.89X. Performance analysis shows that our integration of range locks into Linux significantly improves parallelism in mutating the address space during VM migration and thus boosts performance by 2.06X to 3.05X. We also show that PMigrate causes only minor disruption to other co-hosted production VMs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fu:2013:EUD, author = "Yangchun Fu and Zhiqiang Lin", title = "{EXTERIOR}: using a dual-{VM} based external shell for guest-{OS} introspection, configuration, and recovery", journal = j-SIGPLAN, volume = "48", number = "7", pages = "97--110", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "This paper presents EXTERIOR, a dual-VM architecture based external shell that can be used for trusted, timely out-of-VM management of the guest-OS, such as introspection, configuration, and recovery. Inspired by recent advances in virtual machine introspection (VMI), EXTERIOR leverages an isolated, secure virtual machine (SVM) to introspect the kernel state of a guest virtual machine (GVM). However, it goes far beyond the read-only capability of traditional VMI, and can perform automatic, fine-grained guest-OS writable operations. The key idea of EXTERIOR is to use a dual-VM architecture in which an SVM runs a kernel identical to that of the GVM to create the necessary environment for a running process (e.g., {\tt rmmod}, {\tt kill}), and dynamically and transparently redirect and update the memory state at the VMM layer from the SVM to the GVM, thereby achieving the same effect in terms of kernel state updates as running the same trusted in-VM program inside the shell of the GVM. A proof-of-concept EXTERIOR system has been implemented.
The experimental results show that EXTERIOR can be used for timely administration of the guest-OS, including introspection and (re)configuration of the guest-OS state and timely response to kernel malware intrusions, without any user account in the guest-OS.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dai:2013:LVM, author = "Yuehua Dai and Yong Qi and Jianbao Ren and Yi Shi and Xiaoguang Wang and Xuan Yu", title = "A lightweight {VMM} on many core for high performance computing", journal = j-SIGPLAN, volume = "48", number = "7", pages = "111--120", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "A traditional Virtual Machine Monitor (VMM) virtualizes some devices and instructions, which imposes performance overhead on guest operating systems. Furthermore, virtualization contributes a large amount of code to the VMM, which makes a VMM prone to bugs and vulnerabilities. On the other hand, in cloud computing, a cloud service provider configures virtual machines based on requirements which are specified by customers in advance. As resources in a multi-core server become more than adequate in the future, virtualization is not strictly necessary, although it provides convenience for cloud computing. Based on the above observations, this paper presents an alternative way of constructing a VMM: configuring a booting interface instead of using virtualization technology. A lightweight virtual machine monitor --- OSV --- is proposed based on this idea. OSV can host multiple fully functional Linux kernels with little performance overhead. There are only 6 hyper-calls in OSV. The Linux running on top of OSV is intercepted only for the inter-processor interrupts. The resource isolation is implemented with hardware-assisted virtualization. The resource sharing is controlled by distributed protocols embedded in current operating systems. We implement a prototype of OSV on AMD Opteron processor-based 32-core servers with SVM and cache-coherent NUMA architectures. OSV can host up to 8 Linux kernels on the server with less than 10 lines of code modifications to the Linux kernel. OSV has about 8000 lines of code, which can be easily tuned and debugged.
The experimental results show that the OSV VMM achieves a 23.7\% performance improvement compared with the Xen VMM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yamada:2013:TFT, author = "Hiroshi Yamada and Kenji Kono", title = "Traveling forward in time to newer operating systems using {ShadowReboot}", journal = j-SIGPLAN, volume = "48", number = "7", pages = "121--130", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Operating system (OS) reboots are an essential part of updating kernels and applications on laptops and desktop PCs. Long downtime during OS reboots severely disrupts users' computational activities. This long disruption discourages users from conducting OS reboots, and thus from applying software updates promptly. This paper presents ShadowReboot, a virtual machine monitor (VMM)-based approach that shortens the downtime of OS reboots during software updates. ShadowReboot conceals OS reboot activities from users' applications by spawning a VM dedicated to an OS reboot and systematically producing the rebooted state where the updated kernel and applications are ready for use. ShadowReboot provides users with the illusion that the guest OS travels forward in time to the rebooted state. ShadowReboot offers the following advantages. First, it can be used to apply kernel patches and even system configuration updates. Next, it does not require any special patch requiring detailed knowledge about the target kernels. Lastly, it does not require any target kernel modification. We implemented a prototype in VirtualBox 4.0.10 OSE. Our experimental results show that ShadowReboot successfully updated software on unmodified commodity OS kernels and shortened the downtime of commodity OS reboots on five Linux distributions (Fedora, Ubuntu, Gentoo, Cent, and SUSE) by 91 to 98\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jantz:2013:PPO, author = "Michael R. Jantz and Prasad A. Kulkarni", title = "Performance potential of optimization phase selection during dynamic {JIT} compilation", journal = j-SIGPLAN, volume = "48", number = "7", pages = "131--142", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Phase selection is the process of customizing the applied set of compiler optimization phases for individual functions or programs to improve the performance of generated code. Researchers have recently developed novel feature-vector based heuristic techniques to perform phase selection during online JIT compilation. While these heuristics improve program startup speed, steady-state performance was not seen to benefit over the default fixed single sequence baseline.
Unfortunately, it is still not conclusively known whether this lack of steady-state performance gain is due to a failure of existing online phase selection heuristics, or because there is, indeed, little or no speedup to be gained by phase selection in online JIT environments. The goal of this work is to resolve this question, while examining the phase selection related behavior of optimizations, and assessing and improving the effectiveness of existing heuristic solutions. We conduct experiments to find and understand the potency of the factors that can cause the phase selection problem in JIT compilers. Next, using long-running genetic algorithms we determine that program-wide and method-specific phase selection in the HotSpot JIT compiler can produce ideal steady-state performance gains of up to 15\% (4.3\% average) and 44\% (6.2\% average) respectively. We also find that existing state-of-the-art heuristic solutions are unable to realize these performance gains (in our experimental setup), discuss possible causes, and show that exploiting knowledge of optimization phase behavior can help improve such heuristic solutions. Our work develops a robust open-source production-quality framework using the HotSpot JVM to further explore this problem in the future.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lameed:2013:MAS, author = "Nurudeen A. Lameed and Laurie J. Hendren", title = "A modular approach to on-stack replacement in {LLVM}", journal = j-SIGPLAN, volume = "48", number = "7", pages = "143--154", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "On-stack replacement (OSR) is a technique that allows a virtual machine to interrupt running code during the execution of a function/method, to re-optimize the function on-the-fly using an optimizing JIT compiler, and then to resume the interrupted function at the point and state at which it was interrupted. OSR is particularly useful for programs with potentially long-running loops, as it allows dynamic optimization of those loops as soon as they become hot. This paper presents a modular approach to implementing OSR for the LLVM compiler infrastructure. This is an important step forward because LLVM is gaining popular support, and adding the OSR capability allows compiler developers to develop new dynamic techniques. In particular, it will enable more sophisticated LLVM-based JIT compiler approaches. Indeed, other compiler/VM developers can use our approach because it is a clean modular addition to the standard LLVM distribution. Further, our approach is defined completely at the LLVM-IR level and thus does not require any modifications to the target code generation. The OSR implementation can be used by different compilers to support a variety of dynamic optimizations. As a demonstration of our OSR approach, we have used it to support dynamic inlining in McVM. McVM is a virtual machine for MATLAB which uses a LLVM-based JIT compiler. 
MATLAB is a popular dynamic language for scientific and engineering applications that typically manipulate large matrices and often contain long-running loops, and is thus an ideal target for dynamic JIT compilation and OSRs. Using our McVM example, we demonstrate reasonable overheads for our benchmark set, and performance improvements when using it to perform dynamic inlining.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jantz:2013:FAG, author = "Michael R. Jantz and Carl Strickland and Karthik Kumar and Martin Dimitrov and Kshitij A. Doshi", title = "A framework for application guidance in virtual memory systems", journal = j-SIGPLAN, volume = "48", number = "7", pages = "155--166", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451543", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "This paper proposes a collaborative approach in which applications can provide guidance to the operating system regarding allocation and recycling of physical memory. The operating system incorporates this guidance to decide which physical page should be used to back a particular virtual page. The key intuition behind this approach is that application software, as a generator of memory accesses, is best equipped to inform the operating system about the relative access rates and overlapping patterns of usage of its own address space. It is also capable of steering its own algorithms in order to keep its dynamic memory footprint under check when there is a need to reduce power or to contain the spillover effects from bursts in demand. Application software, working cooperatively with the operating system, can therefore help the latter schedule memory more effectively and efficiently than when the operating system is forced to act alone without such guidance. It is particularly difficult to achieve power efficiency without application guidance since power expended in memory is a function not merely of the intensity with which memory is accessed in time but also how many physical ranks are affected by an application's memory usage. Our framework introduces an abstraction called ``colors'' for the application to communicate its intent to the operating system. We modify the operating system to receive this communication in an efficient way, and to organize physical memory pages into intermediate level grouping structures called ``trays'' which capture the physically independent access channels and self-refresh domains, so that it can apply this guidance without entangling the application in lower level details of power or bandwidth management. 
This paper describes how we re-architect the memory management of a recent Linux kernel to realize a three way collaboration between hardware, supervisory software, and application tasks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2013:TVR, author = "Chen Chen and Petros Maniatis and Adrian Perrig and Amit Vasudevan and Vyas Sekar", title = "Towards verifiable resource accounting for outsourced computation", journal = j-SIGPLAN, volume = "48", number = "7", pages = "167--178", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451546", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Outsourced computation services should ideally only charge customers for the resources used by their applications. Unfortunately, no verifiable basis for service providers and customers to reconcile resource accounting exists today. This leads to undesirable outcomes for both providers and consumers-providers cannot prove to customers that they really devoted the resources charged, and customers cannot verify that their invoice maps to their actual usage. As a result, many practical and theoretical attacks exist, aimed at charging customers for resources that their applications did not consume. Moreover, providers cannot charge consumers precisely, which causes them to bear the cost of unaccounted resources or pass these costs inefficiently to their customers. We introduce ALIBI, a first step toward a vision for verifiable resource accounting. ALIBI places a minimal, trusted reference monitor underneath the service provider's software platform. This monitor observes resource allocation to customers' guest virtual machines and reports those observations to customers, for verifiable reconciliation. In this paper, we show that ALIBI efficiently and verifiably tracks guests' memory use and CPU-cycle consumption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhou:2013:LPC, author = "Ruijin Zhou and Tao Li", title = "Leveraging phase change memory to achieve efficient virtual machine execution", journal = j-SIGPLAN, volume = "48", number = "7", pages = "179--190", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "Virtualization technology is being widely adopted by servers and data centers in the cloud computing era to improve resource utilization and energy efficiency. Nevertheless, the heterogeneous memory demands from multiple virtual machines (VM) make it more challenging to design efficient memory systems. Even worse, mission critical VM management activities (e.g. checkpointing) could incur significant runtime overhead due to intensive IO operations. 
In this paper, we propose to leverage the adaptable and non-volatile features of the emerging phase change memory (PCM) to achieve efficient virtual machine execution. Towards this end, we exploit VM-aware PCM management mechanisms, which (1) smartly tune SLC/MLC page allocation within a single VM and across different VMs and (2) keep critical checkpointing pages in PCM to reduce I/O traffic. Experimental results show that our single VM design (IntraVM) improves performance by 10\% and 20\% compared to pure SLC- and MLC- based systems. Further incorporating VM-aware resource management schemes (IntraVM+InterVM) increases system performance by 15\%. In addition, our design saves 46\% of checkpoint/restore duration and reduces 50\% of overall IO penalty to the system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ouyang:2013:PTS, author = "Jiannan Ouyang and John R. Lange", title = "Preemptable ticket spinlocks: improving consolidated performance in the cloud", journal = j-SIGPLAN, volume = "48", number = "7", pages = "191--200", month = jul, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517326.2451549", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:55:17 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "VEE '13 Conference proceedings.", abstract = "When executing inside a virtual machine environment, OS level synchronization primitives are faced with significant challenges due to the scheduling behavior of the underlying virtual machine monitor. Operations that are ensured to last only a short amount of time on real hardware, are capable of taking considerably longer when running virtualized. This change in assumptions has significant impact when an OS is executing inside a critical region that is protected by a spinlock. The interaction between OS level spinlocks and VMM scheduling is known as the Lock Holder Preemption problem and has a significant impact on overall VM performance. However, with the use of ticket locks instead of generic spinlocks, virtual environments must also contend with waiters being preempted before they are able to acquire the lock. This has the effect of blocking access to a lock, even if the lock itself is available. We identify this scenario as the Lock Waiter Preemption problem. In order to solve both problems we introduce Preemptable Ticket spinlocks, a new locking primitive that is designed to enable a VM to always make forward progress by relaxing the ordering guarantees offered by ticket locks. 
We show that the use of Preemptable Ticket spinlocks improves VM performance by 5.32X on average when running on a non-paravirtual VMM, and by 7.91X when running on a VMM that supports a paravirtual locking interface, when executing a set of microbenchmarks as well as a realistic e-commerce benchmark.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yang:2013:PSC, author = "Chao Yang and Wei Xue and Haohuan Fu and Lin Gan and Linfeng Li and Yangtong Xu and Yutong Lu and Jiachang Sun and Guangwen Yang and Weimin Zheng", title = "A peta-scalable {CPU-GPU} algorithm for global atmospheric simulations", journal = j-SIGPLAN, volume = "48", number = "8", pages = "1--12", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Developing highly scalable algorithms for global atmospheric modeling is becoming increasingly important as scientists seek to understand the behavior of the global atmosphere at extreme scales. Nowadays, heterogeneous architectures based on both processors and accelerators are becoming an important solution for large-scale computing. However, large-scale simulation of the global atmosphere brings a severe challenge to the development of highly scalable algorithms that fit well into state-of-the-art heterogeneous systems. Although successes have been achieved in GPU-accelerated computing in some top-level applications, studies on fully exploiting heterogeneous architectures in global atmospheric modeling are still rare, due in large part to both the computational difficulties of the mathematical models and the requirement of high accuracy for long-term simulations. In this paper, we propose a peta-scalable hybrid algorithm that is successfully applied in a cubed-sphere shallow-water model in global atmospheric simulations. We employ an adjustable partition between CPUs and GPUs to achieve a balanced utilization of the entire hybrid system, and present a pipe-flow scheme to conduct conflict-free inter-node communication on the cubed-sphere geometry and to maximize communication-computation overlap. Systematic optimizations for multithreading on both GPU and CPU sides are performed to enhance computing throughput and improve memory efficiency. Our experiments demonstrate nearly ideal strong and weak scalabilities on up to 3,750 nodes of the Tianhe-1A.
The largest run sustains a performance of 0.8 Pflops in double precision (32\% of the peak performance), using 45,000 CPU cores and 3,750 GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lifflander:2013:APF, author = "Jonathan Lifflander and Phil Miller and Laxmikant Kale", title = "Adoption protocols for fanout-optimal fault-tolerant termination detection", journal = j-SIGPLAN, volume = "48", number = "8", pages = "13--22", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Termination detection is relevant for signaling completion (all processors are idle and no messages are in flight) of many operations in distributed systems, including work stealing algorithms, dynamic data exchange, and dynamically structured computations. In the face of growing supercomputers with increasing likelihood that each job may encounter faults, it is important for high-performance computing applications that rely on termination detection that such an algorithm be able to tolerate the inevitable faults. We provide a trio of new practical fault tolerance schemes for a standard approach to termination detection that are easy to implement, present low overhead in both theory and practice, and have scalable costs when recovering from faults. These schemes tolerate all single-process faults, and are probabilistically tolerant of faults affecting multiple processes. We combine the theoretical failure probabilities we can calculate for each algorithm with historical fault records from real machines to show that these algorithms have excellent overall survivability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yuki:2013:ADA, author = "Tomofumi Yuki and Paul Feautrier and Sanjay Rajopadhye and Vijay Saraswat", title = "Array dataflow analysis for polyhedral {X10} programs", journal = j-SIGPLAN, volume = "48", number = "8", pages = "23--34", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "This paper addresses the static analysis of an important class of X10 programs, namely those with finish/async parallelism, and affine loops and array reference structure as in the polyhedral model. For such programs our analysis can certify whenever a program is deterministic or flags races. Our key contributions are (i) adaptation of array dataflow analysis from the polyhedral model to programs with finish/async parallelism, and (ii) use of the array dataflow analysis result to certify determinacy. We distinguish our work from previous approaches by combining the precise statement instance-wise and array element-wise analysis capability of the polyhedral model with finish/async programs that are more expressive than DOALL parallelism commonly considered in the polyhedral literature. 
We show that our approach is exact (no false negative/positives) and more precise than previous approaches, but is limited to programs that fit the polyhedral model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Prountzos:2013:BCA, author = "Dimitrios Prountzos and Keshav Pingali", title = "Betweenness centrality: algorithms and implementations", journal = j-SIGPLAN, volume = "48", number = "8", pages = "35--46", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Betweenness centrality is an important metric in the study of social networks, and several algorithms for computing this metric exist in the literature. This paper makes three contributions. First, we show that the problem of computing betweenness centrality can be formulated abstractly in terms of a small set of operators that update the graph. Second, we show that existing parallel algorithms for computing betweenness centrality can be viewed as implementations of different schedules for these operators, permitting all these algorithms to be formulated in a single framework. Third, we derive a new asynchronous parallel algorithm for betweenness centrality that (i) works seamlessly for both weighted and unweighted graphs, (ii) can be applied to large graphs, and (iii) is able to extract large amounts of parallelism. We implemented this algorithm and compared it against a number of publicly available implementations of previous algorithms on two different multicore architectures. Our results show that the new algorithm is the best performing one in most cases, particularly for large graphs and large thread counts, and is always competitive against other algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Xiang:2013:CAM, author = "Lingxiang Xiang and Michael Lee Scott", title = "Compiler aided manual speculation for high performance concurrent data structures", journal = j-SIGPLAN, volume = "48", number = "8", pages = "47--56", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Speculation is a well-known means of increasing parallelism among concurrent methods that are usually but not always independent. Traditional nonblocking data structures employ a particularly restrictive form of speculation. Software transactional memory (STM) systems employ a much more general---though typically blocking---form, and there is a wealth of options in between. Using several different concurrent data structures as examples, we show that manual addition of speculation to traditional lock-based code can lead to significant performance improvements. Successful speculation requires careful consideration of profitability, and of how and when to validate consistency. 
Unfortunately, it also requires substantial modifications to code structure and a deep understanding of the memory model. These latter requirements make it difficult to use in its purely manual form, even for expert programmers. To simplify the process, we present a compiler tool, CSpec, that automatically generates speculative code from baseline lock-based code with user annotations. Compiler-aided manual speculation keeps the original code structure for better readability and maintenance, while providing the flexibility to choose speculation and validation strategies. Experiments on UltraSPARC and x86 platforms demonstrate that with a small number of annotations added to lock-based code, CSpec can generate speculative code that matches the performance of best-effort hand-written versions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wu:2013:CAA, author = "Bo Wu and Zhijia Zhao and Eddy Zheng Zhang and Yunlian Jiang and Xipeng Shen", title = "Complexity analysis and algorithm design for reorganizing data to minimize non-coalesced memory accesses on {GPU}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "57--68", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "The performance of Graphic Processing Units (GPUs) is sensitive to irregular memory references. Some recent work shows the promise of data reorganization for eliminating non-coalesced memory accesses that are caused by irregular references. However, all previous studies have employed simple, heuristic methods to determine the new data layouts to create. As a result, they either do not provide any performance guarantee or are effective in only some limited scenarios. This paper contributes a fundamental study of the problem. It systematically analyzes the inherent complexity of the problem in various settings, and for the first time, proves that the problem is NP-complete. It then points out the limitations of existing techniques and reveals that in practice, the essence of designing an appropriate data reorganization algorithm can be reduced to a tradeoff among space, time, and complexity. Based on that insight, it develops two new data reorganization algorithms to overcome the limitations of previous methods.
Experiments show that an assembly composed of the new algorithms and a previous algorithm can circumvent the inherent complexity in finding optimal data layouts, making it feasible to minimize non-coalesced memory accesses for a variety of irregular applications and settings that are beyond the reach of existing techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Le:2013:CEW, author = "Nhat Minh L{\^e} and Antoniu Pop and Albert Cohen and Francesco Zappa Nardelli", title = "Correct and efficient work-stealing for weak memory models", journal = j-SIGPLAN, volume = "48", number = "8", pages = "69--80", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Chase and Lev's concurrent deque is a key data structure in shared-memory parallel programming and plays an essential role in work-stealing schedulers. We provide the first correctness proof of an optimized implementation of Chase and Lev's deque on top of the POWER and ARM architectures: these provide very relaxed memory models, which we exploit to improve performance but considerably complicate the reasoning. We also study an optimized x86 and a portable C11 implementation, conducting systematic experiments to evaluate the impact of memory barrier optimizations. Our results demonstrate the benefits of hand tuning the deque code when running on top of relaxed memory models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bergstrom:2013:DOF, author = "Lars Bergstrom and Matthew Fluet and Mike Rainey and John Reppy and Stephen Rosen and Adam Shaw", title = "Data-only flattening for nested data parallelism", journal = j-SIGPLAN, volume = "48", number = "8", pages = "81--92", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Data parallelism has proven to be an effective technique for high-level programming of a certain class of parallel applications, but it is not well suited to irregular parallel computations. Blelloch and others proposed nested data parallelism (NDP) as a language mechanism for programming irregular parallel applications in a declarative data-parallel style. The key to this approach is a compiler transformation that flattens the NDP computation and data structures into a form that can be executed efficiently on a wide-vector SIMD architecture. Unfortunately, this technique is ill suited to execution on today's multicore machines. We present a new technique, called data-only flattening, for the compilation of NDP, which is suitable for multicore architectures. Data-only flattening transforms nested data structures in order to expose programs to various optimizations while leaving control structures intact. We present a formal semantics of data-only flattening in a core language with a rewriting system. 
We demonstrate the effectiveness of this technique in the Parallel ML implementation and we report encouraging experimental results across various benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morozov:2013:DMT, author = "Dmitriy Morozov and Gunther Weber", title = "Distributed merge trees", journal = j-SIGPLAN, volume = "48", number = "8", pages = "93--102", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Improved simulations and sensors are producing datasets whose increasing complexity exhausts our ability to visualize and comprehend them directly. To cope with this problem, we can detect and extract significant features in the data and use them as the basis for subsequent analysis. Topological methods are valuable in this context because they provide robust and general feature definitions. As the growth of serial computational power has stalled, data analysis is becoming increasingly dependent on massively parallel machines. To satisfy the computational demand created by complex datasets, algorithms need to effectively utilize these computer architectures. The main strength of topological methods, their emphasis on global information, turns into an obstacle during parallelization. We present two approaches to alleviate this problem. We develop a distributed representation of the merge tree that avoids computing the global tree on a single processor and lets us parallelize subsequent queries. To account for the increasing number of cores per processor, we develop a new data structure that lets us take advantage of multiple shared-memory cores to parallelize the work on a single node. Finally, we present experiments that illustrate the strengths of our approach as well as help identify future challenges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morrison:2013:FCQ, author = "Adam Morrison and Yehuda Afek", title = "Fast concurrent queues for x86 processors", journal = j-SIGPLAN, volume = "48", number = "8", pages = "103--112", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442527", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Conventional wisdom in designing concurrent data structures is to use the most powerful synchronization primitive, namely compare-and-swap (CAS), and to avoid contended hot spots. In building concurrent FIFO queues, this reasoning has led researchers to propose combining-based concurrent queues. 
This paper takes a different approach, showing how to rely on fetch-and-add (F\&A), a less powerful primitive that is available on x86 processors, to construct a nonblocking (lock-free) linearizable concurrent FIFO queue which, despite the F\&A being a contended hot spot, outperforms combining-based implementations by 1.5x to 2.5x at all concurrency levels on an x86 server with four multicore processors, in both single-processor and multi-processor executions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wamhoff:2013:FIP, author = "Jons-Tobias Wamhoff and Christof Fetzer and Pascal Felber and Etienne Rivi{\`e}re and Gilles Muller", title = "{FastLane}: improving performance of software transactional memory for low thread counts", journal = j-SIGPLAN, volume = "48", number = "8", pages = "113--122", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Software transactional memory (STM) can lead to scalable implementations of concurrent programs, as the relative performance of an application increases with the number of threads that support it. However, the absolute performance is typically impaired by the overheads of transaction management and instrumented accesses to shared memory. This often leads STM-based programs with low thread counts to perform worse than a sequential, non-instrumented version of the same application. In this paper, we propose FastLane, a new STM algorithm that bridges the performance gap between sequential execution and classical STM algorithms when running on few cores. FastLane seeks to reduce instrumentation costs and thus performance degradation in its target operation range. We introduce a novel algorithm that differentiates between two types of threads: One thread (the master) executes transactions pessimistically without ever aborting, thus with minimal instrumentation and management costs, while other threads (the helpers) can commit speculative transactions only when they do not conflict with the master. Helpers thus contribute to the application's progress without impairing the performance of the master. We implement FastLane as an extension of a state-of-the-art STM runtime system and compiler. Multiple code paths are produced for execution on a single core, a few cores, and many cores. The runtime system selects the code path providing the best throughput, depending on the number of cores available on the target machine.
Evaluation results indicate that our approach provides promising performance at low thread counts: FastLane almost systematically wins over a classical STM in the 1-6 threads range, and often performs better than sequential execution of the non-instrumented version of the same application starting with 2 threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Barthe:2013:RVS, author = "Gilles Barthe and Juan Manuel Crespo and Sumit Gulwani and Cesar Kunz and Mark Marron", title = "From relational verification to {SIMD} loop synthesis", journal = j-SIGPLAN, volume = "48", number = "8", pages = "123--134", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Existing pattern-based compiler technology is unable to effectively exploit the full potential of SIMD architectures. We present a new program synthesis based technique for auto-vectorizing performance critical innermost loops. Our synthesis technique is applicable to a wide range of loops, consistently produces performant SIMD code, and generates correctness proofs for the output code. The synthesis technique, which leverages existing work on relational verification methods, is a novel combination of deductive loop restructuring, synthesis condition generation and a new inductive synthesis algorithm for producing loop-free code fragments. The inductive synthesis algorithm wraps an optimized depth-first exploration of code sequences inside a CEGIS loop. Our technique is able to quickly produce SIMD implementations (up to 9 instructions in 0.12 seconds) for a wide range of fundamental looping structures. The resulting SIMD implementations outperform the original loops by 2.0x-3.7x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shun:2013:LLG, author = "Julian Shun and Guy E. Blelloch", title = "{Ligra}: a lightweight graph processing framework for shared memory", journal = j-SIGPLAN, volume = "48", number = "8", pages = "135--146", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "There has been significant recent interest in parallel frameworks for processing graphs due to their applicability in studying social networks, the Web graph, networks in biology, and unstructured meshes in scientific simulation. Due to the desire to process large graphs, these systems have emphasized the ability to run on distributed memory machines. Today, however, a single multicore server can support more than a terabyte of memory, which can fit graphs with tens or even hundreds of billions of edges. Furthermore, for graph algorithms, shared-memory multicores are generally significantly more efficient on a per core, per dollar, and per joule basis than distributed memory systems, and shared-memory algorithms tend to be simpler than their distributed counterparts. 
In this paper, we present a lightweight graph processing framework that is specific for shared-memory parallel/multicore machines, which makes graph traversal algorithms easy to write. The framework has two very simple routines, one for mapping over edges and one for mapping over vertices. Our routines can be applied to any subset of the vertices, which makes the framework useful for many graph traversal algorithms that operate on subsets of the vertices. Based on recent ideas used in a very fast algorithm for breadth-first search (BFS), our routines automatically adapt to the density of vertex sets. We implement several algorithms in this framework, including BFS, graph radii estimation, graph connectivity, betweenness centrality, PageRank and single-source shortest paths. Our algorithms expressed using this framework are very simple and concise, and perform almost as well as highly optimized code. Furthermore, they get good speedups on a 40-core machine and are significantly more efficient than previously reported results using graph frameworks on machines with many more cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nasre:2013:MAG, author = "Rupesh Nasre and Martin Burtscher and Keshav Pingali", title = "Morph algorithms on {GPUs}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "147--156", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "There is growing interest in using GPUs to accelerate graph algorithms such as breadth-first search, computing page-ranks, and finding shortest paths. However, these algorithms do not modify the graph structure, so their implementation is relatively easy compared to general graph algorithms like mesh generation and refinement, which morph the underlying graph in non-trivial ways by adding and removing nodes and edges. We know relatively little about how to implement morph algorithms efficiently on GPUs. In this paper, we present and study four morph algorithms: (i) a computational geometry algorithm called Delaunay Mesh Refinement (DMR), (ii) an approximate SAT solver called Survey Propagation (SP), (iii) a compiler analysis called Points-To Analysis (PTA), and (iv) Boruvka's Minimum Spanning Tree algorithm (MST). Each of these algorithms modifies the graph data structure in different ways and thus poses interesting challenges. We overcome these challenges using algorithmic and GPU-specific optimizations. We propose efficient techniques to perform concurrent subgraph addition, subgraph deletion, conflict detection and several optimizations to improve the scalability of morph algorithms. For an input mesh with 10 million triangles, our DMR code achieves an 80x speedup over the highly optimized serial Triangle program and a 2.3x speedup over a multicore implementation running with 48 threads. Our SP code is 3x faster than a multicore implementation with 48 threads on an input with 1 million literals. The PTA implementation is able to analyze six SPEC 2000 benchmark programs in just 74 milliseconds, achieving a geometric mean speedup of 9.3x over a 48-thread multicore version. 
Our MST code is slower than a multicore version with 48 threads for sparse graphs but significantly faster for denser graphs. This work provides several insights into how other morph algorithms can be efficiently implemented on GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Calciu:2013:NAR, author = "Irina Calciu and Dave Dice and Yossi Lev and Victor Luchangco and Virendra J. Marathe and Nir Shavit", title = "{NUMA}-aware reader-writer locks", journal = j-SIGPLAN, volume = "48", number = "8", pages = "157--166", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442532", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Non-Uniform Memory Access (NUMA) architectures are gaining importance in mainstream computing systems due to the rapid growth of multi-core multi-chip machines. Extracting the best possible performance from these new machines will require us to revisit the design of the concurrent algorithms and synchronization primitives which form the building blocks of many of today's applications. This paper revisits one such critical synchronization primitive --- the reader-writer lock. We present what is, to the best of our knowledge, the first family of reader-writer lock algorithms tailored to NUMA architectures. We present several variations which trade fairness between readers and writers for higher concurrency among readers and better back-to-back batching of writers from the same NUMA node. Our algorithms leverage the lock cohorting technique to manage synchronization between writers in a NUMA-friendly fashion, binary flags to coordinate readers and writers, and simple distributed reader counter implementations to enable NUMA-friendly concurrency among readers. The end result is a collection of surprisingly simple NUMA-aware algorithms that outperform the state-of-the-art reader-writer locks by up to a factor of 10 in our microbenchmark experiments. To evaluate our algorithms in a realistic setting we also present performance results of the {\tt kccachetest} benchmark of the Kyoto-Cabinet distribution, an open-source database which makes heavy use of pthread reader-writer locks. Our locks boost the performance of {\tt kccachetest} by up to 40\% over the best prior alternatives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2013:OAO, author = "Zizhong Chen", title = "{Online-ABFT}: an online algorithm based fault tolerance scheme for soft error detection in iterative methods", journal = j-SIGPLAN, volume = "48", number = "8", pages = "167--176", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Soft errors are one-time events that corrupt the state of a computing system but not its overall functionality. Large supercomputers are especially susceptible to soft errors because of their large number of components. 
Soft errors can generally be detected offline through the comparison of the final computation results of two duplicated computations, but this approach often introduces significant overhead. This paper presents Online-ABFT, a simple but efficient online soft error detection technique that can detect soft errors in the widely used Krylov subspace iterative methods in the middle of the program execution so that the computation efficiency can be improved through the termination of the corrupted computation in a timely manner soon after a soft error occurs. Based on a simple verification of orthogonality and residual, Online-ABFT is easy to implement and highly efficient. Experimental results demonstrate that, when this online error detection approach is used together with checkpointing, it improves the time to obtain correct results by up to several orders of magnitude over the traditional offline approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Friedley:2013:OPE, author = "Andrew Friedley and Torsten Hoefler and Greg Bronevetsky and Andrew Lumsdaine and Ching-Chen Ma", title = "Ownership passing: efficient distributed memory programming on multi-core systems", journal = j-SIGPLAN, volume = "48", number = "8", pages = "177--186", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "The number of cores in multi- and many-core high-performance processors is steadily increasing. MPI, the de-facto standard for programming high-performance computing systems, offers a distributed memory programming model. MPI's semantics force a copy from one process' send buffer to another process' receive buffer. This makes it difficult to achieve the same performance on modern hardware as shared memory programs, which are arguably harder to maintain and debug. We propose generalizing MPI's communication model to include ownership passing, which makes it possible to fully leverage the shared memory hardware of multi- and many-core CPUs to stream communicated data concurrently with the receiver's computations on it. The benefits and simplicity of message passing are retained by extending MPI with calls to send (pass) ownership of memory regions, instead of their contents, between processes. Ownership passing is achieved with a hybrid MPI implementation that runs MPI processes as threads and is mostly transparent to the user. We propose an API and a static analysis technique to transform legacy MPI codes automatically and transparently to the programmer, demonstrating that this scheme is easy to use in practice. Using the ownership passing technique, we see up to 51\% communication speedups over a standard message passing implementation on state-of-the-art multicore systems. Our analysis and interface will lay the groundwork for future development of MPI-aware optimizing compilers and multi-core specific optimizations, which will be key for success in current and next-generation computing platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Meyerovich:2013:PSS, author = "Leo A.
Meyerovich and Matthew E. Torok and Eric Atkinson and Rastislav Bodik", title = "Parallel schedule synthesis for attribute grammars", journal = j-SIGPLAN, volume = "48", number = "8", pages = "187--196", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "We examine how to synthesize a parallel schedule of structured traversals over trees. In our system, programs are declaratively specified as attribute grammars. Our synthesizer automatically, correctly, and quickly schedules the attribute grammar as a composition of parallel tree traversals. Our downstream compiler optimizes for GPUs and multicore CPUs. We provide support for designing efficient schedules. First, we introduce a declarative language of schedules where programmers may constrain any part of the schedule and the synthesizer will complete and autotune the rest. Furthermore, the synthesizer answers debugging queries about how schedules may be completed. We evaluate our approach with two case studies. First, we created the first parallel schedule for a large fragment of CSS and report a 3X multicore speedup. Second, we created an interactive GPU-accelerated animation of over 100,000 nodes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Deo:2013:PSA, author = "Mrinal Deo and Sean Keely", title = "Parallel suffix array and least common prefix for the {GPU}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "197--206", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Suffix Array (SA) is a data structure formed by sorting the suffixes of a string into lexicographic order. SAs have been used in a variety of applications, most notably in pattern matching and Burrows--Wheeler Transform (BWT) based lossless data compression. SAs have also become the data structure of choice for many, if not all, string processing problems to which suffix tree methodology is applicable. Over the last two decades researchers have proposed many suffix array construction algorithm (SACAs). We do a systematic study of the main classes of SACAs with the intent of mapping them onto a data parallel architecture like the GPU. We conclude that skew algorithm [12], a linear time recursive algorithm, is the best candidate for GPUs as all its phases can be efficiently mapped to a data parallel hardware. Our OpenCL implementation of skew algorithm achieves a throughput of up to 25 MStrings/sec and a speedup of up to 34x and 5.8x over a single threaded CPU implementation using a discrete GPU and APU respectively. We also compare our OpenCL implementation against the fastest known CPU implementation based on induced copying and achieve a speedup of up to 3.7x. 
Using SA we construct BWT on GPU and achieve a speedup of 11x over the fastest known BWT on GPU. Suffix arrays are often augmented with the longest common prefix (LCP) information. We design a novel high-performance parallel algorithm for computing LCP on the GPU. Our GPU implementation of LCP achieves a speedup of up to 25x and 4.3x on discrete GPU and APU respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2013:SDR, author = "Yufei Chen and Haibo Chen", title = "Scalable deterministic replay in a parallel full-system emulator", journal = j-SIGPLAN, volume = "48", number = "8", pages = "207--218", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Full-system emulation has been an extremely useful tool in developing and debugging systems software like operating systems and hypervisors. However, current full-system emulators lack the support for deterministic replay, which limits the reproducibility of concurrency bugs that is indispensable for analyzing and debugging the essentially multi-threaded systems software. This paper analyzes the challenges in supporting deterministic replay in parallel full-system emulators and makes a comprehensive study on the sources of non-determinism. Unlike application-level replay systems, our system, called ReEmu, needs to log sources of non-determinism in both the guest software stack and the dynamic binary translator for faithful replay. To provide scalable and efficient record and replay on multicore machines, ReEmu makes several notable refinements to the CREW protocol that replays shared memory systems. First, being aware of the performance bottlenecks in frequent lock operations in the CREW protocol, ReEmu refines the CREW protocol with a seqlock-like design, to avoid serious contention and possible starvation in instrumentation code tracking dependence of racy accesses on a shared memory object. Second, to minimize the required log files, ReEmu only logs minimal local information regarding accesses to a shared memory location, but instead relies on an offline log processing tool to derive precise shared memory dependence for faithful replay. Third, ReEmu adopts an automatic lock clustering mechanism that clusters a set of uncontended memory objects to a bulk to reduce the frequencies of lock operations, which noticeably boost performance. Our prototype ReEmu is based on our open-source COREMU system and supports scalable and efficient record and replay of full-system environments (both x64 and ARM). Performance evaluation shows that ReEmu has very good performance scalability on an Intel multicore machine. It incurs only 68.9\% performance overhead on average (ranging from 51.8\% to 94.7\%) over vanilla COREMU to record five PARSEC benchmarks running on a 16-core emulated system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Acar:2013:SPP, author = "Umut A. 
Acar and Arthur Chargueraud and Mike Rainey", title = "Scheduling parallel programs by work stealing with private deques", journal = j-SIGPLAN, volume = "48", number = "8", pages = "219--228", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442538", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Work stealing has proven to be an effective method for scheduling parallel programs on multicore computers. To achieve high performance, work stealing distributes tasks between concurrent queues, called deques, which are assigned to each processor. Each processor operates on its deque locally except when performing load balancing via steals. Unfortunately, concurrent deques suffer from two limitations: (1) local deque operations require expensive memory fences in modern weak-memory architectures, (2) they can be very difficult to extend to support various optimizations and flexible forms of task distribution strategies needed by many applications, e.g., those that do not fit nicely into the divide-and-conquer, nested data parallel paradigm. For these reasons, there has been a lot of recent interest in implementations of work stealing with non-concurrent deques, where deques remain entirely private to each processor and load balancing is performed via message passing. Private deques eliminate the need for memory fences from local operations and enable the design and implementation of efficient techniques for reducing task-creation overheads and improving task distribution. These advantages, however, come at the cost of communication. It is not known whether work stealing with private deques enjoys the theoretical guarantees of concurrent deques and whether they can be effective in practice. In this paper, we propose two work-stealing algorithms with private deques and prove that the algorithms guarantee similar theoretical bounds as work stealing with concurrent deques. For the analysis, we use a probabilistic model and consider a new parameter, the branching depth of the computation. We present an implementation of the algorithm as a C++ library and show that it compares well to Cilk on a range of benchmarks. Since our approach relies on private deques, it enables implementing flexible task creation and distribution strategies.
As a specific example, we show how to implement task coalescing and steal-half strategies, which can be important in fine-grain, non-divide-and-conquer algorithms such as graph algorithms, and apply them to the depth-first-search problem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yan:2013:SFS, author = "Shengen Yan and Guoping Long and Yunquan Zhang", title = "{StreamScan}: fast scan algorithms for {GPUs} without global barrier synchronization", journal = j-SIGPLAN, volume = "48", number = "8", pages = "229--238", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Scan (also known as prefix sum) is a very useful primitive for various important parallel algorithms, such as sort, BFS, SpMV, compaction and so on. Current state of the art of GPU based scan implementation consists of three consecutive Reduce-Scan-Scan phases. This approach requires at least two global barriers and 3N (N is the problem size) global memory accesses. In this paper we propose StreamScan, a novel approach to implement scan on GPUs with only one computation phase. The main idea is to restrict synchronization to only adjacent workgroups, and thereby eliminating global barrier synchronization completely. The new approach requires only 2N global memory accesses and just one kernel invocation. On top of this we propose two important optimizations to further boost performance speedups, namely thread grouping to eliminate unnecessary local barriers, and register optimization to expand the on chip problem size. We designed an auto-tuning framework to search the parameter space automatically to generate highly optimized codes for both AMD and Nvidia GPUs. We implemented our technique with OpenCL. Compared with previous fast scan implementations, experimental results not only show promising performance speedups, but also reveal dramatic different optimization tradeoffs between Nvidia and AMD GPU platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Heumann:2013:TEM, author = "Stephen T. Heumann and Vikram S. Adve and Shengjie Wang", title = "The tasks with effects model for safe concurrency", journal = j-SIGPLAN, volume = "48", number = "8", pages = "239--250", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442540", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Today's widely-used concurrent programming models either provide weak safety guarantees, making it easy to write code with subtle errors, or are limited in the class of programs that they can express. We propose a new concurrent programming model based on tasks with effects that offers strong safety guarantees while still providing the flexibility needed to support the many ways that concurrency is used in complex applications. 
The core unit of work in our model is a dynamically-created task. The model's key feature is that each task has programmer-specified effects, and a run-time scheduler is used to ensure that two tasks are run concurrently only if they have non-interfering effects. Through the combination of statically verifying the declared effects of tasks and using an effect-aware run-time scheduler, our model is able to guarantee strong safety properties, including data race freedom and atomicity. It is also possible to use our model to write programs and computations that can be statically proven to behave deterministically. We describe the tasks with effects programming model and provide a formal dynamic semantics for it. We also describe our implementation of this model in an extended version of Java and evaluate its use in several programs exhibiting various patterns of concurrency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bonetta:2013:TPE, author = "Daniele Bonetta and Walter Binder and Cesare Pautasso", title = "{TigerQuoll}: parallel event-based {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "251--260", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "JavaScript, the most popular language on the Web, is rapidly moving to the server-side, becoming even more pervasive. Still, JavaScript lacks support for shared memory parallelism, making it challenging for developers to exploit multicores present in both servers and clients. In this paper we present TigerQuoll, a novel API and runtime for parallel programming in JavaScript. TigerQuoll features an event-based API and a parallel runtime allowing applications to exploit a mutable shared memory space. The programming model of TigerQuoll features automatic consistency and concurrency management, such that developers do not have to deal with shared-data synchronization. TigerQuoll supports an innovative transaction model that allows for eventual consistency to speed up high-contention workloads. Experiments show that TigerQuoll applications scale well, allowing one to implement common parallelism patterns in JavaScript.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dice:2013:UHT, author = "Dave Dice and Yossi Lev and Yujie Liu and Victor Luchangco and Mark Moir", title = "Using hardware transactional memory to correct and simplify a readers-writer lock algorithm", journal = j-SIGPLAN, volume = "48", number = "8", pages = "261--270", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442542", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Designing correct synchronization algorithms is notoriously difficult, as evidenced by a bug we have identified that has apparently gone unnoticed in a well-known synchronization algorithm for nearly two decades.
We use hardware transactional memory (HTM) to construct a corrected version of the algorithm. This version is significantly simpler than the original and furthermore improves on it by eliminating usage constraints and reducing space requirements. Performance of the HTM-based algorithm is competitive with the original in ``normal'' conditions, but it does suffer somewhat under heavy contention. We successfully apply some optimizations to help close this gap, but we also find that they are incompatible with known techniques for improving progress properties. We discuss ways in which future HTM implementations may address these issues. Finally, although our focus is on how effectively HTM can correct and simplify the algorithm, we also suggest bug fixes and workarounds that do not depend on HTM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cascaval:2013:ZPW, author = "Calin Cascaval and Seth Fowler and Pablo Montesinos-Ortego and Wayne Piekarski and Mehrdad Reshadi and Behnam Robatmili and Michael Weber and Vrajesh Bhavsar", title = "{ZOOMM}: a parallel {Web} browser engine for multicore mobile devices", journal = j-SIGPLAN, volume = "48", number = "8", pages = "271--280", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442543", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "We explore the challenges in expressing and managing concurrency in browsers on mobile devices. Browsers are complex applications that implement multiple standards, need to support legacy behavior, and are highly dynamic and interactive. We present ZOOMM, a highly concurrent web browser engine prototype and show how concurrency is effectively exploited at different levels: speed up computation performance, preload network resources, and preprocess resources outside the critical path of page loading. On a dual-core Android mobile device we demonstrate that ZOOMM is two times faster than the native WebKit based browser when loading the set of pages defined in the Vellamo benchmark.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Grasso:2013:APS, author = "Ivan Grasso and Klaus Kofler and Biagio Cosenza and Thomas Fahringer", title = "Automatic problem size sensitive task partitioning on heterogeneous parallel systems", journal = j-SIGPLAN, volume = "48", number = "8", pages = "281--282", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "In this paper we propose a novel approach which automatizes task partitioning in heterogeneous systems. Our framework is based on the Insieme Compiler and Runtime infrastructure. The compiler translates a single-device OpenCL program into a multi-device OpenCL program. The runtime system then performs dynamic task partitioning based on an offline-generated prediction model. 
In order to derive the prediction model, we use a machine learning approach that incorporates static program features as well as dynamic, input sensitive features. Our approach has been evaluated over a suite of 23 programs and achieves performance improvements compared to an execution of the benchmarks on a single CPU and a single GPU only.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2013:DLO, author = "Jun Liu and Wei Ding and Ohyoung Jang and Mahmut Kandemir", title = "Data layout optimization for {GPGPU} architectures", journal = j-SIGPLAN, volume = "48", number = "8", pages = "283--284", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442546", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "GPUs are being widely used in accelerating general-purpose applications, leading to the emergence of GPGPU architectures. New programming models, e.g., Compute Unified Device Architecture (CUDA), have been proposed to facilitate programming general-purpose computations in GPGPUs. However, writing high-performance CUDA codes manually is still tedious and difficult. In particular, the organization of the data in the memory space can greatly affect the performance due to the unique features of a custom GPGPU memory hierarchy. In this work, we propose an automatic data layout transformation framework to solve the key issues associated with a GPGPU memory hierarchy (i.e., channel skewing, data coalescing, and bank conflicts). Our approach employs a widely applicable strategy based on a novel concept called data localization. Specifically, we try to optimize the layout of the arrays accessed in affine loop nests, for both the device memory and shared memory, at both coarse grain and fine grain parallelization levels. We performed an experimental evaluation of our data layout optimization strategy using 15 benchmarks on an NVIDIA CUDA GPU device. The results show that the proposed data transformation approach brings around 4.3X speedup on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Padmanabhan:2013:DTO, author = "Shobana Padmanabhan and Yixin Chen and Roger D. Chamberlain", title = "Decomposition techniques for optimal design-space exploration of streaming applications", journal = j-SIGPLAN, volume = "48", number = "8", pages = "285--286", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Streaming data programs are an important class of applications, for which queueing network models are frequently available. While the design space can be large, decomposition techniques can be effective at design space reduction. 
We introduce two decomposition techniques called convex decomposition and unchaining and present implications for a biosequence search application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Yu:2013:EDA, author = "Xiaodong Yu and Michela Becchi", title = "Exploring different automata representations for efficient regular expression matching on {GPUs}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "287--288", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Regular expression matching is a central task in several networking (and search) applications and has been accelerated on a variety of parallel architectures. All solutions are based on finite automata (either in deterministic or non-deterministic form), and mostly focus on effective memory representations for such automata. Recently, a handful of work has proposed efficient regular expression matching designs for GPUs; however, most of them aim at achieving good performance on small datasets. Nowadays, practical solutions must support the increased size and complexity of real world datasets. In this work, we explore the deployment and optimization of different GPU designs of regular expression matching engines, focusing on large datasets containing a large number of complex patterns.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Edmonds:2013:EGA, author = "Nick Edmonds and Jeremiah Willcock and Andrew Lumsdaine", title = "Expressing graph algorithms using generalized active messages", journal = j-SIGPLAN, volume = "48", number = "8", pages = "289--290", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442549", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Recently, graph computation has emerged as an important class of high-performance computing application whose characteristics differ markedly from those of traditional, compute-bound, kernels. Libraries such as BLAS, LAPACK, and others have been successful in codifying best practices in numerical computing. The data-driven nature of graph applications necessitates a more complex application stack incorporating runtime optimization. In this paper, we present a method of phrasing graph algorithms as collections of asynchronous, concurrently executing, concise code fragments which may be invoked both locally and in remote address spaces. A runtime layer performs a number of dynamic optimizations, including message coalescing, message combining, and software routing. 
Practical implementations and performance results are provided for a number of representative algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lu:2013:MLP, author = "Ligang Lu and Karen Magerlein", title = "Multi-level parallel computing of reverse time migration for seismic imaging on {Blue Gene/Q}", journal = j-SIGPLAN, volume = "48", number = "8", pages = "291--292", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442550", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Blue Gene/Q (BG/Q) is an early representative of increasing scale and thread count that will characterize future HPC systems: large counts of nodes, cores, and threads; and a rich programming environment with many degrees of freedom in parallel computing optimization. So it is both a challenge and an opportunity to use it to accelerate seismic imaging applications to unprecedented levels that will significantly advance the technologies for the oil and gas industry. In this work we aim to address two important questions: how HPC systems with high levels of scale and thread count will perform in real applications; and how systems with many degrees of freedom in parallel programming can be calibrated to achieve optimal performance. Based on BG/Q's architecture features and RTM workload characteristics, we developed massive domain partition, MPI, and SIMD optimization strategies. Our detailed deep analyses in various aspects of optimization also provide valuable experience and insights into how such systems can be utilized to facilitate the advance of seismic imaging technologies. Our BG/Q RTM solution achieved a 14.93x speedup over the BG/P implementation. Our multi-level parallelism strategies for Reverse Time Migration (RTM) seismic imaging computing on BG/Q provide an example of how HPC systems like BG/Q can accelerate applications to a new level.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Park:2013:PPB, author = "Changhee Park and Guy L. {Steele, Jr.} and Jean-Baptiste Tristan", title = "Parallel programming with big operators", journal = j-SIGPLAN, volume = "48", number = "8", pages = "293--294", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "In the sciences, it is common to use the so-called ``big operator'' notation to express the iteration of a binary operator (the reducer) over a collection of values. Such a notation typically assumes that the reducer is associative and abstracts the iteration process. Consequently, from a programming point-of-view, we can organize the reducer operations to minimize the depth of the overall reduction, allowing a potentially parallel evaluation of a big operator expression.
We believe that the big operator notation is indeed an effective construct to express parallel computations in the Generate/Map/Reduce programming model, and our goal is to introduce it in programming languages to support parallel programming. The effective definition of such a big operator expression requires a simple way to generate elements, and a simple way to declare algebraic properties of the reducer (such as its identity, or its commutativity). In this poster, we want to present an extension of Scala with support for big operator expressions. We show how big operator expressions are defined and how the API is organized to support the simple definition of reducers with their algebraic properties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Afek:2013:PHL, author = "Yehuda Afek and Amir Levy and Adam Morrison", title = "Programming with hardware lock elision", journal = j-SIGPLAN, volume = "48", number = "8", pages = "295--296", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442552", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "We present a simple yet effective technique for improving performance of lock-based code using the hardware lock elision (HLE) feature in Intel's upcoming Haswell processor. We also describe how to extend Haswell's HLE mechanism to achieve a similar effect to our lock elision scheme entirely in hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lu:2013:REM, author = "Kai Lu and Xu Zhou and Xiaoping Wang and Wenzhe Zhang and Gen Li", title = "{RaceFree}: an efficient multi-threading model for determinism", journal = j-SIGPLAN, volume = "48", number = "8", pages = "297--298", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Current deterministic systems generally incur large overhead due to the difficulty of detecting and eliminating data races. This paper presents RaceFree, a novel multi-threading runtime that adopts a relaxed deterministic model to provide a data-race-free environment for parallel programs. This model cuts off unnecessary shared-memory communication by isolating threads in separated memories, which eliminates direct data races. Meanwhile, we leverage the happen-before relation defined by applications themselves as one-way communication pipes to perform necessary thread communication. Shared-memory communication is transparently converted to message-passing style communication by our Memory Modification Propagation (MMP) mechanism, which propagates local memory modifications to other threads through the happen-before relation pipes. 
The overhead of RaceFree is 67.2\% according to our tests on parallel benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shun:2013:RCT, author = "Julian Shun and Guy E. Blelloch and Jeremy T. Fineman and Phillip B. Gibbons", title = "Reducing contention through priority updates", journal = j-SIGPLAN, volume = "48", number = "8", pages = "299--300", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442554", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Diamos:2013:RAM, author = "Gregory Diamos and Haicheng Wu and Jin Wang and Ashwin Lele and Sudhakar Yalamanchili", title = "Relational algorithms for multi-bulk-synchronous processors", journal = j-SIGPLAN, volume = "48", number = "8", pages = "301--302", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Relational databases remain an important application infrastructure for organizing and analyzing massive volumes of data. At the same time, processor architectures are increasingly gravitating towards Multi-Bulk-Synchronous processor (Multi-BSP) architectures employing throughput-optimized memory systems, lightweight multi-threading, and Single-Instruction Multiple-Data (SIMD) core organizations. This paper explores the mapping of primitive relational algebra operations onto such architectures to improve the throughput of data warehousing applications built on relational databases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Carvalho:2013:RET, author = "Fernando Miguel Carvalho and Jo{\~a}o Cachopo", title = "Runtime elision of transactional barriers for captured memory", journal = j-SIGPLAN, volume = "48", number = "8", pages = "303--304", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442556", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "In this paper, we propose a new technique that can identify transaction-local memory (i.e. captured memory ), in managed environments, while having a low runtime overhead. We implemented our proposal in a well known STM framework (Deuce) and we tested it in STMBench7 with two different STMs: TL2 and LSA. In both STMs the performance improved significantly (4 times and 2.6 times, respectively). 
Moreover, running the STAMP benchmarks with our approach shows improvements of 7 times in the best case for the Vacation application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Park:2013:SDR, author = "Chang-Seo Park and Koushik Sen and Costin Iancu", title = "Scalable data race detection for partitioned global address space programs", journal = j-SIGPLAN, volume = "48", number = "8", pages = "305--306", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Contemporary and future programming languages for HPC promote hybrid parallelism and shared memory abstractions using a global address space. In this programming style, data races occur easily and are notoriously hard to find. Previous work on data race detection for shared memory programs reports 10X-100X slowdowns for non-scientific programs. Previous work on distributed memory programs instruments only communication operations. In this paper we present the first complete implementation of data race detection at scale for UPC programs. Our implementation tracks local and global memory references in the program and it uses two techniques to reduce the overhead: (1) hierarchical function and instruction level sampling; and (2) exploiting the runtime persistence of aliasing and locality specific to Partitioned Global Address Space applications. The results indicate that both techniques are required in practice: well optimized instruction sampling introduces overheads as high as 6500\% (65X slowdown), while each technique in separation is able to reduce it to 1000\% (10X slowdown). When applying the optimizations in conjunction our tool finds all previously known data races in our benchmark programs with at most 50\% overhead. Furthermore, while previous results illustrate the benefits of function level sampling, our experiences show that this technique does not work for scientific programs: instruction sampling or a hybrid approach is required.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dice:2013:SSC, author = "Dave Dice and Yossi Lev and Mark Moir", title = "Scalable statistics counters", journal = j-SIGPLAN, volume = "48", number = "8", pages = "307--308", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442558", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Naive statistics counters that are commonly used to monitor system events and performance become a scalability bottleneck as systems become larger and more NUMA; furthermore some are so inaccurate that they are not useful. We present a number of techniques to address these problems, evaluating solutions in terms of performance, scalability, space overhead, and accuracy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wozniak:2013:SSD, author = "Justin M. 
Wozniak and Timothy G. Armstrong and Michael Wilde and Daniel S. Katz and Ewing Lusk and Ian T. Foster", title = "{Swift/T}: scalable data flow programming for many-task applications", journal = j-SIGPLAN, volume = "48", number = "8", pages = "309--310", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442559", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Swift/T, a novel programming language implementation for highly scalable data flow programs, is presented.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cai:2013:TST, author = "Yan Cai and Ke Zhai and Shangru Wu and W. K. Chan", title = "{TeamWork}: synchronizing threads globally to detect real deadlocks for multithreaded programs", journal = j-SIGPLAN, volume = "48", number = "8", pages = "311--312", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442560", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "This paper presents the aim of TeamWork, our ongoing effort to develop a comprehensive dynamic deadlock confirmation tool for multithreaded programs. It also presents a refined object abstraction algorithm that refines the existing stack hash abstraction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{ElMehdiDiouri:2013:TEE, author = "Mohammed {El Mehdi Diouri} and Olivier Gl{\"u}ck and Laurent Lef{\`e}vre and Franck Cappello", title = "Towards an energy estimator for fault tolerance protocols", journal = j-SIGPLAN, volume = "48", number = "8", pages = "313--314", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442561", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Checkpointing protocols have different energy consumption depending on parameters like application features and platform characteristics. 
To select a protocol for a given execution, we propose an energy estimator that relies on an energy calibration of the considered platform and a user description of the execution settings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wimmer:2013:WSC, author = "Martin Wimmer and Daniel Cederman and Jesper Larsson Tr{\"a}ff and Philippas Tsigas", title = "Work-stealing with configurable scheduling strategies", journal = j-SIGPLAN, volume = "48", number = "8", pages = "315--316", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442562", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Work-stealing systems are typically oblivious to the nature of the tasks they are scheduling. They do not know or take into account how long a task will take to execute or how many subtasks it will spawn. Moreover, task execution order is typically determined by an underlying task storage data structure, and cannot be changed. There are thus possibilities for optimizing task parallel executions by providing information on specific tasks and their preferred execution order to the scheduling system. We investigate generalizations of work-stealing and introduce a framework enabling applications to dynamically provide hints on the nature of specific tasks using scheduling strategies. Strategies can be used to independently control both local task execution and steal order. Strategies allow optimizations on specific tasks, in contrast to more conventional scheduling policies that are typically global in scope. Strategies are composable and allow different, specific scheduling choices for different parts of an application simultaneously. We have implemented a work-stealing system based on our strategy framework. A series of benchmarks demonstrates beneficial effects that can be achieved with scheduling strategies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhou:2013:WED, author = "Bowen Zhou and Milind Kulkarni and Saurabh Bagchi", title = "{WuKong}: effective diagnosis of bugs at large system scales", journal = j-SIGPLAN, volume = "48", number = "8", pages = "317--318", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442563", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "A key challenge in developing large scale applications (both in system size and in input size) is finding bugs that are latent at the small scales of testing, only manifesting when a program is deployed at large scales. Traditional statistical techniques fail because no error-free run is available at deployment scales for training purposes. Prior work used scaling models to detect anomalous behavior at large scales without being trained on correct behavior at that scale. However, that work cannot localize bugs automatically. 
In this paper, we extend that work in three ways: (i) we develop an automatic diagnosis technique, based on feature reconstruction; (ii) we design a heuristic to effectively prune the feature space; and (iii) we validate our design through one fault-injection study, finding that our system can effectively localize bugs in a majority of cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Norell:2013:IPD, author = "Ulf Norell", title = "Interactive programming with dependent types", journal = j-SIGPLAN, volume = "48", number = "9", pages = "1--2", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500610", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In dependently typed languages run-time values can appear in types, making it possible to give programs more precise types than in languages without dependent types. This can range from keeping track of simple invariants like the length of a list, to full functional correctness. In addition to having some correctness guarantees on the final program, assigning more precise types to programs means that you can get more assistance from the type checker while writing them. This is what I focus on here, demonstrating how the programming environment of Agda can help you when developing dependently typed programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Traytel:2013:VDP, author = "Dmitriy Traytel and Tobias Nipkow", title = "Verified decision procedures for {MSO} on words based on derivatives of regular expressions", journal = j-SIGPLAN, volume = "48", number = "9", pages = "3--12", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500612", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Monadic second-order logic on finite words (MSO) is a decidable yet expressive logic into which many decision problems can be encoded. Since MSO formulas correspond to regular languages, equivalence of MSO formulas can be reduced to the equivalence of some regular structures (e.g. automata). This paper presents a verified functional decision procedure for MSO formulas that is not based on automata but on regular expressions. Functional languages are ideally suited for this task: regular expressions are data types and functions on them are defined by pattern matching and recursion and are verified by structural induction. Decision procedures for regular expression equivalence have been formalized before, usually based on Brzozowski derivatives. Yet, for a straightforward embedding of MSO formulas into regular expressions an extension of regular expressions with a projection operation is required. We prove total correctness and completeness of an equivalence checker for regular expressions extended in that way. We also define a language-preserving translation of formulas into regular expressions with respect to two different semantics of MSO. Our results have been formalized and verified in the theorem prover Isabelle. 
Using Isabelle's code generation facility, this yields purely functional, formally verified programs that decide equivalence of MSO formulas.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Broadbent:2013:CSC, author = "Christopher Broadbent and Arnaud Carayol and Matthew Hague and Olivier Serre", title = "{C-SHORe}: a collapsible approach to higher-order verification", journal = j-SIGPLAN, volume = "48", number = "9", pages = "13--24", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500589", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Higher-order recursion schemes (HORS) have recently received much attention as a useful abstraction of higher-order functional programs with a number of new verification techniques employing HORS model-checking as their centrepiece. This paper contributes to the ongoing quest for a truly scalable model-checker for HORS by offering a different, automata theoretic perspective. We introduce the first practical model-checking algorithm that acts on a generalisation of pushdown automata equi-expressive with HORS called collapsible pushdown systems (CPDS). At its core is a substantial modification of a recently studied saturation algorithm for CPDS. In particular it is able to use information gathered from an approximate forward reachability analysis to guide its backward search. Moreover, we introduce an algorithm that prunes the CPDS prior to model-checking and a method for extracting counter-examples in negative instances. We compare our tool with the state-of-the-art verification tools for HORS and obtain encouraging results. In contrast to some of the main competition tackling the same problem, our algorithm is fixed-parameter tractable, and we also offer significantly improved performance over the only previously published tool of which we are aware that also enjoys this property. The tool and additional material are available from http://cshore.cs.rhul.ac.uk.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Petersen:2013:ASV, author = "Leaf Petersen and Dominic Orchard and Neal Glew", title = "Automatic {SIMD} vectorization for {Haskell}", journal = j-SIGPLAN, volume = "48", number = "9", pages = "25--36", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500605", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Expressing algorithms using immutable arrays greatly simplifies the challenges of automatic SIMD vectorization, since several important classes of dependency violations cannot occur. The Haskell programming language provides libraries for programming with immutable arrays, and compiler support for optimizing them to eliminate the overhead of intermediate temporary arrays. We describe an implementation of automatic SIMD vectorization in a Haskell compiler which gives substantial vector speedups for a range of programs written in a natural programming style. 
We compare performance with that of programs compiled by the Glasgow Haskell Compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Mainland:2013:EVI, author = "Geoffrey Mainland and Roman Leshchinskiy and Simon Peyton Jones", title = "Exploiting vector instructions with generalized stream fusion", journal = j-SIGPLAN, volume = "48", number = "9", pages = "37--48", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500601", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stream fusion is a powerful technique for automatically transforming high-level sequence-processing functions into efficient implementations. It has been used to great effect in Haskell libraries for manipulating byte arrays, Unicode text, and unboxed vectors. However, some operations, like vector append, still do not perform well within the standard stream fusion framework. Others, like SIMD computation using the SSE and AVX instructions available on modern x86 chips, do not seem to fit in the framework at all. In this paper we introduce generalized stream fusion, which solves these issues. The key insight is to bundle together multiple stream representations, each tuned for a particular class of stream consumer. We also describe a stream representation suited for efficient computation with SSE instructions. Our ideas are implemented in modified versions of the GHC compiler and vector library. Benchmarks show that high-level Haskell code written using our compiler and libraries can produce code that is faster than both compiler- and hand-vectorized C.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{McDonell:2013:OPF, author = "Trevor L. McDonell and Manuel M. T. Chakravarty and Gabriele Keller and Ben Lippmeier", title = "Optimising purely functional {GPU} programs", journal = j-SIGPLAN, volume = "48", number = "9", pages = "49--60", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500595", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Purely functional, embedded array programs are a good match for SIMD hardware, such as GPUs. However, the naive compilation of such programs quickly leads to both code explosion and an excessive use of intermediate data structures. The resulting slow-down is not acceptable on target hardware that is usually chosen to achieve high performance. In this paper, we discuss two optimisation techniques, sharing recovery and array fusion, that tackle code explosion and eliminate superfluous intermediate structures. Both techniques are well known from other contexts, but they present unique challenges for an embedded language compiled for execution on a GPU. 
We present novel methods for implementing sharing recovery and array fusion, and demonstrate their effectiveness on a set of benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Bernardy:2013:TTC, author = "Jean-Philippe Bernardy and Moulin Guilhem", title = "Type-theory in color", journal = j-SIGPLAN, volume = "48", number = "9", pages = "61--72", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500577", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dependent type-theory aims to become the standard way to formalize mathematics at the same time as displacing traditional platforms for high-assurance programming. However, current implementations of type theory are still lacking, in the sense that some obvious truths require explicit proofs, making type-theory awkward to use for many applications, both in formalization and programming. In particular, notions of erasure are poorly supported. In this paper we propose an extension of type-theory with colored terms, color erasure and interpretation of colored types as predicates. The result is a more powerful type-theory: some definitions and proofs may be omitted as they become trivial, it becomes easier to program with precise types, and some parametricity results can be internalized.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Devriese:2013:TSM, author = "Dominique Devriese and Frank Piessens", title = "Typed syntactic meta-programming", journal = j-SIGPLAN, volume = "48", number = "9", pages = "73--86", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500575", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel set of meta-programming primitives for use in a dependently-typed functional language. The types of our meta-programs provide strong and precise guarantees about their termination, correctness and completeness. Our system supports type-safe construction and analysis of terms, types and typing contexts. Unlike alternative approaches, they are written in the same style as normal programs and use the language's standard functional computational model. We formalise the new meta-programming primitives, implement them as an extension of Agda, and provide evidence of usefulness by means of two compelling applications in the fields of datatype-generic programming and proof tactics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Ziliani:2013:MMT, author = "Beta Ziliani and Derek Dreyer and Neelakantan R. 
Krishnaswami and Aleksandar Nanevski and Viktor Vafeiadis", title = "{Mtac}: a monad for typed tactic programming in {Coq}", journal = j-SIGPLAN, volume = "48", number = "9", pages = "87--100", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500579", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effective support for custom proof automation is essential for large scale interactive proof development. However, existing languages for automation via *tactics* either (a) provide no way to specify the behavior of tactics within the base logic of the accompanying theorem prover, or (b) rely on advanced type-theoretic machinery that is not easily integrated into established theorem provers. We present Mtac, a lightweight but powerful extension to Coq that supports dependently-typed tactic programming. Mtac tactics have access to all the features of ordinary Coq programming, as well as a new set of typed tactical primitives. We avoid the need to touch the trusted kernel typechecker of Coq by encapsulating uses of these new tactical primitives in a *monad*, and instrumenting Coq so that it executes monadic tactics during type inference.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Dolan:2013:FSF, author = "Stephen Dolan", title = "Fun with semirings: a functional pearl on the abuse of linear algebra", journal = j-SIGPLAN, volume = "48", number = "9", pages = "101--110", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500613", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Describing a problem using classical linear algebra is a very well-known problem-solving technique. If your question can be formulated as a question about real or complex matrices, then the answer can often be found by standard techniques. It's less well-known that very similar techniques still apply where instead of real or complex numbers we have a closed semiring, which is a structure with some analogue of addition and multiplication that need not support subtraction or division. We define a typeclass in Haskell for describing closed semirings, and implement a few functions for manipulating matrices and polynomials over them. 
We then show how these functions can be used to calculate transitive closures, find shortest or longest or widest paths in a graph, analyse the data flow of imperative programs, optimally pack knapsacks, and perform discrete event simulations, all by just providing an appropriate underlying closed semiring.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Bernardy:2013:EDC, author = "Jean-Philippe Bernardy and Koen Claessen", title = "Efficient divide-and-conquer parsing of practical context-free languages", journal = j-SIGPLAN, volume = "48", number = "9", pages = "111--122", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500576", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a divide-and-conquer algorithm for parsing context-free languages efficiently. Our algorithm is an instance of Valiant's (1975), who reduced the problem of parsing to matrix multiplications. We show that, while the conquer step of Valiant's is O(n$^3$) in the worst case, it improves to O(log$^3$ n), under certain conditions satisfied by many useful inputs. These conditions occur for example in program texts written by humans. The improvement happens because the multiplications involve an overwhelming majority of empty matrices. This result is relevant to modern computing: divide-and-conquer algorithms can be parallelized relatively easily.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Mairson:2013:FGT, author = "Harry George Mairson", title = "Functional geometry and the {Trait{\'e} de Lutherie}: functional pearl", journal = j-SIGPLAN, volume = "48", number = "9", pages = "123--132", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500617", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a functional programming approach to the design of outlines of eighteenth-century string instruments. The approach is based on the research described in Fran{\c{c}}ois Denis's book, Trait{\'e} de lutherie. The programming vernacular for Denis's instructions, which we call functional geometry, is meant to reiterate the historically justified language and techniques of this musical instrument design. The programming metaphor is entirely Euclidean, involving straightedge and compass constructions, with few (if any) numbers, and no Cartesian equations or grid. As such, it is also an interesting approach to teaching programming and mathematics without numerical calculation or equational reasoning. The advantage of this language-based, functional approach to lutherie is founded in the abstract characterization of common patterns in instrument design. These patterns include not only the abstraction of common straightedge and compass constructions, but of higher-order conceptualization of the instrument design process.
We also discuss the role of arithmetic, geometric, harmonic, and subharmonic proportions, and the use of their rational approximants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Brady:2013:PRA, author = "Edwin Brady", title = "Programming and reasoning with algebraic effects and dependent types", journal = j-SIGPLAN, volume = "48", number = "9", pages = "133--144", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500581", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One often cited benefit of pure functional programming is that pure code is easier to test and reason about, both formally and informally. However, real programs have side-effects including state management, exceptions and interactions with the outside world. Haskell solves this problem using monads to capture details of possibly side-effecting computations --- it provides monads for capturing state, I/O, exceptions, non-determinism, libraries for practical purposes such as CGI and parsing, and many others, as well as monad transformers for combining multiple effects. Unfortunately, useful as monads are, they do not compose very well. Monad transformers can quickly become unwieldy when there are lots of effects to manage, leading to a temptation in larger programs to combine everything into one coarse-grained state and exception monad. In this paper I describe an alternative approach based on handling algebraic effects, implemented in the IDRIS programming language. I show how to describe side effecting computations, how to write programs which compose multiple fine-grained effects, and how, using dependent types, we can use this approach to reason about states in effectful programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Kammar:2013:HA, author = "Ohad Kammar and Sam Lindley and Nicolas Oury", title = "Handlers in action", journal = j-SIGPLAN, volume = "48", number = "9", pages = "145--158", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500590", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Plotkin and Pretnar's handlers for algebraic effects occupy a sweet spot in the design space of abstractions for effectful computation. By separating effect signatures from their implementation, algebraic effects provide a high degree of modularity, allowing programmers to express effectful programs independently of the concrete interpretation of their effects. A handler is an interpretation of the effects of an algebraic computation. The handler abstraction adapts well to multiple settings: pure or impure, strict or lazy, static types or dynamic types. This is a position paper whose main aim is to popularise the handler abstraction. We give a gentle introduction to its use, a collection of illustrative examples, and a straightforward operational semantics. 
We describe our Haskell implementation of handlers in detail, outline the ideas behind our OCaml, SML, and Racket implementations, and present experimental results comparing handlers with existing code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Jones:2013:CSS, author = "Simon Peyton Jones", title = "Computer science as a school subject", journal = j-SIGPLAN, volume = "48", number = "9", pages = "159--160", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500609", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computer science is one of the richest, most exciting disciplines on the planet, yet any teenager will tell you that ICT (as it is called in UK schools --- ``information and communication technology'') is focused almost entirely on the use and application of computers, and in practice covers nothing about how computers work, nor programming, nor anything of the discipline of computer science as we understand it. Over the last two decades, computing at school has drifted from writing adventure games on the BBC Micro to writing business plans in Excel. This is bad for our young people's education, and it is bad for our economy. Nor is this phenomenon restricted to the UK: many countries are struggling with the same issues. Our young people should be educated not only in the application and use of digital technology, but also in how it works, and its foundational principles. Lacking such knowledge renders them powerless in the face of complex and opaque technology, disenfranchises them from making informed decisions about the digital society, and deprives our nations of a well-qualified stream of students enthusiastic and able to envision and design new digital systems. Can anything be done, given the enormous inertia of our various countries' educational systems? Sometimes, yes. After a decade of stasis, change has come to the UK. Over the last 18 months, there has been a wholesale reform of the English school computing curriculum, and substantial movement in Scotland and Wales. It now seems likely that computer science will, for the first time, become part of every child's education. This change has been driven not by institutions or by the government, but by a grass-roots movement of parents, teachers, university academics, software developers, and others. A key agent in this grass-roots movement---although not the only one---is the Computing At School Working Group (CAS). In this talk I will describe how CAS was born and developed, and the radical changes that have taken place since in the UK. 
I hope that this may be encouraging for those pushing water uphill in other parts of the world, and I will also try to draw out some lessons from our experience that may be useful to others.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Schmidt-Schauss:2013:CSH, author = "Manfred Schmidt-Schau{\ss} and David Sabel", title = "Correctness of an {STM} {Haskell} implementation", journal = j-SIGPLAN, volume = "48", number = "9", pages = "161--172", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500585", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A concurrent implementation of software transactional memory in Concurrent Haskell using a call-by-need functional language with processes and futures is given. The description of the small-step operational semantics is precise and explicit, and employs an early abort of conflicting transactions. A proof of correctness of the implementation is given for a contextual semantics with may- and should-convergence. This implies that our implementation is a correct evaluator for an abstract specification equipped with a big-step semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Pottier:2013:PPM, author = "Fran{\c{c}}ois Pottier and Jonathan Protzenko", title = "Programming with permissions in {Mezzo}", journal = j-SIGPLAN, volume = "48", number = "9", pages = "173--184", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500598", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Mezzo, a typed programming language of ML lineage. Mezzo is equipped with a novel static discipline of duplicable and affine permissions, which controls aliasing and ownership. This rules out certain mistakes, including representation exposure and data races, and enables new idioms, such as gradual initialization, memory re-use, and (type)state changes. Although the core static discipline disallows sharing a mutable data structure, Mezzo offers several ways of working around this restriction, including a novel dynamic ownership control mechanism which we dub ``adoption and abandon''.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Abel:2013:WRC, author = "Andreas M. 
Abel and Brigitte Pientka", title = "Wellfounded recursion with copatterns: a unified approach to termination and productivity", journal = j-SIGPLAN, volume = "48", number = "9", pages = "185--196", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500591", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we study strong normalization of a core language based on System F-omega which supports programming with finite and infinite structures. Building on our prior work, finite data such as finite lists and trees are defined via constructors and manipulated via pattern matching, while infinite data such as streams and infinite trees is defined by observations and synthesized via copattern matching. In this work, we take a type-based approach to strong normalization by tracking size information about finite and infinite data in the type. This guarantees compositionality. More importantly, the duality of pattern and copatterns provide a unifying semantic concept which allows us for the first time to elegantly and uniformly support both well-founded induction and coinduction by mere rewriting. The strong normalization proof is structured around Girard's reducibility candidates. As such our system allows for non-determinism and does not rely on coverage. Since System F-omega is general enough that it can be the target of compilation for the Calculus of Constructions, this work is a significant step towards representing observation-centric infinite data in proof assistants such as Coq and Agda.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Atkey:2013:PCG, author = "Robert Atkey and Conor McBride", title = "Productive coprogramming with guarded recursion", journal = j-SIGPLAN, volume = "48", number = "9", pages = "197--208", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500597", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Total functional programming offers the beguiling vision that, just by virtue of the compiler accepting a program, we are guaranteed that it will always terminate. In the case of programs that are not intended to terminate, e.g., servers, we are guaranteed that programs will always be productive. Productivity means that, even if a program generates an infinite amount of data, each piece will be generated in finite time. The theoretical underpinning for productive programming with infinite output is provided by the category theoretic notion of final coalgebras. Hence, we speak of co programming with non-well-founded co data, as a dual to programming with well-founded data like finite lists and trees. Systems that offer facilities for productive coprogramming, such as the proof assistants Coq and Agda, currently do so through syntactic guardedness checkers. Syntactic guardedness checkers ensure that all self-recursive calls are guarded by a use of a constructor. Such a check ensures productivity. Unfortunately, these syntactic checks are not compositional, and severely complicate coprogramming. 
Guarded recursion, originally due to Nakano, is tantalising as a basis for a flexible and compositional type-based approach to coprogramming. However, as we show, by itself, guarded recursion is not suitable for coprogramming due to the fact that there is no way to make finite observations on pieces of infinite data. In this paper, we introduce the concept of clock variables that index Nakano's guarded recursion. Clock variables allow us to ``close over'' the generation of infinite data, and to make finite observations, something that is not possible with guarded recursion alone.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Hinze:2013:USR, author = "Ralf Hinze and Nicolas Wu and Jeremy Gibbons", title = "Unifying structured recursion schemes", journal = j-SIGPLAN, volume = "48", number = "9", pages = "209--220", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500578", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Folds over inductive datatypes are well understood and widely used. In their plain form, they are quite restricted; but many disparate generalisations have been proposed that enjoy similar calculational benefits. There have also been attempts to unify the various generalisations: two prominent such unifications are the 'recursion schemes from comonads' of Uustalu, Vene and Pardo, and our own 'adjoint folds'. Until now, these two unified schemes have appeared incompatible. We show that this appearance is illusory: in fact, adjoint folds subsume recursion schemes from comonads. The proof of this claim involves standard constructions in category theory that are nevertheless not well known in functional programming: Eilenberg-Moore categories and bialgebras.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Krishnaswami:2013:HOF, author = "Neelakantan R. Krishnaswami", title = "Higher-order functional reactive programming without spacetime leaks", journal = j-SIGPLAN, volume = "48", number = "9", pages = "221--232", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500588", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional reactive programming (FRP) is an elegant approach to declaratively specify reactive systems. However, the powerful abstractions of FRP have historically made it difficult to predict and control the resource usage of programs written in this style. In this paper, we give a new language for higher-order reactive programming. Our language generalizes and simplifies prior type systems for reactive programming, by supporting the use of streams of streams, first-class functions, and higher-order operations. We also support many temporal operations beyond streams, such as terminatable streams, events, and even resumptions with first-class schedulers. 
Furthermore, our language supports an efficient implementation strategy permitting us to eagerly deallocate old values and statically rule out spacetime leaks, a notorious source of inefficiency in reactive programs. Furthermore, these memory guarantees are achieved without the use of a complex substructural type discipline. We also show that our implementation strategy of eager deallocation is safe, by showing the soundness of our type system with a novel step-indexed Kripke logical relation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Jeffrey:2013:FRP, author = "Alan Jeffrey", title = "Functional reactive programming with liveness guarantees", journal = j-SIGPLAN, volume = "48", number = "9", pages = "233--244", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500584", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional Reactive Programming (FRP) is an approach to the development of reactive systems which provides a pure functional interface, but which may be implemented as an abstraction of an imperative event-driven layer. FRP systems typically provide a model of behaviours (total time-indexed values, implemented as pull systems) and event sources (partial time-indexed values, implemented as push systems). In this paper, we investigate a type system for event-driven FRP programs which provide liveness guarantees, that is every input event is guaranteed to generate an output event. We show that FRP can be implemented on top of a model of sets and relations, and that the isomorphism between event sources and behaviours corresponds to the isomorphism between relations and set-valued functions. We then implement sets and relations using a model of continuations using the usual double-negation CPS transform. The implementation of behaviours as pull systems based on futures, and of event sources as push systems based on the observer pattern, thus arises from first principles. We also discuss a Java implementation of the FRP model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Morihata:2013:SCP, author = "Akimasa Morihata", title = "A short cut to parallelization theorems", journal = j-SIGPLAN, volume = "48", number = "9", pages = "245--256", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500580", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The third list-homomorphism theorem states that if a function is both foldr and foldl, it has a divide-and-conquer parallel implementation as well. In this paper, we develop a theory for obtaining such parallelization theorems. The key is a new proof of the third list-homomorphism theorem based on shortcut deforestation. 
The proof implies that there exists a divide-and-conquer parallel program of the form of h (x 'merge' y) = h$_1$ x $\odot$ h$_2$ y, where h is the subject of parallelization, merge is the operation of integrating independent substructures, h$_1$ and h$_2$ are computations applied to substructures, possibly in parallel, and $\odot$ merges the results calculated for substructures, if (i) h can be specified by two certain forms of iterative programs, and (ii) merge can be implemented by a function of a certain polymorphic type. Therefore, when requirement (ii) is fulfilled, h has a divide-and-conquer implementation if h has two certain forms of implementations. We show that our approach is applicable to structure-consuming operations by catamorphisms (folds), structure-generating operations by anamorphisms (unfolds), and their generalizations called hylomorphisms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Axelsson:2013:UCP, author = "Emil Axelsson and Koen Claessen", title = "Using circular programs for higher-order syntax: functional pearl", journal = j-SIGPLAN, volume = "48", number = "9", pages = "257--262", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500614", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This pearl presents a novel technique for constructing a first-order syntax tree directly from a higher-order interface. We exploit circular programming to generate names for new variables, resulting in a simple yet efficient method. Our motivating application is the design of embedded languages supporting variable binding, where it is convenient to use higher-order syntax when constructing programs, but first-order syntax when processing or transforming programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Balabonski:2013:WOM, author = "Thibaut Balabonski", title = "Weak optimality, and the meaning of sharing", journal = j-SIGPLAN, volume = "48", number = "9", pages = "263--274", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500606", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we investigate laziness and optimal evaluation strategies for functional programming languages. We consider the weak lambda-calculus as a basis of functional programming languages, and we adapt to this setting the concepts of optimal reductions that were defined for the full lambda-calculus. We prove that the usual implementation of call-by-need using sharing is optimal, that is, normalizing any lambda-term with call-by-need requires exactly the same number of reduction steps as the shortest reduction sequence in the weak lambda-calculus without sharing. Furthermore, we prove that optimal reduction sequences without sharing are not computable.
Hence sharing is the only computable means to reach weak optimality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Weirich:2013:SFE, author = "Stephanie Weirich and Justin Hsu and Richard A. Eisenberg", title = "System {FC} with explicit kind equality", journal = j-SIGPLAN, volume = "48", number = "9", pages = "275--286", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500599", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "System FC, the core language of the Glasgow Haskell Compiler, is an explicitly-typed variant of System F with first-class type equality proofs called coercions. This extensible proof system forms the foundation for type system extensions such as type families (type-level functions) and Generalized Algebraic Datatypes (GADTs). Such features, in conjunction with kind polymorphism and datatype promotion, support expressive compile-time reasoning. However, the core language lacks explicit kind equality proofs. As a result, type-level computation does not have access to kind-level functions or promoted GADTs, the type-level analogues to expression-level features that have been so useful. In this paper, we eliminate such discrepancies by introducing kind equalities to System FC. Our approach is based on dependent type systems with heterogeneous equality and the ``Type-in-Type'' axiom, yet it preserves the metatheoretic properties of FC. In particular, type checking is simple, decidable and syntax directed. We prove the preservation and progress theorems for the extended language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Sculthorpe:2013:CMP, author = "Neil Sculthorpe and Jan Bracker and George Giorgidze and Andy Gill", title = "The constrained-monad problem", journal = j-SIGPLAN, volume = "48", number = "9", pages = "287--298", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500602", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In Haskell, there are many data types that would form monads were it not for the presence of type-class constraints on the operations on that data type. This is a frustrating problem in practice, because there is a considerable amount of support and infrastructure for monads that these data types cannot use. Using several examples, we show that a monadic computation can be restructured into a normal form such that the standard monad class can be used. The technique is not specific to monads, and we show how it can also be applied to other structures, such as applicative functors. 
One significant use case for this technique is domain-specific languages, where it is often desirable to compile a deep embedding of a computation to some other language, which requires restricting the types that can appear in that computation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Svenningsson:2013:SCR, author = "Josef David Svenningsson and Bo Joel Svensson", title = "Simple and compositional reification of monadic embedded languages", journal = j-SIGPLAN, volume = "48", number = "9", pages = "299--304", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500611", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When writing embedded domain specific languages in Haskell, it is often convenient to be able to make an instance of the Monad class to take advantage of the do-notation and the extensive monad libraries. Commonly it is desirable to compile such languages rather than just interpret them. This introduces the problem of monad reification, i.e. observing the structure of the monadic computation. We present a solution to the monad reification problem and illustrate it with a small robot control language. Monad reification is not new but the novelty of our approach is in its directness, simplicity and compositionality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Hidaka:2013:SRQ, author = "Soichiro Hidaka and Kazuyuki Asada and Zhenjiang Hu and Hiroyuki Kato and Keisuke Nakano", title = "Structural recursion for querying ordered graphs", journal = j-SIGPLAN, volume = "48", number = "9", pages = "305--318", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500608", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Structural recursion, in the form of, for example, folds on lists and catamorphisms on algebraic data structures including trees, plays an important role in functional programming, by providing a systematic way for constructing and manipulating functional programs. It is, however, a challenge to define structural recursions for graph data structures, the most ubiquitous sort of data in computing. This is because unlike lists and trees, graphs are essentially not inductive and cannot be formalized as an initial algebra in general. In this paper, we borrow from the database community the idea of structural recursion on how to restrict recursions on infinite unordered regular trees so that they preserve the finiteness property and become terminating, which are desirable properties for query languages. We propose a new graph transformation language called lambdaFG for transforming and querying ordered graphs, based on the well-defined bisimulation relation on ordered graphs with special epsilon-edges. 
The language lambdaFG is a higher order graph transformation language that extends the simply typed lambda calculus with graph constructors and more powerful structural recursions, which is extended for transformations on the sibling dimension. It not only gives a general framework for manipulating graphs and reasoning about them, but also provides a solution to the open problem of how to define a structural recursion on ordered graphs, with the help of the bisimilarity for ordered graphs with epsilon-edges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Delaware:2013:MMM, author = "Benjamin Delaware and Steven Keuchel and Tom Schrijvers and Bruno C.d.S. Oliveira", title = "Modular monadic meta-theory", journal = j-SIGPLAN, volume = "48", number = "9", pages = "319--330", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500587", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents 3MT, a framework for modular mechanized meta-theory of languages with effects. Using 3MT, individual language features and their corresponding definitions --- semantic functions, theorem statements and proofs-- can be built separately and then reused to create different languages with fully mechanized meta-theory. 3MT combines modular datatypes and monads to define denotational semantics with effects on a per-feature basis, without fixing the particular set of effects or language constructs. One well-established problem with type soundness proofs for denotational semantics is that they are notoriously brittle with respect to the addition of new effects. The statement of type soundness for a language depends intimately on the effects it uses, making it particularly challenging to achieve modularity. 3MT solves this long-standing problem by splitting these theorems into two separate and reusable parts: a feature theorem that captures the well-typing of denotations produced by the semantic function of an individual feature with respect to only the effects used, and an effect theorem that adapts well-typings of denotations to a fixed superset of effects. The proof of type soundness for a particular language simply combines these theorems for its features and the combination of their effects. To establish both theorems, 3MT uses two key reasoning techniques: modular induction and algebraic laws about effects. Several effectful language features, including references and errors, illustrate the capabilities of 3MT. 
A case study reuses these features to build fully mechanized definitions and proofs for 28 languages, including several versions of mini-ML with effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Lorenzen:2013:MAT, author = "Florian Lorenzen and Sebastian Erdweg", title = "Modular and automated type-soundness verification for language extensions", journal = j-SIGPLAN, volume = "48", number = "9", pages = "331--342", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500596", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Language extensions introduce high-level programming constructs that protect programmers from low-level details and repetitive tasks. For such an abstraction barrier to be sustainable, it is important that no errors are reported in terms of generated code. A typical strategy is to check the original user code prior to translation into a low-level encoding, applying the assumption that the translation does not introduce new errors. Unfortunately, such assumption is untenable in general, but in particular in the context of extensible programming languages, such as Racket or SugarJ, that allow regular programmers to define language extensions. In this paper, we present a formalism for building and automatically verifying the type-soundness of syntactic language extensions. To build a type-sound language extension with our formalism, a developer declares an extended syntax, type rules for the extended syntax, and translation rules into the (possibly further extended) base language. Our formalism then validates that the user-defined type rules are sufficient to guarantee that the code generated by the translation rules cannot contain any type errors. This effectively ensures that an initial type check prior to translation precludes type errors in generated code. We have implemented a core system in PLT Redex and we have developed a syntactically extensible variant of System F$_\omega$ that we extend with let notation, monadic do blocks, and algebraic data types. Our formalism verifies the soundness of each extension automatically.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Keep:2013:NFC, author = "Andrew W. Keep and R. Kent Dybvig", title = "A nanopass framework for commercial compiler development", journal = j-SIGPLAN, volume = "48", number = "9", pages = "343--350", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500618", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Contemporary compilers must typically handle sophisticated high-level source languages, generate efficient code for multiple hardware architectures and operating systems, and support source-level debugging, profiling, and other program development tools. As a result, compilers tend to be among the most complex of software systems. Nanopass frameworks are designed to help manage this complexity.
A nanopass compiler is comprised of many single-task passes with formally defined intermediate languages. The perceived downside of a nanopass compiler is that the extra passes will lead to substantially longer compilation times. To determine whether this is the case, we have created a plug replacement for the commercial Chez Scheme compiler, implemented using an updated nanopass framework, and we have compared the speed of the new compiler and the code it generates against the original compiler for a large set of benchmark programs. This paper describes the updated nanopass framework, the new compiler, and the results of our experiments. The compiler produces faster code than the original, averaging 15-27\% depending on architecture and optimization level, due to a more sophisticated but slower register allocator and improvements to several optimizations. Compilation times average well within a factor of two of the original compiler, despite the slower register allocator and the replacement of five passes of the original 10 with over 50 nanopasses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{St-Amour:2013:ERA, author = "Vincent St-Amour and Neil Toronto", title = "Experience report: applying random testing to a base type environment", journal = j-SIGPLAN, volume = "48", number = "9", pages = "351--356", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500616", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As programmers, programming in typed languages increases our confidence in the correctness of our programs. As type system designers, soundness proofs increase our confidence in the correctness of our type systems. There is more to typed languages than their typing rules, however. To be usable, a typed language needs to provide a well-furnished standard library and to specify types for its exports. As software artifacts, these base type environments can rival typecheckers in complexity. Our experience with the Typed Racket base environment---which accounts for 31\% of the code in the Typed Racket implementation---teaches us that writing type environments can be just as error-prone as writing typecheckers. We report on our experience over the past two years of using random testing to increase our confidence in the correctness of the Typed Racket base environment.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Petersen:2013:ERF, author = "Christian L. Petersen and Matthias Gorges and Dustin Dunsmuir and Mark Ansermino and Guy A. 
Dumont", title = "Experience report: functional programming of {mHealth} applications", journal = j-SIGPLAN, volume = "48", number = "9", pages = "357--362", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500615", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A modular framework for the development of medical applications that promotes deterministic, robust and correct code is presented. The system is based on the portable Gambit Scheme programming language and provides a flexible cross-platform environment for developing graphical applications on mobile devices as well as medical instrumentation interfaces running on embedded platforms. Real world applications of this framework for mobile diagnostics, telemonitoring and automated drug infusions are reported. The source code for the core framework is open source and available at: https://github.com/part-cw/lambdanative.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Delbianco:2013:HSR, author = "Germ{\'a}n Andr{\'e}s Delbianco and Aleksandar Nanevski", title = "{Hoare}-style reasoning with (algebraic) continuations", journal = j-SIGPLAN, volume = "48", number = "9", pages = "363--376", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500593", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Continuations are programming abstractions that allow for manipulating the ``future'' of a computation. Amongst their many applications, they enable implementing unstructured program flow through higher-order control operators such as callcc. In this paper we develop a Hoare-style logic for the verification of programs with higher-order control, in the presence of dynamic state. This is done by designing a dependent type theory with first class callcc and abort operators, where pre- and postconditions of programs are tracked through types. Our operators are algebraic in the sense of Plotkin and Power, and Jaskelioff, to reduce the annotation burden and enable verification by symbolic evaluation. 
We illustrate working with the logic by verifying a number of characteristic examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Turon:2013:URH, author = "Aaron Turon and Derek Dreyer and Lars Birkedal", title = "Unifying refinement and {Hoare}-style reasoning in a logic for higher-order concurrency", journal = j-SIGPLAN, volume = "48", number = "9", pages = "377--390", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500600", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modular programming and modular verification go hand in hand, but most existing logics for concurrency ignore two crucial forms of modularity: *higher-order functions*, which are essential for building reusable components, and *granularity abstraction*, a key technique for hiding the intricacies of fine-grained concurrent data structures from the clients of those data structures. In this paper, we present CaReSL, the first logic to support the use of granularity abstraction for modular verification of higher-order concurrent programs. After motivating the features of CaReSL through a variety of illustrative examples, we demonstrate its effectiveness by using it to tackle a significant case study: the first formal proof of (partial) correctness for Hendler et al.'s ``flat combining'' algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Chlipala:2013:BSP, author = "Adam Chlipala", title = "The {Bedrock} structured programming system: combining generative metaprogramming and {Hoare} logic in an extensible program verifier", journal = j-SIGPLAN, volume = "48", number = "9", pages = "391--402", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500592", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We report on the design and implementation of an extensible programming language and its intrinsic support for formal verification. Our language is targeted at low-level programming of infrastructure like operating systems and runtime systems. It is based on a cross-platform core combining characteristics of assembly languages and compiler intermediate languages. From this foundation, we take literally the saying that C is a ``macro assembly language'': we introduce an expressive notion of certified low-level macros, sufficient to build up the usual features of C and beyond as macros with no special support in the core. Furthermore, our macros have integrated support for strongest postcondition calculation and verification condition generation, so that we can provide a high-productivity formal verification environment within Coq for programs composed from any combination of macros. Our macro interface is expressive enough to support features that low-level programs usually only access through external tools with no formal guarantees, such as declarative parsing or SQL-inspired querying.
The abstraction level of these macros only imposes a compile-time cost, via the execution of functional Coq programs that compute programs in our intermediate language; but the run-time cost is not substantially greater than for more conventional C code. We describe our experiences constructing a full C-like language stack using macros, with some experiments on the verifiability and performance of individual programs running on that stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Cheney:2013:PTL, author = "James Cheney and Sam Lindley and Philip Wadler", title = "A practical theory of language-integrated query", journal = j-SIGPLAN, volume = "48", number = "9", pages = "403--416", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500586", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Language-integrated query is receiving renewed attention, in part because of its support through Microsoft's LINQ framework. We present a practical theory of language-integrated query based on quotation and normalisation of quoted terms. Our technique supports join queries, abstraction over values and predicates, composition of queries, dynamic generation of queries, and queries with nested intermediate data. Higher-order features prove useful even for constructing first-order queries. We prove a theorem characterising when a host query is guaranteed to generate a single SQL query. We present experimental results confirming our technique works, even in situations where Microsoft's LINQ framework either fails to produce an SQL query or, in one case, produces an avalanche of SQL queries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Garcia:2013:CTB, author = "Ronald Garcia", title = "Calculating threesomes, with blame", journal = j-SIGPLAN, volume = "48", number = "9", pages = "417--428", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500603", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Coercions and threesomes both enable a language to combine static and dynamic types while avoiding cast-based space leaks. Coercion calculi elegantly specify space-efficient cast behavior, even when augmented with blame tracking, but implementing their semantics directly is difficult. Threesomes, on the other hand, have a straightforward recursive implementation, but endowing them with blame tracking is challenging. In this paper, we show that you can use that elegant spec to produce that straightforward implementation: we use the coercion calculus to derive threesomes with blame. 
In particular, we construct novel threesome calculi for blame tracking strategies that detect errors earlier, catch more errors, and reflect an intuitive conception of safe and unsafe casts based on traditional subtyping.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Dunfield:2013:CEB, author = "Joshua Dunfield and Neelakantan R. Krishnaswami", title = "Complete and easy bidirectional typechecking for higher-rank polymorphism", journal = j-SIGPLAN, volume = "48", number = "9", pages = "429--442", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500582", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Bidirectional typechecking, in which terms either synthesize a type or are checked against a known type, has become popular for its scalability (unlike Damas-Milner type inference, bidirectional typing remains decidable even for very expressive type systems), its error reporting, and its relative ease of implementation. Following design principles from proof theory, bidirectional typing can be applied to many type constructs. The principles underlying a bidirectional approach to polymorphism, however, are less obvious. We give a declarative, bidirectional account of higher-rank polymorphism, grounded in proof theory; this calculus enjoys many properties such as eta-reduction and predictability of annotations. We give an algorithm for implementing the declarative system; our algorithm is remarkably simple and well-behaved, despite being both sound and complete.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Johnson:2013:OAA, author = "J. Ian Johnson and Nicholas Labich and Matthew Might and David {Van Horn}", title = "Optimizing abstract abstract machines", journal = j-SIGPLAN, volume = "48", number = "9", pages = "443--454", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500604", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The technique of abstracting abstract machines (AAM) provides a systematic approach for deriving computable approximations of evaluators that are easily proved sound. This article contributes a complementary step-by-step process for subsequently going from a naive analyzer derived under the AAM approach, to an efficient and correct implementation. The end result of the process is a two to three order-of-magnitude improvement over the systematically derived analyzer, making it competitive with hand-optimized implementations that compute fundamentally less precise results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Hritcu:2013:TNQ, author = "Catalin Hritcu and John Hughes and Benjamin C. 
Pierce and Antal Spector-Zabusky and Dimitrios Vytiniotis and Arthur Azevedo de Amorim and Leonidas Lampropoulos", title = "Testing noninterference, quickly", journal = j-SIGPLAN, volume = "48", number = "9", pages = "455--468", month = sep, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544174.2500574", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 27 18:32:10 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Information-flow control mechanisms are difficult to design and labor intensive to prove correct. To reduce the time wasted on proof attempts doomed to fail due to broken definitions, we advocate modern random testing techniques for finding counterexamples during the design process. We show how to use QuickCheck, a property-based random-testing tool, to guide the design of a simple information-flow abstract machine. We find that both sophisticated strategies for generating well-distributed random programs and readily falsifiable formulations of noninterference properties are critically important. We propose several approaches and evaluate their effectiveness on a collection of injected bugs of varying subtlety. We also present an effective technique for shrinking large counterexamples to minimal, easily comprehensible ones. Taken together, our best methods enable us to quickly and automatically generate simple counterexamples for all these bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '13 conference proceedings.", } @Article{Meyerovich:2013:EAP, author = "Leo A. Meyerovich and Ariel S. Rabkin", title = "Empirical analysis of programming language adoption", journal = j-SIGPLAN, volume = "48", number = "10", pages = "1--18", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Some programming languages become widely popular while others fail to grow beyond their niche or disappear altogether. This paper uses survey methodology to identify the factors that lead to language adoption. We analyze large datasets, including over 200,000 SourceForge projects, 590,000 projects tracked by Ohloh, and multiple surveys of 1,000-13,000 programmers. We report several prominent findings. First, language adoption follows a power law; a small number of languages account for most language use, but the programming market supports many languages with niche user bases. Second, intrinsic features have only secondary importance in adoption. Open source libraries, existing code, and experience strongly influence developers when selecting a language for a project. Language features such as performance, reliability, and simple semantics do not. Third, developers will steadily learn and forget languages. The overall number of languages developers are familiar with is independent of age. Finally, when considering intrinsic aspects of languages, developers prioritize expressivity over correctness.
They perceive static types as primarily helping with the latter, hence partly explaining the popularity of dynamic languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2013:SSE, author = "You Li and Zhendong Su and Linzhang Wang and Xuandong Li", title = "Steering symbolic execution to less traveled paths", journal = j-SIGPLAN, volume = "48", number = "10", pages = "19--32", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Symbolic execution is a promising testing and analysis methodology. It systematically explores a program's execution space and can generate test cases with high coverage. One significant practical challenge for symbolic execution is how to effectively explore the enormous number of program paths in real-world programs. Various heuristics have been proposed for guiding symbolic execution, but they are generally inefficient and ad-hoc. In this paper, we introduce a novel, unified strategy to guide symbolic execution to less explored parts of a program. Our key idea is to exploit a specific type of path spectra, namely the length-n subpath program spectra, to systematically approximate full path information for guiding path exploration. In particular, we use frequency distributions of explored length-n subpaths to prioritize ``less traveled'' parts of the program to improve test coverage and error detection. We have implemented our general strategy in KLEE, a state-of-the-art symbolic execution engine. Evaluation results on the GNU Coreutils programs show that (1) varying the length n captures program-specific information and exhibits different degrees of effectiveness, and (2) our general approach outperforms traditional strategies in both coverage and error detection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Carbin:2013:VQR, author = "Michael Carbin and Sasa Misailovic and Martin C. Rinard", title = "Verifying quantitative reliability for programs that execute on unreliable hardware", journal = j-SIGPLAN, volume = "48", number = "10", pages = "33--52", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509546", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Emerging high-performance architectures are anticipated to contain unreliable components that may exhibit soft errors, which silently corrupt the results of computations. Full detection and masking of soft errors is challenging, expensive, and, for some applications, unnecessary. For example, approximate computing applications (such as multimedia processing, machine learning, and big data analytics) can often naturally tolerate soft errors. We present Rely, a programming language that enables developers to reason about the quantitative reliability of an application --- namely, the probability that it produces the correct result when executed on unreliable hardware.
Rely allows developers to specify the reliability requirements for each value that a function produces. We present a static quantitative reliability analysis that verifies quantitative requirements on the reliability of an application, enabling a developer to perform sound and verified reliability engineering. The analysis takes a Rely program with a reliability specification and a hardware specification that characterizes the reliability of the underlying hardware components and verifies that the program satisfies its reliability specification when executed on the underlying unreliable hardware platform. We demonstrate the application of quantitative reliability analysis on six computations implemented in Rely.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Huang:2013:ECS, author = "Jipeng Huang and Michael D. Bond", title = "Efficient context sensitivity for dynamic analyses via calling context uptrees and customized memory management", journal = j-SIGPLAN, volume = "48", number = "10", pages = "53--72", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "State-of-the-art dynamic bug detectors such as data race and memory leak detectors report program locations that are likely causes of bugs. However, programmers need more than static program locations to understand the behavior of increasingly complex and concurrent software. Dynamic calling context provides additional information, but it is expensive to record calling context frequently, e.g., at every read and write. Context-sensitive dynamic analyses can build and maintain a calling context tree (CCT) to track calling context--but in order to reuse existing nodes, CCT-based approaches require an expensive lookup. This paper introduces a new approach for context sensitivity that avoids this expensive lookup. The approach uses a new data structure called the calling context uptree (CCU) that adds low overhead by avoiding the lookup and instead allocating a new node for each context. A key contribution is that the approach can mitigate the costs of allocating many nodes by extending tracing garbage collection (GC): GC collects unused CCU nodes naturally and efficiently, and we extend GC to merge duplicate nodes lazily. We implement our CCU-based approach in a high-performance Java virtual machine and integrate it with a staleness-based memory leak detector and happens-before data race detector, so they can report context-sensitive program locations that cause bugs. 
We show that the CCU-based approach, in concert with an extended GC, provides a compelling alternative to CCT-based approaches for adding context sensitivity to dynamic analyses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ureche:2013:MIS, author = "Vlad Ureche and Cristian Talau and Martin Odersky", title = "Miniboxing: improving the speed to code size tradeoff in parametric polymorphism translations", journal = j-SIGPLAN, volume = "48", number = "10", pages = "73--92", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Parametric polymorphism enables code reuse and type safety. Underneath the uniform interface exposed to programmers, however, its low level implementation has to cope with inherently non-uniform data: value types of different sizes and semantics (bytes, integers, floating point numbers) and reference types (pointers to heap objects). On the Java Virtual Machine, parametric polymorphism is currently translated to bytecode using two competing approaches: homogeneous and heterogeneous. Homogeneous translation requires boxing, and thus introduces indirect access delays. Heterogeneous translation duplicates and adapts code for each value type individually, producing more bytecode. Therefore bytecode speed and size are at odds with each other. This paper proposes a novel translation that significantly reduces the bytecode size without affecting the execution speed. The key insight is that larger value types (such as integers) can hold smaller ones (such as bytes) thus reducing the duplication necessary in heterogeneous translations. In our implementation, on the Scala compiler, we encode all primitive value types in long integers. The resulting bytecode approaches the performance of monomorphic code, matches the performance of the heterogeneous translation and obtains speedups of up to 22x over the homogeneous translation, all with modest increases in size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shahriyar:2013:TGR, author = "Rifat Shahriyar and Stephen Michael Blackburn and Xi Yang and Kathryn S. McKinley", title = "Taking off the gloves with reference counting {Immix}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "93--110", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509527", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Despite some clear advantages and recent advances, reference counting remains a poor cousin to high-performance tracing garbage collectors. The advantages of reference counting include (a) immediacy of reclamation, (b) incrementality, and (c) local scope of its operations. 
After decades of languishing with hopelessly bad performance, recent work narrowed the gap between reference counting and the fastest tracing collectors to within 10\%. Though a major advance, this gap remains a substantial barrier to adoption in performance-conscious application domains. Our work identifies heap organization as the principal source of the remaining performance gap. We present the design, implementation, and analysis of a new collector, RC Immix, that replaces reference counting's traditional free-list heap organization with the line and block heap structure introduced by the Immix collector. The key innovations of RC Immix are (1) to combine traditional reference counts with per-line live object counts to identify reusable memory and (2) to eliminate fragmentation by integrating copying with reference counting of new objects and with backup tracing cycle collection. In RC Immix, reference counting offers efficient collection and the line and block heap organization delivers excellent mutator locality and efficient allocation. With these advances, RC Immix closes the 10\% performance gap, matching the performance of a highly tuned production generational collector. By removing the performance barrier, this work transforms reference counting into a serious alternative for meeting high performance objectives for garbage collected languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Xu:2013:RTO, author = "Guoqing Xu", title = "{Resurrector}: a tunable object lifetime profiling technique for optimizing real-world programs", journal = j-SIGPLAN, volume = "48", number = "10", pages = "111--130", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Modern object-oriented applications commonly suffer from severe performance problems that need to be optimized away for increased efficiency and user satisfaction. Many existing optimization techniques (such as object pooling and pretenuring) require precise identification of object lifetimes. However, it is particularly challenging to obtain object lifetimes both precisely and efficiently: precise profiling techniques such as Merlin introduce several hundred times slowdown even for small programs while efficient approximation techniques often sacrifice precision and produce less useful lifetime information. This paper presents a tunable profiling technique, called Resurrector, that explores the middle ground between high precision and high efficiency to find the precision-efficiency sweetspot for various liveness-based optimization techniques. Our evaluation shows that Resurrector is both more precise and more efficient than the GC-based approximation, and it is orders-of-magnitude faster than Merlin. To demonstrate Resurrector's usefulness, we have developed client analyses to find allocation sites that create large data structures with disjoint lifetimes. By inspecting program source code and reusing data structures created from these allocation sites, we have achieved significant performance gains. 
We have also improved the precision of an existing optimization technique using the lifetime information collected by Resurrector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Norris:2013:CCC, author = "Brian Norris and Brian Demsky", title = "{CDSChecker}: checking concurrent data structures written with {C\slash C++} atomics", journal = j-SIGPLAN, volume = "48", number = "10", pages = "131--150", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Writing low-level concurrent software has traditionally required intimate knowledge of the entire toolchain and often has involved coding in assembly. New language standards have extended C and C++ with support for low-level atomic operations and a weak memory model, enabling developers to write portable and efficient multithreaded code. Developing correct low-level concurrent code is well-known to be especially difficult under a weak memory model, where code behavior can be surprising. Building reliable concurrent software using C/C++ low-level atomic operations will likely require tools that help developers discover unexpected program behaviors. In this paper we present CDSChecker, a tool for exhaustively exploring the behaviors of concurrent code under the C/C++ memory model. We develop several novel techniques for modeling the relaxed behaviors allowed by the memory model and for minimizing the number of execution behaviors that CDSChecker must explore. We have used CDSChecker to exhaustively unit test several concurrent data structure implementations on specific inputs and have discovered errors in both a recently published C11 implementation of a work-stealing queue and a single producer, single consumer queue implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raychev:2013:ERD, author = "Veselin Raychev and Martin Vechev and Manu Sridharan", title = "Effective race detection for event-driven programs", journal = j-SIGPLAN, volume = "48", number = "10", pages = "151--166", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509538", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Like shared-memory multi-threaded programs, event-driven programs such as client-side web applications are susceptible to data races that are hard to reproduce and debug. Race detection for such programs is hampered by their pervasive use of ad hoc synchronization, which can lead to a prohibitive number of false positives. Race detection also faces a scalability challenge, as a large number of short-running event handlers can quickly overwhelm standard vector-clock-based techniques. This paper presents several novel contributions that address both of these challenges. 
First, we introduce race coverage, a systematic method for exposing ad hoc synchronization and other (potentially harmful) races to the user, significantly reducing false positives. Second, we present an efficient connectivity algorithm for computing race coverage. The algorithm is based on chain decomposition and leverages the structure of event-driven programs to dramatically decrease the overhead of vector clocks. We implemented our techniques in a tool called EventRacer and evaluated it on a number of public web sites. The results indicate substantial performance and precision improvements of our approach over the state-of-the-art. Using EventRacer, we found many harmful races, most of which are beyond the reach of current techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bolz:2013:SSC, author = "Carl Friedrich Bolz and Lukas Diekmann and Laurence Tratt", title = "Storage strategies for collections in dynamically typed languages", journal = j-SIGPLAN, volume = "48", number = "10", pages = "167--182", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Dynamically typed language implementations often use more memory and execute slower than their statically typed cousins, in part because operations on collections of elements are unoptimised. This paper describes storage strategies, which dynamically optimise collections whose elements are instances of the same primitive type. We implement storage strategies in the PyPy virtual machine, giving a performance increase of 18\% on wide-ranging benchmarks of real Python programs. We show that storage strategies are simple to implement, needing only 1500LoC in PyPy, and have applicability to a wide range of virtual machines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Miller:2013:IPG, author = "Heather Miller and Philipp Haller and Eugene Burmako and Martin Odersky", title = "Instant pickles: generating object-oriented pickler combinators for fast and extensible serialization", journal = j-SIGPLAN, volume = "48", number = "10", pages = "183--202", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "As more applications migrate to the cloud, and as ``big data'' edges into even more production environments, the performance and simplicity of exchanging data between compute nodes/devices is increasing in importance. An issue central to distributed programming, yet often under-considered, is serialization or pickling, i.e., persisting runtime objects by converting them into a binary or text representation. 
Pickler combinators are a popular approach from functional programming; their composability alleviates some of the tedium of writing pickling code by hand, but they don't translate well to object-oriented programming due to qualities like open class hierarchies and subtyping polymorphism. Furthermore, both functional pickler combinators and popular, Java-based serialization frameworks tend to be tied to a specific pickle format, leaving programmers with no choice of how their data is persisted. In this paper, we present object-oriented pickler combinators and a framework for generating them at compile-time, called scala/pickling, designed to be the default serialization mechanism of the Scala programming language. The static generation of OO picklers enables significant performance improvements, outperforming Java and Kryo in most of our benchmarks. In addition to high performance and the need for little to no boilerplate, our framework is extensible: using the type class pattern, users can provide both (1) custom, easily interchangeable pickle formats and (2) custom picklers, to override the default behavior of the pickling framework. In benchmarks, we compare scala/pickling with other popular industrial frameworks, and present results on time, memory usage, and size when pickling/unpickling a number of data types used in real-world, large-scale distributed applications and frameworks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Salkeld:2013:IDO, author = "Robin Salkeld and Gregor Kiczales", title = "Interacting with dead objects", journal = j-SIGPLAN, volume = "48", number = "10", pages = "203--216", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509543", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Debugging and analyzing a snapshot of a crashed program's memory is far more difficult than working with a live program, because debuggers can no longer execute code to help make sense of the program state. We present an architecture that supports the restricted execution of ordinary code starting from the snapshot, as if the dead objects within it had been restored, but without access to their original external environment. 
We demonstrate the feasibility of this approach via an implementation for Java that does not require a custom virtual machine, show that it performs competitively with live execution, and use it to diagnose an unresolved memory leak in a mature mainstream application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Politz:2013:PFM, author = "Joe Gibbs Politz and Alejandro Martinez and Matthew Milano and Sumner Warren and Daniel Patterson and Junsong Li and Anand Chitipothu and Shriram Krishnamurthi", title = "{Python}: the full monty", journal = j-SIGPLAN, volume = "48", number = "10", pages = "217--232", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We present a small-step operational semantics for the Python programming language. We present both a core language for Python, suitable for tools and proofs, and a translation process for converting Python source to this core. We have tested the composition of translation and evaluation of the core for conformance with the primary Python implementation, thereby giving confidence in the fidelity of the semantics. We briefly report on the engineering of these components. Finally, we examine subtle aspects of the language, identifying scope as a pervasive concern that even impacts features that might be considered orthogonal.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gerakios:2013:FIS, author = "Prodromos Gerakios and Aggelos Biboudis and Yannis Smaragdakis", title = "Forsaking inheritance: supercharged delegation in {DelphJ}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "233--252", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We propose DelphJ: a Java-based OO language that eschews inheritance completely, in favor of a combination of class morphing and (deep) delegation. Compared to past delegation approaches, the novel aspect of our design is the ability to emulate the best aspects of inheritance while retaining maximum flexibility: using morphing, a class can select any of the methods of its delegatee and export them (if desired) or transform them (e.g., to add extra arguments or modify type signatures), yet without needing to name these methods explicitly and handle them one-by-one. Compared to past work on morphing, our approach adopts and adapts advanced delegation mechanisms, in order to add late binding capabilities and, thus, provide a full substitute of inheritance. Additionally, we explore complex semantic issues in the interaction of delegation with late binding. 
We present our language design both informally, with numerous examples, and formally in a core calculus.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Smaragdakis:2013:SBP, author = "Yannis Smaragdakis and George Balatsouras and George Kastrinis", title = "Set-based pre-processing for points-to analysis", journal = j-SIGPLAN, volume = "48", number = "10", pages = "253--270", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We present set-based pre-analysis: a virtually universal optimization technique for flow-insensitive points-to analysis. Points-to analysis computes a static abstraction of how object values flow through a program's variables. Set-based pre-analysis relies on the observation that much of this reasoning can take place at the set level rather than the value level. Computing constraints at the set level results in significant optimization opportunities: we can rewrite the input program into a simplified form with the same essential points-to properties. This rewrite results in removing both local variables and instructions, thus simplifying the subsequent value-based points-to computation. Effectively, set-based pre-analysis puts the program in a normal form optimized for points-to analysis. Compared to other techniques for off-line optimization of points-to analyses in the literature, the new elements of our approach are the ability to eliminate statements, and not just variables, as well as its modularity: set-based pre-analysis can be performed on the input just once, e.g., allowing the pre-optimization of libraries that are subsequently reused many times and for different analyses. In experiments with Java programs, set-based pre-analysis eliminates 30\% of the program's local variables and 30\% or more of computed context-sensitive points-to facts, over a wide set of benchmarks and analyses, resulting in a ~20\% average speedup (max: 110\%, median: 18\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tetali:2013:MSA, author = "Sai Deep Tetali and Mohsen Lesani and Rupak Majumdar and Todd Millstein", title = "{MrCrypt}: static analysis for secure cloud computations", journal = j-SIGPLAN, volume = "48", number = "10", pages = "271--286", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509554", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "In a common use case for cloud computing, clients upload data and computation to servers that are managed by a third-party infrastructure provider. We describe MrCrypt, a system that provides data confidentiality in this setting by executing client computations on encrypted data. 
MrCrypt statically analyzes a program to identify the set of operations on each input data column, in order to select an appropriate homomorphic encryption scheme for that column, and then transforms the program to operate over encrypted data. The encrypted data and transformed program are uploaded to the server and executed as usual, and the result of the computation is decrypted on the client side. We have implemented MrCrypt for Java and illustrate its practicality on three standard benchmark suites for the Hadoop MapReduce framework. We have also formalized the approach and proven several soundness and security guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DeLozier:2013:ICL, author = "Christian DeLozier and Richard Eisenberg and Santosh Nagarakatte and Peter-Michael Osera and Milo M. K. Martin and Steve Zdancewic", title = "{Ironclad C++}: a library-augmented type-safe subset of {C++}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "287--304", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509550", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "The C++ programming language remains widely used, despite inheriting many unsafe features from C---features that often lead to failures of type or memory safety that manifest as buffer overflows, use-after-free vulnerabilities, or abstraction violations. Malicious attackers can exploit such violations to compromise application and system security. This paper introduces Ironclad C++, an approach to bringing the benefits of type and memory safety to C++. Ironclad C++ is, in essence, a library-augmented, type-safe subset of C++. All Ironclad C++ programs are valid C++ programs that can be compiled using standard, off-the-shelf C++ compilers. However, not all valid C++ programs are valid Ironclad C++ programs: a syntactic source-code validator statically prevents the use of unsafe C++ features. To enforce safety properties that are difficult to check statically, Ironclad C++ applies dynamic checks via templated ``smart pointer'' classes. Using a semi-automatic refactoring tool, we have ported nearly 50K lines of code to Ironclad C++. These benchmarks incur a performance overhead of 12\% on average, compared to the original unsafe C++ code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Richards:2013:FAC, author = "Gregor Richards and Christian Hammer and Francesco Zappa Nardelli and Suresh Jagannathan and Jan Vitek", title = "Flexible access control for {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "305--322", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509542", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Providing security guarantees for systems built out of untrusted components requires the ability to define and enforce access control policies over untrusted code. 
In Web 2.0 applications, JavaScript code from different origins is often combined on a single page, leading to well-known vulnerabilities. We present a security infrastructure which allows users and content providers to specify access control policies over subsets of a JavaScript program by leveraging the concept of delimited histories with revocation. We implement our proposal in WebKit and evaluate it with three policies on 50 widely used websites with no changes to their JavaScript code and report performance overheads and violations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Feldthaus:2013:SAR, author = "Asger Feldthaus and Anders M{\o}ller", title = "Semi-automatic rename refactoring for {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "323--338", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Modern IDEs support automated refactoring for many programming languages, but support for JavaScript is still primitive. To perform renaming, which is one of the fundamental refactorings, there is often no practical alternative to simple syntactic search-and-replace. Although more sophisticated alternatives have been developed, they are limited by whole-program assumptions and poor scalability. We propose a technique for semi-automatic refactoring for JavaScript, with a focus on renaming. Unlike traditional refactoring algorithms, semi-automatic refactoring works by a combination of static analysis and interaction with the programmer. With this pragmatic approach, we can provide scalable and effective refactoring support for real-world code, including libraries and incomplete applications. Through a series of experiments that estimate how much manual effort our technique demands from the programmer, we show that our approach is a useful improvement compared to search-and-replace tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Raychev:2013:RS, author = "Veselin Raychev and Max Sch{\"a}fer and Manu Sridharan and Martin Vechev", title = "Refactoring with synthesis", journal = j-SIGPLAN, volume = "48", number = "10", pages = "339--354", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509544", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Refactoring has become an integral part of modern software development, with wide support in popular integrated development environments (IDEs). Modern IDEs provide a fixed set of supported refactorings, listed in a refactoring menu. But with IDEs supporting more and more refactorings, it is becoming increasingly difficult for programmers to discover and memorize all their names and meanings. 
Also, since the set of refactorings is hard-coded, if a programmer wants to achieve a slightly different code transformation, she has to either apply a (possibly non-obvious) sequence of several built-in refactorings, or just perform the transformation by hand. We propose a novel approach to refactoring, based on synthesis from examples, which addresses these limitations. With our system, the programmer need not worry how to invoke individual refactorings or the order in which to apply them. Instead, a transformation is achieved via three simple steps: the programmer first indicates the start of a code refactoring phase; then she performs some of the desired code changes manually; and finally, she asks the tool to complete the refactoring. Our system completes the refactoring by first extracting the difference between the starting program and the modified version, and then synthesizing a sequence of refactorings that achieves (at least) the desired changes. To enable scalable synthesis, we introduce local refactorings, which allow for first discovering a refactoring sequence on small program fragments and then extrapolating it to a full refactoring sequence. We implemented our approach as an Eclipse plug-in, with an architecture that is easily extendible with new refactorings. The experimental results are encouraging: with only minimal user input, the synthesizer was able to quickly discover complex refactoring sequences for several challenging realistic examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bois:2013:BGV, author = "Kristof {Du Bois} and Jennifer B. Sartor and Stijn Eyerman and Lieven Eeckhout", title = "Bottle graphs: visualizing scalability bottlenecks in multi-threaded applications", journal = j-SIGPLAN, volume = "48", number = "10", pages = "355--372", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Understanding and analyzing multi-threaded program performance and scalability is far from trivial, which severely complicates parallel software development and optimization. In this paper, we present bottle graphs, a powerful analysis tool that visualizes multi-threaded program performance, in regards to both per-thread parallelism and execution time. Each thread is represented as a box, with its height equal to the share of that thread in the total program execution time, its width equal to its parallelism, and its area equal to its total running time. The boxes of all threads are stacked upon each other, leading to a stack with height equal to the total program execution time. Bottle graphs show exactly how scalable each thread is, and thus guide optimization towards those threads that have a smaller parallel component (narrower), and a larger share of the total execution time (taller), i.e. to the 'neck' of the bottle. Using light-weight OS modules, we calculate bottle graphs for unmodified multi-threaded programs running on real processors with an average overhead of 0.68\%. 
To demonstrate their utility, we do an extensive analysis of 12 Java benchmarks running on top of the Jikes JVM, which introduces many JVM service threads. We not only reveal and explain scalability limitations of several well-known Java benchmarks; we also analyze the reasons why the garbage collector itself does not scale, and in fact performs optimally with two collector threads for all benchmarks, regardless of the number of application threads. Finally, we compare the scalability of Jikes versus the OpenJDK JVM. We demonstrate how useful and intuitive bottle graphs are as a tool to analyze scalability and help optimize multi-threaded applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DElia:2013:BLP, author = "Daniele Cono D'Elia and Camil Demetrescu", title = "{Ball--Larus} path profiling across multiple loop iterations", journal = j-SIGPLAN, volume = "48", number = "10", pages = "373--390", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Identifying the hottest paths in the control flow graph of a routine can direct optimizations to portions of the code where most resources are consumed. This powerful methodology, called path profiling, was introduced by Ball and Larus in the mid 90's [4] and has received considerable attention in the last 15 years for its practical relevance. A shortcoming of the Ball-Larus technique was the inability to profile cyclic paths, making it difficult to mine execution patterns that span multiple loop iterations. Previous results, based on rather complex algorithms, have attempted to circumvent this limitation at the price of significant performance losses even for a small number of iterations. In this paper, we present a new approach to multi-iteration path profiling, based on data structures built on top of the original Ball-Larus numbering technique. Our approach allows the profiling of all executed paths obtained as a concatenation of up to k Ball-Larus acyclic paths, where k is a user-defined parameter. We provide examples showing that this method can reveal optimization opportunities that acyclic-path profiling would miss. 
An extensive experimental investigation on a large variety of Java benchmarks on the Jikes RVM shows that our approach can be even faster than Ball-Larus due to fewer operations on smaller hash tables, producing compact representations of cyclic paths even for large values of k.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sharma:2013:DDE, author = "Rahul Sharma and Eric Schkufza and Berkeley Churchill and Alex Aiken", title = "Data-driven equivalence checking", journal = j-SIGPLAN, volume = "48", number = "10", pages = "391--406", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We present a data driven algorithm for equivalence checking of two loops. The algorithm infers simulation relations using data from test runs. Once a candidate simulation relation has been obtained, off-the-shelf SMT solvers are used to check whether the simulation relation actually holds. The algorithm is sound: insufficient data will cause the proof to fail. We demonstrate a prototype implementation, called DDEC, of our algorithm, which is the first sound equivalence checker for loops written in x86 assembly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kneuss:2013:SMR, author = "Etienne Kneuss and Ivan Kuraj and Viktor Kuncak and Philippe Suter", title = "Synthesis modulo recursive functions", journal = j-SIGPLAN, volume = "48", number = "10", pages = "407--426", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We describe techniques for synthesis and verification of recursive functional programs over unbounded domains. Our techniques build on top of an algorithm for satisfiability modulo recursive functions, a framework for deductive synthesis, and complete synthesis procedures for algebraic data types. We present new counterexample-guided algorithms for constructing verified programs. We have implemented these algorithms in an integrated environment for interactive verification and synthesis from relational specifications. 
Our system was able to synthesize a number of useful recursive functions that manipulate unbounded numbers and data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Shi:2013:COU, author = "Yao Shi and Bernard Blackham and Gernot Heiser", title = "Code optimizations using formally verified properties", journal = j-SIGPLAN, volume = "48", number = "10", pages = "427--442", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Formal program verification offers strong assurance of correctness, backed by the strength of mathematical proof. Constructing these proofs requires humans to identify program invariants, and show that they are always maintained. These invariants are then used to prove that the code adheres to its specification. In this paper, we explore the overlap between formal verification and code optimization. We propose two approaches to reuse the invariants derived in formal proofs and integrate them into compilation. The first applies invariants extracted from the proof, while the second leverages the property of program safety (i.e., the absence of bugs). We reuse this information to improve the performance of generated object code. We evaluated these methods on seL4, a real-world formally-verified microkernel, and obtained improvements in average runtime performance (up to 28\%) and in worst-case execution time (up to 25\%). In macro-benchmarks, we found the performance of para-virtualized Linux running on the microkernel improved by 6-16\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dillig:2013:IIG, author = "Isil Dillig and Thomas Dillig and Boyang Li and Ken McMillan", title = "Inductive invariant generation via abductive inference", journal = j-SIGPLAN, volume = "48", number = "10", pages = "443--456", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "This paper presents a new method for generating inductive loop invariants that are expressible as boolean combinations of linear integer constraints. The key idea underlying our technique is to perform a backtracking search that combines Hoare-style verification condition generation with a logical abduction procedure based on quantifier elimination to speculate candidate invariants. Starting with true, our method iteratively strengthens loop invariants until they are inductive and strong enough to verify the program. A key feature of our technique is that it is lazy: It only infers those invariants that are necessary for verifying program correctness. Furthermore, our technique can infer arbitrary boolean combinations (including disjunctions) of linear invariants. We have implemented the proposed approach in a tool called HOLA. 
Our experiments demonstrate that HOLA can infer interesting invariants that are beyond the reach of existing state-of-the-art invariant generation tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hoppe:2013:DDB, author = "Michael Hoppe and Stefan Hanenberg", title = "Do developers benefit from generic types?: an empirical comparison of generic and raw types in {Java}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "457--474", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Type systems that permit developers to express themselves more precisely are one of the primary topics in programming language research, as well as in industrial software development. While it seems plausible that an expressive static type system increases developer productivity, there is little empirical evidence for or against this hypothesis. Generic types in Java are an example: as an extension of Java's original type system, some claim that Java 1.5 improves the type system's ``expressiveness.'' Even if this claim is true, there exists little empirical evidence that claimed expressiveness leads to a measurable increase in developer productivity. This paper introduces an experiment where generic types (in comparison to raw types) have been evaluated in three different directions: (1) the documentation impact on undocumented APIs, (2) the time required for fixing type errors, and (3) the extensibility of a generic type hierarchy. The results of the experiment suggest that generic types improve documentation and reduce extensibility --- without revealing a difference in the time required for fixing type errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dimoulas:2013:OC, author = "Christos Dimoulas and Robert Bruce Findler and Matthias Felleisen", title = "Option contracts", journal = j-SIGPLAN, volume = "48", number = "10", pages = "475--494", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Many languages support behavioral software contracts so that programmers can describe a component's obligations and promises via logical assertions in its interface. The contract system monitors program execution, checks whether the assertions hold, and, if not, blames the guilty component. Pinning down the violator gets the debugging process started in the right direction. Quality contracts impose a serious run-time cost, however, and programmers therefore compromise in many ways. Some turn off contracts for deployment, but then contracts and code quickly get out of sync during maintenance. Others test contracts randomly or probabilistically. In all cases, programmers have to cope with lack of blame information when the program eventually fails. 
In response, we propose option contracts as an addition to the contract tool box. Our key insight is that in ordinary contract systems, server components impose their contract on client components, giving them no choice whether to trust the server's promises or check them. With option contracts, server components may choose to tag a contract as an option and clients may choose to exercise the option or accept it, in which case they also shoulder some responsibility. We show that option contracts permit programmers to specify flexible checking policies, that their cost is reasonable, and that they satisfy a complete monitoring theorem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Treichler:2013:LSD, author = "Sean Treichler and Michael Bauer and Alex Aiken", title = "Language support for dynamic, hierarchical data partitioning", journal = j-SIGPLAN, volume = "48", number = "10", pages = "495--514", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Applications written for distributed-memory parallel architectures must partition their data to enable parallel execution. As memory hierarchies become deeper, it is increasingly necessary that the data partitioning also be hierarchical to match. Current language proposals perform this hierarchical partitioning statically, which excludes many important applications where the appropriate partitioning is itself data dependent and so must be computed dynamically. We describe Legion, a region-based programming system, where each region may be partitioned into subregions. Partitions are computed dynamically and are fully programmable. The division of data need not be disjoint and subregions of a region may overlap, or alias one another. Computations use regions with certain privileges (e.g., expressing that a computation uses a region read-only) and data coherence (e.g., expressing that the computation need only be atomic with respect to other operations on the region), which can be controlled on a per-region (or subregion) basis. We present the novel aspects of the Legion design, in particular the combination of static and dynamic checks used to enforce soundness. We give an extended example illustrating how Legion can express computations with dynamically determined relationships between computations and data partitions. We prove the soundness of Legion's type system, and show Legion type checking improves performance by up to 71\% by eliding provably safe memory checks. In particular, we show that the dynamic checks to detect aliasing at runtime at the region granularity have negligible overhead. 
We report results for three real-world applications running on distributed memory machines, achieving up to 62.5X speedup on 96 GPUs on the Keeneland supercomputer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Balatsouras:2013:CHC, author = "George Balatsouras and Yannis Smaragdakis", title = "Class hierarchy complementation: soundly completing a partial type graph", journal = j-SIGPLAN, volume = "48", number = "10", pages = "515--532", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We present the problem of class hierarchy complementation: given a partially known hierarchy of classes together with subtyping constraints (``A has to be a transitive subtype of B'') complete the hierarchy so that it satisfies all constraints. The problem has immediate practical application to the analysis of partial programs--e.g., it arises in the process of providing a sound handling of ``phantom classes'' in the Soot program analysis framework. We provide algorithms to solve the hierarchy complementation problem in the single inheritance and multiple inheritance settings. We also show that the problem in a language such as Java, with single inheritance but multiple subtyping and distinguished class vs. interface types, can be decomposed into separate single- and multiple-subtyping instances. We implement our algorithms in a tool, JPhantom, which complements partial Java bytecode programs so that the result is guaranteed to satisfy the Java verifier requirements. JPhantom is highly scalable and runs in mere seconds even for large input applications and complex constraints (with a maximum of 14s for a 19MB binary).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ravichandran:2013:MES, author = "Kaushik Ravichandran and Santosh Pande", title = "Multiverse: efficiently supporting distributed high-level speculation", journal = j-SIGPLAN, volume = "48", number = "10", pages = "533--552", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Algorithmic speculation or high-level speculation is a promising programming paradigm which allows programmers to speculatively branch an execution into multiple independent parallel sections and then choose the best (perhaps fastest) amongst them. The continuing execution after the speculatively branched section sees only the modifications made by the best one. This programming paradigm allows programmers to harness parallelism and can provide dramatic performance improvements. In this paper we present the Multiverse speculative programming model. Multiverse allows programmers to exploit parallelism through high-level speculation. 
It can effectively harness large amounts of parallelism by speculating across an entire cluster and is not bound by the parallelism available in a single machine. We present abstractions and a runtime which allow programmers to introduce large scale high-level speculative parallelism into applications with minimal effort. We introduce a novel on-demand address space sharing mechanism which provides speculations efficient, transparent access to the original address space of the application (including the use of pointers) across machine boundaries. Multiverse provides single commit semantics across speculations while guaranteeing isolation between them. We also introduce novel mechanisms to deal with scalability bottlenecks when there are a large number of speculations. We demonstrate that for several benchmarks, Multiverse achieves impressive speedups and good scalability across entire clusters. We study the overheads of the runtime and demonstrate how our special scalability mechanisms are crucial in scaling cluster-wide.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Clebsch:2013:FCG, author = "Sylvan Clebsch and Sophia Drossopoulou", title = "Fully concurrent garbage collection of actors on many-core machines", journal = j-SIGPLAN, volume = "48", number = "10", pages = "553--570", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509557", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Disposal of dead actors in actor-model languages is as important as disposal of unreachable objects in object-oriented languages. In current practice, programmers are required to either manually terminate actors, or they have to rely on garbage collection systems that monitor actor mutation through write barriers, thread coordination through locks, etc. These techniques, however, prevent the collector from being fully concurrent. We developed a protocol that allows garbage collection to run fully concurrently with all actors. The main challenge in concurrent garbage collection is the detection of cycles of sleeping actors in the actor graph, in the presence of concurrent mutation of this graph. Our protocol is solely built on message passing: it uses deferred direct reference counting, a dedicated actor for the detection of (cyclic) garbage, and a confirmation protocol (to deal with the mutation of the actor graph). We present our ideas informally through an example, and then present a formal model, prove soundness and argue completeness. We have implemented the protocol as part of a runtime library. As a preliminary performance evaluation, we discuss the performance of our approach as currently used at a financial institution, and use four benchmarks from the literature to compare our approach with other actor-model systems.
These preliminary results indicate that the overhead of our approach is small.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhao:2013:INT, author = "Jisheng Zhao and Roberto Lublinerman and Zoran Budimli{\'c} and Swarat Chaudhuri and Vivek Sarkar", title = "Isolation for nested task parallelism", journal = j-SIGPLAN, volume = "48", number = "10", pages = "571--588", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Isolation--the property that a task can access shared data without interference from other tasks--is one of the most basic concerns in parallel programming. While there is a large body of past work on isolated task-parallelism, the integration of isolation, task-parallelism, and nesting of tasks has been a difficult and unresolved challenge. In this paper, we present a programming and execution model called Otello where isolation is extended to arbitrarily nested parallel tasks with irregular accesses to heap data. At the same time, no additional burden is imposed on the programmer, who only exposes parallelism by creating and synchronizing parallel tasks, leaving the job of ensuring isolation to the underlying compiler and runtime system. Otello extends our past work on the Aida execution model and the delegated isolation mechanism [22] to the setting of nested parallelism. The basic runtime construct in Aida and Otello is an assembly: a task equipped with a region in the shared heap that it owns. When an assembly A conflicts with an assembly B, A transfers--or delegates--its code and owned region to a carefully selected assembly C in a way that will ensure isolation with B, leaving the responsibility of re-executing task A to C. The choice of C depends on the nesting relationship between A and B. We have implemented Otello on top of the Habanero Java (HJ) parallel programming language [8], and used this implementation to evaluate Otello on collections of nested task-parallel benchmarks and non-nested transactional benchmarks from past work. On the nested task-parallel benchmarks, Otello achieves scalability comparable to HJ programs without built-in isolation, and the relative overhead of Otello is lower than that of many published data-race detection algorithms that detect the isolation violations (but do not enforce isolation).
For the transactional benchmarks, Otello incurs lower overhead than a state-of-the-art software transactional memory system (Deuce STM).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Tripp:2013:TNP, author = "Omer Tripp and Eric Koskinen and Mooly Sagiv", title = "Turning nondeterminism into parallelism", journal = j-SIGPLAN, volume = "48", number = "10", pages = "589--604", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Nondeterminism is a useful and prevalent concept in the design and implementation of software systems. An important property of nondeterminism is its latent parallelism: A nondeterministic action can evaluate to multiple behaviors. If at least one of these behaviors does not conflict with concurrent tasks, then there is an admissible execution of the action in parallel with these tasks. Unfortunately, existing implementations of the atomic paradigm --- optimistic as well as pessimistic --- are unable to fully exhaust the parallelism potential of nondeterministic actions, lacking the means to guide concurrent tasks toward nondeterministic choices that minimize interference. This paper investigates the problem of utilizing parallelism due to nondeterminism. We observe that nondeterminism occurs in many real-world codes. We motivate the need for devising coordination mechanisms that can utilize available nondeterminism. We have developed a system featuring such mechanisms, which leverages nondeterminism in a wide class of query operations, allowing a task to look into the future of concurrent tasks that mutate the shared state during query evaluation and reduce conflict accordingly. We evaluate our system on a suite of 12 algorithmic benchmarks of wide applicability, as well as an industrial application. The results are encouraging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chong:2013:BIS, author = "Nathan Chong and Alastair F. Donaldson and Paul H. J. Kelly and Jeroen Ketema and Shaz Qadeer", title = "Barrier invariants: a shared state abstraction for the analysis of data-dependent {GPU} kernels", journal = j-SIGPLAN, volume = "48", number = "10", pages = "605--622", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Data-dependent GPU kernels, whose data or control flow are dependent on the input of the program, are difficult to verify because they require reasoning about shared state manipulated by many parallel threads. Existing verification techniques for GPU kernels achieve soundness and scalability by using a two-thread reduction and making the contents of the shared state nondeterministic each time threads synchronise at a barrier, to account for all possible thread interactions. This coarse abstraction prohibits verification of data-dependent kernels. 
We present barrier invariants, a novel abstraction technique which allows key properties about the shared state of a kernel to be preserved across barriers during formal reasoning. We have integrated barrier invariants with the GPUVerify tool, and present a detailed case study showing how they can be used to verify three prefix sum algorithms, allowing efficient modular verification of a stream compaction kernel, a key building block for GPU programming. This analysis goes significantly beyond what is possible using existing verification techniques for GPU kernels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Choi:2013:GGT, author = "Wontae Choi and George Necula and Koushik Sen", title = "Guided {GUI} testing of {Android} apps with minimal restart and approximate learning", journal = j-SIGPLAN, volume = "48", number = "10", pages = "623--640", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509552", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Smartphones and tablets with rich graphical user interfaces (GUI) are becoming increasingly popular. Hundreds of thousands of specialized applications, called apps, are available for such mobile platforms. Manual testing is the most popular technique for testing graphical user interfaces of such apps. Manual testing is often tedious and error-prone. In this paper, we propose an automated technique, called Swift-Hand, for generating sequences of test inputs for Android apps. The technique uses machine learning to learn a model of the app during testing, uses the learned model to generate user inputs that visit unexplored states of the app, and uses the execution of the app on the generated inputs to refine the model. A key feature of the testing algorithm is that it avoids restarting the app, which is a significantly more expensive operation than executing the app on a sequence of inputs. An important insight behind our testing algorithm is that we do not need to learn a precise model of an app, which is often computationally intensive, if our goal is to simply guide test execution into unexplored parts of the state space. We have implemented our testing algorithm in a publicly available tool for Android apps written in Java. Our experimental results show that we can achieve significantly better coverage than traditional random testing and L*-based testing in a given time budget. 
Our algorithm also reaches peak coverage faster than both random and L*-based testing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Azim:2013:TDF, author = "Tanzirul Azim and Iulian Neamtiu", title = "Targeted and depth-first exploration for systematic testing of {Android} apps", journal = j-SIGPLAN, volume = "48", number = "10", pages = "641--660", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509549", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Systematic exploration of Android apps is an enabler for a variety of app analysis and testing tasks. Performing the exploration while apps run on actual phones is essential for exploring the full range of app capabilities. However, exploring real-world apps on real phones is challenging due to non-determinism, non-standard control flow, scalability and overhead constraints. Relying on end-users to conduct the exploration might not be very effective: we performed a 7-user study on popular Android apps, and found that the combined 7-user coverage was 30.08\% of the app screens and 6.46\% of the app methods. Prior approaches for automated exploration of Android apps have run apps in an emulator or focused on small apps whose source code was available. To address these problems, we present A$^3$E, an approach and tool that allows substantial Android apps to be explored systematically while running on actual phones, yet without requiring access to the app's source code. The key insight of our approach is to use a static, taint-style, dataflow analysis on the app bytecode in a novel way, to construct a high-level control flow graph that captures legal transitions among activities (app screens). We then use this graph to develop an exploration strategy named Targeted Exploration that permits fast, direct exploration of activities, including activities that would be difficult to reach during normal use. We also developed a strategy named Depth-first Exploration that mimics user actions for exploring activities and their constituents in a slower, but more systematic way. To measure the effectiveness of our techniques, we use two metrics: activity coverage (number of screens explored) and method coverage. Experiments using our approach on 25 popular Android apps including BBC News, Gas Buddy, Amazon Mobile, YouTube, Shazam Encore, and CNN, show that our exploration techniques achieve 59.39--64.11\% activity coverage and 29.53--36.46\% method coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kansal:2013:LAB, author = "Aman Kansal and Scott Saponas and A. J. Bernheim Brush and Kathryn S.
McKinley and Todd Mytkowicz and Ryder Ziola", title = "The latency, accuracy, and battery {(LAB)} abstraction: programmer productivity and energy efficiency for continuous mobile context sensing", journal = j-SIGPLAN, volume = "48", number = "10", pages = "661--676", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Emerging mobile applications that sense context are poised to delight and entertain us with timely news and events, health tracking, and social connections. Unfortunately, sensing algorithms quickly drain the phone's battery. Developers can overcome battery drain by carefully optimizing context sensing but that makes programming with context arduous and ties applications to current sensing hardware. These types of applications embody a twist on the classic tension between programmer productivity and performance due to their combination of requirements. This paper identifies the latency, accuracy, battery (LAB) abstraction to resolve this tension. We implement and evaluate LAB in a system called Senergy. Developers specify their LAB requirements independent of inference algorithms and sensors. Senergy delivers energy efficient context while meeting the requirements and adapts as hardware changes. We demonstrate LAB's expressiveness by using it to implement 22 context sensing algorithms for four types of context (location, driving, walking, and stationary) and six diverse applications. To demonstrate LAB's energy optimizations, we show improvements in energy efficiency on applications, often by an order of magnitude, compared to prior approaches. This relatively simple, priority-based API may serve as a blueprint for future API design in an increasingly complex design space that must trade off latency, accuracy, and efficiency to meet application needs and attain portability across evolving, sensor-rich, heterogeneous, and power-constrained hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bergan:2013:ICS, author = "Tom Bergan and Luis Ceze and Dan Grossman", title = "Input-covering schedules for multithreaded programs", journal = j-SIGPLAN, volume = "48", number = "10", pages = "677--692", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We propose constraining multithreaded execution to small sets of input-covering schedules, which we define as follows: given a program $P$, we say that a set of schedules $ \Sigma $ covers all inputs of program $P$ if, when given any input, $P$'s execution can be constrained to some schedule in $ \Sigma $ and still produce a semantically valid result. Our approach is to first compute a small $ \Sigma $ for a given program $P$, and then, at runtime, constrain $P$'s execution to always follow some schedule in $ \Sigma $, and never deviate.
We have designed an algorithm that uses symbolic execution to systematically enumerate a set of input-covering schedules, $ \Sigma $. To deal with programs that run for an unbounded length of time, we partition execution into bounded epochs, find input-covering schedules for each epoch in isolation, and then piece the schedules together at runtime. We have implemented this algorithm along with a constrained execution runtime for pthreads programs, and we report results. Our approach has the following advantage: because all possible runtime schedules are known a priori, we can seek to validate the program by thoroughly verifying each schedule in $ \Sigma $, in isolation, without needing to reason about the huge space of thread interleavings that arises due to conventional nondeterministic execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bond:2013:OCC, author = "Michael D. Bond and Milind Kulkarni and Man Cao and Minjia Zhang and Meisam Fathi Salmi and Swarnendu Biswas and Aritra Sengupta and Jipeng Huang", title = "{OCTET}: capturing and controlling cross-thread dependences efficiently", journal = j-SIGPLAN, volume = "48", number = "10", pages = "693--712", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Parallel programming is essential for reaping the benefits of parallel hardware, but it is notoriously difficult to develop and debug reliable, scalable software systems. One key challenge is that modern languages and systems provide poor support for ensuring concurrency correctness properties --- atomicity, sequential consistency, and multithreaded determinism --- because all existing approaches are impractical. Dynamic, software-based approaches slow programs by up to an order of magnitude because capturing and controlling cross-thread dependences (i.e., conflicting accesses to shared memory) requires synchronization at virtually every access to potentially shared memory. This paper introduces a new software-based concurrency control mechanism called OCTET that soundly captures cross-thread dependences and can be used to build dynamic analyses for concurrency correctness. OCTET achieves low overheads by tracking the locality state of each potentially shared object. Non-conflicting accesses conform to the locality state and require no synchronization; only conflicting accesses require a state change and heavyweight synchronization. This optimistic tradeoff leads to significant efficiency gains in capturing cross-thread dependences: a prototype implementation of OCTET in a high-performance Java virtual machine slows real-world concurrent programs by only 26\% on average. A dependence recorder, suitable for record {\&} replay, built on top of OCTET adds an additional 5\% overhead on average.
These results suggest that OCTET can provide a foundation for developing low-overhead analyses that check and enforce concurrency correctness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Noll:2013:OFD, author = "Albert Noll and Thomas Gross", title = "Online feedback-directed optimizations for parallel {Java} code", journal = j-SIGPLAN, volume = "48", number = "10", pages = "713--728", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "OOPSLA '13 conference proceedings.", abstract = "The performance of parallel code significantly depends on the parallel task granularity (PTG). If the PTG is too coarse, performance suffers due to load imbalance; if the PTG is too fine, performance suffers from the overhead that is induced by parallel task creation and scheduling. This paper presents a software platform that automatically determines the PTG at run-time. Automatic PTG selection is enabled by concurrent calls, which are special source language constructs that provide a late decision (at run-time) of whether concurrent calls are executed sequentially or concurrently (as a parallel task). Furthermore, the execution semantics of concurrent calls permits the runtime system to merge two (or more) concurrent calls thereby coarsening the PTG. We present an integration of concurrent calls into the Java programming language, the Java Memory Model, and show how the Java Virtual Machine can adapt the PTG based on dynamic profiling. The performance evaluation shows that our runtime system performs competitively to Java programs for which the PTG is tuned manually. Compared to an unfortunate choice of the PTG, this approach performs up to 3x faster than standard Java code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Herhut:2013:RTP, author = "Stephan Herhut and Richard L. Hudson and Tatiana Shpeisman and Jaswanth Sreeram", title = "{River Trail}: a path to parallelism in {JavaScript}", journal = j-SIGPLAN, volume = "48", number = "10", pages = "729--744", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "JavaScript is the most popular language on the web and is a crucial component of HTML5 applications and services that run on consumer platforms ranging from desktops to phones. However, despite ample amount of hardware parallelism available to web applications on such platforms, JavaScript web applications remain predominantly sequential. Common parallel programming solutions accepted by other programming languages failed to transfer themselves to JavaScript due to differences in programming models, the additional requirements of the web and different developer expectations. 
In this paper we present River Trail --- a parallel programming model and API for JavaScript that provides safe, portable, programmer-friendly, deterministic parallelism to JavaScript applications. River Trail allows web applications to effectively utilize multiple cores, vector instructions, and GPUs on client platforms while allowing the web developer to remain within the environment of JavaScript. We describe the implementation of the River Trail compiler and runtime and present experimental results that show the impact of River Trail on performance and scalability for a variety of realistic HTML5 applications. Our experiments show that River Trail has a dramatic positive impact on overall performance and responsiveness of computationally intense JavaScript based applications achieving up to 33.6 times speedup for kernels and up to 11.8 times speedup for realistic web applications compared to sequential JavaScript. Moreover, River Trail enables new interactive web usages that are simply not even possible with standard sequential JavaScript.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bhattacharya:2013:CCI, author = "Suparna Bhattacharya and Kanchi Gopinath and Mangala Gowri Nanda", title = "Combining concern input with program analysis for bloat detection", journal = j-SIGPLAN, volume = "48", number = "10", pages = "745--764", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Framework based software tends to get bloated by accumulating optional features (or concerns ) just-in-case they are needed. The good news is that such feature bloat need not always cause runtime execution bloat. The bad news is that often enough, only a few statements from an optional concern may cause execution bloat that may result in as much as 50\% runtime overhead. We present a novel technique to analyze the connection between optional concerns and the potential sources of execution bloat induced by them. Our analysis automatically answers questions such as (1) whether a given set of optional concerns could lead to execution bloat and (2) which particular statements are the likely sources of bloat when those concerns are not required. The technique combines coarse grain concern input from an external source with a fine-grained static analysis. 
Our experimental evaluation highlights the effectiveness of such concern augmented program analysis in execution bloat assessment of ten programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2013:IMF, author = "Lingming Zhang and Lu Zhang and Sarfraz Khurshid", title = "Injecting mechanical faults to localize developer faults for evolving software", journal = j-SIGPLAN, volume = "48", number = "10", pages = "765--784", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "This paper presents a novel methodology for localizing faults in code as it evolves. Our insight is that the essence of failure-inducing edits made by the developer can be captured using mechanical program transformations (e.g., mutation changes). Based on the insight, we present the FIFL framework, which uses both the spectrum information of edits (obtained using the existing FaultTracer approach) and the potential impacts of edits (simulated by mutation changes) to achieve more accurate fault localization. We evaluate FIFL on real-world repositories of nine Java projects ranging from 5.7KLoC to 88.8KLoC. The experimental results show that FIFL is able to outperform the state-of-the-art FaultTracer technique for localizing failure-inducing program edits significantly. For example, all 19 FIFL strategies that use both the spectrum information and simulated impact information for each edit outperform the existing FaultTracer approach statistically at the significance level of 0.01. In addition, FIFL with its default settings outperforms FaultTracer by 2.33\% to 86.26\% on 16 of the 26 studied version pairs, and is only inferior to FaultTracer on one version pair.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Deng:2013:ECB, author = "Dongdong Deng and Wei Zhang and Shan Lu", title = "Efficient concurrency-bug detection across inputs", journal = j-SIGPLAN, volume = "48", number = "10", pages = "785--802", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "In the multi-core era, it is critical to efficiently test multi-threaded software and expose concurrency bugs before software release. Previous work has made significant progress in detecting and validating concurrency bugs under a given input. Unfortunately, software testing always faces large sets of test inputs, and existing techniques are still too expensive to be applied to every test input in practice. In this paper, we use open-source software to study how existing concurrency-bug detection tools work for a set of inputs. The study shows that an interleaving pattern, such as a data race or an atomicity violation, can often be exposed by many inputs.
Consequently, existing bug detectors would inevitably waste their bug detection effort to generate duplicate bug reports, when applied to a set of inputs. Guided by the above study, we propose a coverage metric, Concurrent Function Pairs (CFP), to efficiently approximate how interleavings overlap across inputs. Using CFP, we have designed a new approach to detecting data races and atomicity-violation bugs for a set of inputs. Our evaluation on open-source C/C++ applications shows that our CFP-guided approach can effectively accelerate concurrency-bug detection for a set of inputs by reducing redundant detection effort across inputs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhong:2013:DAD, author = "Hao Zhong and Zhendong Su", title = "Detecting {API} documentation errors", journal = j-SIGPLAN, volume = "48", number = "10", pages = "803--816", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "When programmers encounter an unfamiliar API library, they often need to refer to its documentations, tutorials, or discussions on development forums to learn its proper usage. These API documents contain valuable information, but may also mislead programmers as they may contain errors ( e.g., broken code names and obsolete code samples). Although most API documents are actively maintained and updated, studies show that many new and latent errors do exist. It is tedious and error-prone to find such errors manually as API documents can be enormous with thousands of pages. Existing tools are ineffective in locating documentation errors because traditional natural language (NL) tools do not understand code names and code samples, and traditional code analysis tools do not understand NL sentences. In this paper, we propose the first approach, DOCREF, specifically designed and developed to detect API documentation errors. We formulate a class of inconsistencies to indicate potential documentation errors, and combine NL and code analysis techniques to detect and report such inconsistencies. We have implemented DOCREF and evaluated its effectiveness on the latest documentations of five widely-used API libraries. DOCREF has detected more than 1,000 new documentation errors, which we have reported to the authors. 
Many of the errors have already been confirmed and fixed after we reported them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bao:2013:FDI, author = "Tao Bao and Xiangyu Zhang", title = "On-the-fly detection of instability problems in floating-point program execution", journal = j-SIGPLAN, volume = "48", number = "10", pages = "817--832", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "The machine representation of floating point values has limited precision such that errors may be introduced during execution. These errors may get propagated and magnified by the following operations, leading to instability problems, e.g., control flow path may be undesirably altered and faulty output may be emitted. In this paper, we develop an on-the-fly efficient monitoring technique that can predict if an execution is stable. The technique does not explicitly compute errors as doing so incurs high overhead. Instead, it detects possible places where an error becomes substantially inflated regarding the corresponding value, and then tags the value with one bit to denote that it has an inflated error. It then tracks inflation bit propagation, taking care of operations that may cut off such propagation. It reports instability if any inflation bit reaches a critical execution point, such as a predicate, where the inflated error may induce substantial execution difference, such as different execution paths. Our experiment shows that with appropriate thresholds, the technique can correctly detect that over 99.999996\% of the inputs of all the programs we studied are stable while a traditional technique relying solely on inflation detection mistakenly classifies the majority of the inputs as unstable for some of the programs. Compared to the state-of-the-art technique that is based on high-precision computation and causes several hundred times slowdown, our technique only causes 7.91 times slowdown on average and can report all the true unstable executions with the appropriate thresholds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Coons:2013:BPO, author = "Katherine E. Coons and Madan Musuvathi and Kathryn S. McKinley", title = "Bounded partial-order reduction", journal = j-SIGPLAN, volume = "48", number = "10", pages = "833--848", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509556", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "Eliminating concurrency errors is increasingly important as systems rely more on parallelism for performance. Exhaustively exploring the state-space of a program's thread interleavings finds concurrency errors and provides coverage guarantees, but suffers from exponential state-space explosion. Two prior approaches alleviate state-space explosion.
(1) Dynamic partial-order reduction (DPOR) provides full coverage and explores only one interleaving of independent transitions. (2) Bounded search provides bounded coverage by enumerating interleavings that do not exceed a bound. In particular, we focus on preemption-bounding. Combining partial-order reduction with preemption-bounding had remained an open problem. We show that preemption-bounded search explores the same partial orders repeatedly and consequently explores more executions than unbounded DPOR, even for small bounds. We further show that if DPOR simply uses the preemption bound to prune the state space as it explores new partial orders, it misses parts of the state space reachable in the bound and is therefore unsound. The bound essentially induces dependences between otherwise independent transitions in the DPOR state space. We introduce Bounded Partial Order Reduction (BPOR), a modification of DPOR that compensates for bound dependences. We identify properties that determine how well bounds combine with partial-order reduction. We prove sound coverage and empirically evaluate BPOR with preemption and fairness bounds. We show that by eliminating redundancies, BPOR significantly reduces testing time compared to bounded search. BPOR's faster incremental guarantees will help testers verify larger concurrent programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Mitchell:2013:FCP, author = "Nick Mitchell and Peter F. Sweeney", title = "On-the-fly capacity planning", journal = j-SIGPLAN, volume = "48", number = "10", pages = "849--866", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509540", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "When resolving performance problems, a simple histogram of hot call stacks does not cut it, especially given the highly fluid nature of modern deployments. Why bother tuning, when adding a few CPUs via the management console will quickly resolve the problem? The findings of these tools are also presented without any sense of context: e.g. string conversion may be expensive, but only matters if it contributes greatly to the response time of user logins. Historically, these concerns have been the purview of capacity planning. The power of planners lies in their ability to weigh demand versus capacity, and to do so in terms of the important units of work in the application (such as user logins). Unfortunately, they rely on measurements of rates and latencies, and both quantities are difficult to obtain. Even if possible, when all is said and done, these planners only relate to the code as a black-box: but, why bother adding CPUs, when easy code changes will fix the problem? We present a way to do planning on-the-fly: with a few call stack samples taken from an already-running system, we predict the benefit of a proposed tuning plan. We accomplish this by simulating the effect of a tuning action upon execution speed and the way it shifts resource demand. To identify existing problems, we show how to generate tuning actions automatically, guided by the desire to maximize speedup without needless expense, and that these generated plans may span resource and code changes. 
We show that it is possible to infer everything needed from these samples alone: levels of resource demand and the units of work in the application. We evaluate our planner on a suite of microbenchmarks and a suite of 15,000 data sets that come from real applications running in the wild.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Vafeiadis:2013:RSL, author = "Viktor Vafeiadis and Chinmay Narayan", title = "Relaxed separation logic: a program logic for {C11} concurrency", journal = j-SIGPLAN, volume = "48", number = "10", pages = "867--884", month = oct, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2544173.2509532", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 09:19:33 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "OOPSLA '13 conference proceedings.", abstract = "We introduce relaxed separation logic (RSL), the first program logic for reasoning about concurrent programs running under the C11 relaxed memory model. From a user's perspective, RSL is an extension of concurrent separation logic (CSL) with proof rules for the various kinds of C11 atomic accesses. As in CSL, individual threads are allowed to access non-atomically only the memory that they own, thus preventing data races. Ownership can, however, be transferred via certain atomic accesses. For SC-atomic accesses, we permit arbitrary ownership transfer; for acquire/release atomic accesses, we allow ownership transfer only in one direction; whereas for relaxed atomic accesses, we rule out ownership transfer completely. We illustrate RSL with a few simple examples and prove its soundness directly over the axiomatic C11 weak memory model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Petrank:2013:SFA, author = "Erez Petrank", title = "Safety-first approach to memory consistency models", journal = j-SIGPLAN, volume = "48", number = "11", pages = "1--2", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466479", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Reames:2013:THC, author = "Philip Reames and George Necula", title = "Towards hinted collection: annotations for decreasing garbage collector pause times", journal = j-SIGPLAN, volume = "48", number = "11", pages = "3--14", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2464158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "Garbage collection is widely used and has largely been a boon for programmer productivity. However, traditional garbage collection is approaching both practical and theoretical performance limits. 
In practice, the maximum heap size and heap structure of large applications are influenced as much by garbage collector behavior as by resource availability. We present an alternate approach to garbage collection wherein the programmer provides untrusted deallocation hints. Usage of deallocation hints is similar to trusted manual deallocation, but the consequence of an inaccurate hint is lost performance not correctness. Our hinted collector algorithm uses these hints to identify a subset of unreachable objects with both better parallel asymptotic complexity and practical performance. On some benchmarks, our prototype collector implementation achieves 10-20\% pause time reductions. We close with a discussion of the design trade-offs inherent in our approach and lessons to be learned from our collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Morikawa:2013:ASR, author = "Kazuya Morikawa and Tomoharu Ugawa and Hideya Iwasaki", title = "Adaptive scanning reduces sweep time for the {Lisp2} mark-compact garbage collector", journal = j-SIGPLAN, volume = "48", number = "11", pages = "15--26", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466480", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "Mark-compact garbage collection helps long-running programs avoid fragmentation. The Lisp2 mark-compact collector is a classic but still widely-used compaction algorithm. It sequentially scans the entire heap to compact all live objects at one end of the heap while preserving their order of addresses. Since the heap is generally large, this scanning takes a long time. Although some collectors adopt a separate bitmap into which mark bits of objects are stored to reduce the scanning time, we observed that scanning the bitmap can take longer than scanning the heap if objects are densely located. We propose a new scanning method from this observation, which adaptively alternates methods of scanning depending on heap usage; it scans those parts of the heap where live objects are densely located whereas it scans the bitmap for the remaining parts. We implemented this scanning method in the Lisp2 collector of Jikes RVM. The experimental results revealed that the adaptive scanner scanned faster than the method that only scanned the heap and the method that only scanned the bitmap.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{White:2013:CTP, author = "David R. White and Jeremy Singer and Jonathan M. Aitken and Richard E. Jones", title = "Control theory for principled heap sizing", journal = j-SIGPLAN, volume = "48", number = "11", pages = "27--38", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466481", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "ISMM '13 conference proceedings.", abstract = "We propose a new, principled approach to adaptive heap sizing based on control theory. 
We review current state-of-the-art heap sizing mechanisms, as deployed in Jikes RVM and HotSpot. We then formulate heap sizing as a control problem, apply and tune a standard controller algorithm, and evaluate its performance on a set of well-known benchmarks. We find our controller adapts the heap size more responsively than existing mechanisms. This responsiveness allows tighter virtual machine memory footprints while preserving target application throughput, which is ideal for both embedded and utility computing domains. In short, we argue that formal, systematic approaches to memory management should be replacing ad-hoc heuristics as the discipline matures. Control-theoretic heap sizing is one such systematic approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Brock:2013:PPA, author = "Jacob Brock and Xiaoming Gu and Bin Bao and Chen Ding", title = "{Pacman}: program-assisted cache management", journal = j-SIGPLAN, volume = "48", number = "11", pages = "39--50", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466482", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "As caches become larger and shared by an increasing number of cores, cache management is becoming more important. This paper explores collaborative caching, which uses software hints to influence hardware caching. Recent studies have shown that such collaboration between software and hardware can theoretically achieve optimal cache replacement on LRU-like cache. This paper presents Pacman, a practical solution for collaborative caching in loop-based code. Pacman uses profiling to analyze patterns in an optimal caching policy in order to determine which data to cache and at what time. It then splits each loop into different parts at compile time. At run time, the loop boundary is adjusted to selectively store data that would be stored in an optimal policy. In this way, Pacman emulates the optimal policy wherever it can. Pacman requires a single bit at the load and store instructions. Some of the current hardware has partial support. This paper presents results using both simulated and real systems, and compares simulated results to related caching policies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wang:2013:GSE, author = "Yan Wang and Iulian Neamtiu and Rajiv Gupta", title = "Generating sound and effective memory debuggers", journal = j-SIGPLAN, volume = "48", number = "11", pages = "51--62", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2464159", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "We present a new approach for constructing debuggers based on declarative specification of bug conditions and root causes, and automatic generation of debugger code. We illustrate our approach on several classes of bugs, memory or otherwise. 
For each bug class, bug conditions and their root cause are specified declaratively, in First-order logic, using 1 to 4 predicates. We employ a low-level operational semantics and abstract traces to permit concise bug specification and prove soundness. To facilitate locating bugs, we introduce a new concept of value propagation chains that reduce programmer burden by narrowing the fault to a handful of executed instructions (1 to 16 in our experiments). We employ automatic translation to generate the debugger implementation, which runs on top of the Pin infrastructure. Experiments with using our system on 7 versions of 4 real-world programs show that our approach is expressive, effective at finding bugs and their causes, and efficient. We believe that, using our approach, other kinds of declaratively-specified, provably-correct, auto-generated debuggers can be constructed with little effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kalibera:2013:RBR, author = "Tomas Kalibera and Richard Jones", title = "Rigorous benchmarking in reasonable time", journal = j-SIGPLAN, volume = "48", number = "11", pages = "63--74", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2464160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", note = "ISMM '13 conference proceedings.", abstract = "Experimental evaluation is key to systems research. Because modern systems are complex and non-deterministic, good experimental methodology demands that researchers account for uncertainty. To obtain valid results, they are expected to run many iterations of benchmarks, invoke virtual machines (VMs) several times, or even rebuild VM or benchmark binaries more than once. All this repetition costs time to complete experiments. Currently, many evaluations give up on sufficient repetition or rigorous statistical methods, or even run benchmarks only in training sizes. The results reported often lack proper variation estimates and, when a small difference between two systems is reported, some are simply unreliable. In contrast, we provide a statistically rigorous methodology for repetition and summarising results that makes efficient use of experimentation time. Time efficiency comes from two key observations. First, a given benchmark on a given platform is typically prone to much less non-determinism than the common worst-case of published corner-case studies. Second, repetition is most needed where most uncertainty arises (whether between builds, between executions or between iterations). We capture experimentation cost with a novel mathematical model, which we use to identify the number of repetitions at each level of an experiment necessary and sufficient to obtain a given level of precision. We present our methodology as a cookbook that guides researchers on the number of repetitions they should run to obtain reliable results. We also show how to present results with an effect size confidence interval. 
As an example, we show how to use our methodology to conduct throughput experiments with the DaCapo and SPEC CPU benchmarks on three recent platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Aigner:2013:ATU, author = "Martin Aigner and Christoph M. Kirsch", title = "{ACDC}: towards a universal mutator for benchmarking heap management systems", journal = j-SIGPLAN, volume = "48", number = "11", pages = "75--84", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2464161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "We present ACDC, an open-source benchmark that may be configured to emulate explicit single- and multi-threaded memory allocation, sharing, access, and deallocation behavior to expose virtually any relevant allocator performance differences. ACDC mimics periodic memory allocation and deallocation (AC) as well as persistent memory (DC). Memory may be allocated thread-locally and shared among multiple threads to study multicore scalability and even false sharing. Memory may be deallocated by threads other than the allocating threads to study blowup memory fragmentation. Memory may be accessed and deallocated sequentially in allocation order or in tree-like traversals to expose allocator deficiencies in exploiting spatial locality. We demonstrate ACDC's capabilities with seven state-of-the-art allocators for C/C++ in an empirical study which also reveals interesting performance differences between the allocators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2013:PSC, author = "Lian Li and Cristina Cifuentes and Nathan Keynes", title = "Precise and scalable context-sensitive pointer analysis via value flow graph", journal = j-SIGPLAN, volume = "48", number = "11", pages = "85--96", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466483", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "In this paper, we propose a novel method for context-sensitive pointer analysis using the value flow graph (VFG) formulation. We achieve context-sensitivity by simultaneously applying function cloning and computing context-free language reachability (CFL-reachability) in a novel way. In contrast to existing clone-based and CFL-based approaches, flow-sensitivity is easily integrated in our approach by using a flow-sensitive VFG where each value flow edge is computed in a flow-sensitive manner. We apply context-sensitivity to both local variables and heap objects and propose a new approximation for heap cloning. We prove that our approach can achieve context-sensitivity without loss of precision, i.e., it is as precise as inlining all function calls. We develop an efficient algorithm and implement a context-, flow-, and field-sensitive pointer analysis with heap cloning support in LLVM. We evaluate the efficiency and precision of our implementation using standard SPEC CPU2006 benchmarks. 
Our experimental results show that the analysis is much faster than existing approaches, it scales well to large real-world applications, and it enables more effective compiler optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ravitch:2013:AMO, author = "Tristan Ravitch and Ben Liblit", title = "Analyzing memory ownership patterns in {C} libraries", journal = j-SIGPLAN, volume = "48", number = "11", pages = "97--108", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2464162", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "Programs written in multiple languages are known as polyglot programs. In part due to the proliferation of new and productive high-level programming languages, these programs are becoming more common in environments that must interoperate with existing systems. Polyglot programs must manage resource lifetimes across language boundaries. Resource lifetime management bugs can lead to leaks and crashes, which are more difficult to debug in polyglot programs than monoglot programs. We present analyses to automatically infer the ownership semantics of C libraries. The results of these analyses can be used to generate bindings to C libraries that intelligently manage resources, to check the correctness of polyglot programs, and to document the interfaces of C libraries. While these analyses are unsound and incomplete, we demonstrate that they significantly reduce the manual annotation burden for a suite of fifteen open source libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ricci:2013:ETP, author = "Nathan P. Ricci and Samuel Z. Guyer and J. Eliot B. Moss", title = "{Elephant Tracks}: portable production of complete and precise {GC} traces", journal = j-SIGPLAN, volume = "48", number = "11", pages = "109--118", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466484", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "We present Elephant Tracks (ET), a dynamic program analysis tool for Java that produces detailed traces of garbage collection-related events, including object allocations, object deaths, and pointer updates. Like prior work, our tracing tool is based on the Merlin algorithm [6,7], but offers several substantial new capabilities. First, it is much more precise than previous tools: it traces method entries and exits and measures time in terms of them, allowing it to place events precisely in the context of the program structure. Second, it is implemented using a combination of JVM Tool Interface (JVMTI)[13] callbacks and bytecode rewriting, and works with any standard JVM. Finally, it produces complete traces, including weak references, events from the Java Native Interface and sun.misc.Unsafe, and VM start up objects. 
In this paper we also explore the general design space of tracing tools, and carefully define the execution model that the traces represent.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bu:2013:BAD, author = "Yingyi Bu and Vinayak Borkar and Guoqing Xu and Michael J. Carey", title = "A bloat-aware design for big data applications", journal = j-SIGPLAN, volume = "48", number = "11", pages = "119--130", month = nov, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2555670.2466485", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 9 08:04:34 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "ISMM '13 conference proceedings.", abstract = "Over the past decade, the increasing demands on data-driven business intelligence have led to the proliferation of large-scale, data-intensive applications that often have huge amounts of data (often at terabyte or petabyte scale) to process. An object-oriented programming language such as Java is often the developer's choice for implementing such applications, primarily due to its quick development cycle and rich community resource. While the use of such languages makes programming easier, significant performance problems can often be seen --- the combination of the inefficiencies inherent in a managed run-time system and the impact of the huge amount of data to be processed in the limited memory space often leads to memory bloat and performance degradation at a surprisingly early stage. This paper proposes a bloat-aware design paradigm towards the development of efficient and scalable Big Data applications in object-oriented GC enabled languages. To motivate this work, we first perform a study on the impact of several typical memory bloat patterns. These patterns are summarized from the user complaints on the mailing lists of two widely-used open-source Big Data applications. Next, we discuss our design paradigm to eliminate bloat. Using examples and real-world experience, we demonstrate that programming under this paradigm does not incur significant programming burden. We have implemented a few common data processing tasks both using this design and using the conventional object-oriented design. Our experimental results show that this new design paradigm is extremely effective in improving performance --- even for the moderate-size data sets processed, we have observed 2.5x+ performance gains, and the improvement grows substantially with the size of the data set.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ankner:2013:EAH, author = "Johan Ankner and Josef David Svenningsson", title = "An {EDSL} approach to high performance {Haskell} programming", journal = j-SIGPLAN, volume = "48", number = "12", pages = "1--12", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503789", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "This paper argues for a new methodology for writing high performance Haskell programs by using Embedded Domain Specific Languages. 
We exemplify the methodology by describing a complete library, meta-repa, which is a reimplementation of parts of the repa library. The paper describes the implementation of meta-repa and contrasts it with the standard approach to writing high performance libraries. We conclude that even though the embedded language approach has an initial cost of defining the language and some syntactic overhead it gives a more tailored programming model, stronger performance guarantees, better control over optimizations, simpler implementation of fusion and inlining and allows for moving type level programming down to value level programming in some cases. We also provide benchmarks showing that meta-repa is as fast, or faster, than repa. Furthermore, meta-repa also includes push arrays and we demonstrate their usefulness for writing certain high performance kernels such as FFT.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bernardy:2013:NFP, author = "Jean-Philippe Bernardy and Nicolas Pouillard", title = "Names for free: polymorphic views of names and binders", journal = j-SIGPLAN, volume = "48", number = "12", pages = "13--24", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503780", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "We propose a novel technique to represent names and binders in Haskell. The dynamic (run-time) representation is based on de Bruijn indices, but it features an interface to write and manipulate variables conveniently, using Haskell-level lambdas and variables. The key idea is to use rich types: a subterm with an additional free variable is viewed either as $ \forall \nu . \nu \to {\rm Term}(a + \nu) $ or $ \exists \nu . \nu \times {\rm Term}(a + v) $ depending on whether it is constructed or analysed. We demonstrate on a number of examples how this approach permits to express term construction and manipulation in a natural way, while retaining the good properties of representations based on de Bruijn indices.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bird:2013:UIT, author = "Richard Bird and Jeremy Gibbons and Stefan Mehner and Janis Voigtl{\"a}nder and Tom Schrijvers", title = "Understanding idiomatic traversals backwards and forwards", journal = j-SIGPLAN, volume = "48", number = "12", pages = "25--36", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503781", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "We present new ways of reasoning about a particular class of effectful Haskell programs, namely those expressed as idiomatic traversals. Starting out with a specific problem about labelling and unlabelling binary trees, we extract a general inversion law, applicable to any monad, relating a traversal over the elements of an arbitrary traversable type to a traversal that goes in the opposite direction. 
This law can be invoked to show that, in a suitable sense, unlabelling is the inverse of labelling. The inversion law, as well as a number of other properties of idiomatic traversals, is a corollary of a more general theorem characterising traversable functors as finitary containers: an arbitrary traversable object can be decomposed uniquely into shape and contents, and traversal be understood in terms of those. Proof of the theorem involves the properties of traversal in a special idiom related to the free applicative functor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Blazevic:2013:ASM, author = "Mario Bla{\v{z}}evi{\'c}", title = "Adding structure to monoids: thus hopefully ending {Haskell}'s string type confusion", journal = j-SIGPLAN, volume = "48", number = "12", pages = "37--46", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503785", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "This paper presents the rationale and design of monoid-subclasses. This Haskell library consists of a collection of type classes that generalize the interface of several common data types, most importantly those used to represent strings. We demonstrate that the mathematical theory behind monoid-subclasses can bring substantial practical benefits to the Haskell library ecosystem by generalizing attoparsec, one of the most popular Haskell parsing libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Claessen:2013:SPN, author = "Koen Claessen and Michal H. Palka", title = "Splittable pseudorandom number generators using cryptographic hashing", journal = j-SIGPLAN, volume = "48", number = "12", pages = "47--58", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503784", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "We propose a new splittable pseudorandom number generator (PRNG) based on a cryptographic hash function. Splittable PRNGs, in contrast to linear PRNGs, allow the creation of two (seemingly) independent generators from a given random number generator. Splittable PRNGs are very useful for structuring purely functional programs, as they avoid the need for threading around state. We show that the currently known and used splittable PRNGs are either not efficient enough, have inherent flaws, or lack formal arguments about their randomness. In contrast, our proposed generator can be implemented efficiently, and comes with a formal statements and proofs that quantify how 'random' the results are that are generated. 
The provided proofs give strong randomness guarantees under assumptions commonly made in cryptography.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kiselyov:2013:EEA, author = "Oleg Kiselyov and Amr Sabry and Cameron Swords", title = "Extensible effects: an alternative to monad transformers", journal = j-SIGPLAN, volume = "48", number = "12", pages = "59--70", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503791", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "We design and implement a library that solves the long-standing problem of combining effects without imposing restrictions on their interactions (such as static ordering). Effects arise from interactions between a client and an effect handler (interpreter); interactions may vary throughout the program and dynamically adapt to execution conditions. Existing code that relies on monad transformers may be used with our library with minor changes, gaining efficiency over long monad stacks. In addition, our library has greater expressiveness, allowing for practical idioms that are inefficient, cumbersome, or outright impossible with monad transformers. Our alternative to a monad transformer stack is a single monad, for the coroutine-like communication of a client with its handler. Its type reflects possible requests, i.e., possible effects of a computation. To support arbitrary effects and their combinations, requests are values of an extensible union type, which allows adding and, notably, subtracting summands. Extending and, upon handling, shrinking of the union of possible requests is reflected in its type, yielding a type-and-effect system for Haskell. The library is lightweight, generalizing the extensible exception handling to other effects and accurately tracking them in types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Leslie-Hurd:2013:MVS, author = "Joe Leslie-Hurd", title = "Maintaining verified software", journal = j-SIGPLAN, volume = "48", number = "12", pages = "71--80", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503787", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Maintaining software in the face of evolving dependencies is a challenging problem, and in addition to good release practices there is a need for automatic dependency analysis tools to avoid errors creeping in. Verified software reveals more semantic information in the form of mechanized proofs of functional specifications, and this can be used for dependency analysis. In this paper we present a scheme for automatic dependency analysis of verified software, which for each program checks that the collection of installed libraries is sufficient to guarantee its functional correctness. We illustrate the scheme with a case study of Haskell packages verified in higher order logic. 
The dependency analysis reduces the burden of maintaining verified Haskell packages by automatically computing version ranges for the packages they depend on, such that any combination provides the functionality required for correct operation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lindley:2013:HPP, author = "Sam Lindley and Conor McBride", title = "{Hasochism}: the pleasure and pain of dependently typed {Haskell} programming", journal = j-SIGPLAN, volume = "48", number = "12", pages = "81--92", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503786", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Haskell's type system has outgrown its Hindley-Milner roots to the extent that it now stretches to the basics of dependently typed programming. In this paper, we collate and classify techniques for programming with dependent types in Haskell, and contribute some new ones. In particular, through extended examples --- merge-sort and rectangular tilings --- we show how to exploit Haskell's constraint solver as a theorem prover, delivering code which, as Agda programmers, we envy. We explore the compromises involved in simulating variations on the theme of the dependent function space in an attempt to help programmers put dependent types to work, and to inform the evolving language design both of Haskell and of dependently typed languages more broadly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lippmeier:2013:DFF, author = "Ben Lippmeier and Manuel M. T. Chakravarty and Gabriele Keller and Amos Robinson", title = "Data flow fusion with series expressions in {Haskell}", journal = j-SIGPLAN, volume = "48", number = "12", pages = "93--104", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503782", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Existing approaches to array fusion can deal with straight-line producer consumer pipelines, but cannot fuse branching data flows where a generated array is consumed by several different consumers. Branching data flows are common and natural to write, but a lack of fusion leads to the creation of an intermediate array at every branch point. We present a new array fusion system that handles branches, based on Waters's series expression framework, but extended to work in a functional setting. Our system also solves a related problem in stream fusion, namely the introduction of duplicate loop counters. We demonstrate speedup over existing fusion systems for several key examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2013:ILH, author = "Hai Liu and Neal Glew and Leaf Petersen and Todd A. 
Anderson", title = "The {Intel} labs {Haskell} research compiler", journal = j-SIGPLAN, volume = "48", number = "12", pages = "105--116", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503779", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "The Glasgow Haskell Compiler (GHC) is a well supported optimizing compiler for the Haskell programming language, along with its own extensions to the language and libraries. Haskell's lazy semantics imposes a runtime model which is in general difficult to implement efficiently. GHC achieves good performance across a wide variety of programs via aggressive optimization taking advantage of the lack of side effects, and by targeting a carefully tuned virtual machine. The Intel Labs Haskell Research Compiler uses GHC as a frontend, but provides a new whole-program optimizing backend by compiling the GHC intermediate representation to a relatively generic functional language compilation platform. We found that GHC's external Core language was relatively easy to use, but reusing GHC's libraries and achieving full compatibility were harder. For certain classes of programs, our platform provides substantial performance benefits over GHC alone, performing $ 2 \times $ faster than GHC with the LLVM backend on selected modern performance-oriented benchmarks; for other classes of programs, the benefits of GHC's tuned virtual machine continue to outweigh the benefits of more aggressive whole program optimization. Overall we achieve parity with GHC with the LLVM backend. In this paper, we describe our Haskell compiler stack, its implementation and optimization approach, and present benchmark results comparing it to GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{vanderPloeg:2013:MFR, author = "Atze van der Ploeg", title = "Monadic functional reactive programming", journal = j-SIGPLAN, volume = "48", number = "12", pages = "117--128", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503783", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Functional Reactive Programming (FRP) is a way to program reactive systems in functional style, eliminating many of the problems that arise from imperative techniques. In this paper, we present an alternative FRP formulation that is based on the notion of a reactive computation: a monadic computation which may require the occurrence of external events to continue. A signal computation is a reactive computation that may also emit values. In contrast to signals in other FRP formulations, signal computations can end, leading to a monadic interface for sequencing signal phases. This interface has several advantages: routing is implicit, sequencing signal phases is easier and more intuitive than when using the switching combinators found in other FRP approaches, and dynamic lists require much less boilerplate code. 
In other FRP approaches, either the entire FRP expression is re-evaluated on each external stimulus, or impure techniques are used to prevent redundant re-computations. We show how Monadic FRP can be implemented straightforwardly in a purely functional way while preventing redundant re-computations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Voellmy:2013:MHP, author = "Andreas Richard Voellmy and Junchang Wang and Paul Hudak and Kazuhiko Yamamoto", title = "{Mio}: a high-performance multicore {IO} manager for {GHC}", journal = j-SIGPLAN, volume = "48", number = "12", pages = "129--140", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503790", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Haskell threads provide a key, lightweight concurrency abstraction to simplify the programming of important network applications such as web servers and software-defined network (SDN) controllers. The flagship Glasgow Haskell Compiler (GHC) introduces a run-time system (RTS) to achieve a high-performance multicore implementation of Haskell threads, by introducing effective components such as a multicore scheduler, a parallel garbage collector, an IO manager, and efficient multicore memory allocation. Evaluations of the GHC RTS, however, show that it does not scale well on multicore processors, leading to poor performance of many network applications that try to use lightweight Haskell threads. In this paper, we show that the GHC IO manager, which is a crucial component of the GHC RTS, is the scaling bottleneck. Through a series of experiments, we identify key data structure, scheduling, and dispatching bottlenecks of the GHC IO manager. We then design a new multicore IO manager named Mio that eliminates all these bottlenecks. Our evaluations show that the new Mio manager improves realistic web server throughput by 6.5x and reduces expected web server response time by 5.7x. We also show that with Mio, McNettle (an SDN controller written in Haskell) can scale effectively to 40+ cores, reach a throughput of over 20 million new requests per second on a single machine, and hence become the fastest of all existing SDN controllers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Wortmann:2013:COH, author = "Peter M. Wortmann and David Duke", title = "Causality of optimized {Haskell}: what is burning our cycles?", journal = j-SIGPLAN, volume = "48", number = "12", pages = "141--152", month = dec, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578854.2503788", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "Haskell '14 conference proceedings.", abstract = "Profiling real-world Haskell programs is hard, as compiler optimizations make it tricky to establish causality between the source code and program behavior. In this paper we attack the root issue by performing a causality analysis of functional programs under optimization. 
We apply our findings to build a novel profiling infrastructure on top of the Glasgow Haskell Compiler, allowing for performance analysis even of aggressively optimized programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Birkedal:2014:MRA, author = "Lars Birkedal", title = "Modular reasoning about concurrent higher-order imperative programs", journal = j-SIGPLAN, volume = "49", number = "1", pages = "1--1", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2537849", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cousot:2014:GCC, author = "Patrick Cousot and Radhia Cousot", title = "A {Galois} connection calculus for abstract interpretation", journal = j-SIGPLAN, volume = "49", number = "1", pages = "3--4", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2537850", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We introduce a Galois connection calculus for language independent specification of abstract interpretations used in programming language semantics, formal verification, and static analysis. This Galois connection calculus and its type system are typed by abstract interpretation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Castagna:2014:PFS, author = "Giuseppe Castagna and Kim Nguyen and Zhiwu Xu and Hyeonseung Im and Sergue{\"\i} Lenglet and Luca Padovani", title = "Polymorphic functions with set-theoretic types: part 1: syntax, semantics, and evaluation", journal = j-SIGPLAN, volume = "49", number = "1", pages = "5--17", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535840", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "This article is the first part of a two articles series about a calculus with higher-order polymorphic functions, recursive types with arrow and product type constructors and set-theoretic type connectives (union, intersection, and negation). In this first part we define and study the explicitly-typed version of the calculus in which type instantiation is driven by explicit instantiation annotations. In particular, we define an explicitly-typed lambda-calculus with intersection types and an efficient evaluation model for it. In the second part, presented in a companion paper, we define a local type inference system that allows the programmer to omit explicit instantiation annotations, and a type reconstruction system that allows the programmer to omit explicit type annotations.
The work presented in the two articles provides the theoretical foundations and technical machinery needed to design and implement higher-order polymorphic functional languages for semi-structured data.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kilpatrick:2014:BRH, author = "Scott Kilpatrick and Derek Dreyer and Simon Peyton Jones and Simon Marlow", title = "{Backpack}: retrofitting {Haskell} with interfaces", journal = j-SIGPLAN, volume = "49", number = "1", pages = "19--31", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535884", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Module systems like that of Haskell permit only a weak form of modularity in which module implementations depend directly on other implementations and must be processed in dependency order. Module systems like that of ML, on the other hand, permit a stronger form of modularity in which explicit interfaces express assumptions about dependencies, and each module can be typechecked and reasoned about independently. In this paper, we present Backpack, a new language for building separately-typecheckable *packages* on top of a weak module system like Haskell's. The design of Backpack is inspired by the MixML module calculus of Rossberg and Dreyer, but differs significantly in detail. Like MixML, Backpack supports explicit interfaces and recursive linking. Unlike MixML, Backpack supports a more flexible applicative semantics of instantiation. Moreover, its design is motivated less by foundational concerns and more by the practical concern of integration into Haskell, which has led us to advocate simplicity --- in both the syntax and semantics of Backpack --- over raw expressive power. The semantics of Backpack packages is defined by elaboration to sets of Haskell modules and binary interface files, thus showing how Backpack maintains interoperability with Haskell while extending it with separate typechecking. Lastly, although Backpack is geared toward integration into Haskell, its design and semantics are largely agnostic with respect to the details of the underlying core language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Casinghino:2014:CPP, author = "Chris Casinghino and Vilhelm Sj{\"o}berg and Stephanie Weirich", title = "Combining proofs and programs in a dependently typed language", journal = j-SIGPLAN, volume = "49", number = "1", pages = "33--45", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535883", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Most dependently-typed programming languages either require that all expressions terminate (e.g. Coq, Agda, and Epigram), or allow infinite loops but are inconsistent when viewed as logics (e.g. Haskell, ATS, $ \Omega $ ). Here, we combine these two approaches into a single dependently-typed core language. 
The language is composed of two fragments that share a common syntax and overlapping semantics: a logic that guarantees total correctness, and a call-by-value programming language that guarantees type safety but not termination. The two fragments may interact: logical expressions may be used as programs; the logic may soundly reason about potentially nonterminating programs; programs can require logical proofs as arguments; and ``mobile'' program values, including proofs computed at runtime, may be used as evidence by the logic. This language allows programmers to work with total and partial functions uniformly, providing a smooth path from functional programming to dependently-typed programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dissegna:2014:TCA, author = "Stefano Dissegna and Francesco Logozzo and Francesco Ranzato", title = "Tracing compilation by abstract interpretation", journal = j-SIGPLAN, volume = "49", number = "1", pages = "47--59", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535866", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Tracing just-in-time compilation is a popular compilation schema for the efficient implementation of dynamic languages, which is commonly used for JavaScript, Python, and PHP. It relies on two key ideas. First, it monitors the execution of the program to detect so-called hot paths, i.e., the most frequently executed paths. Then, it uses some store information available at runtime to optimize hot paths. The result is a residual program where the optimized hot paths are guarded by sufficient conditions ensuring the equivalence of the optimized path and the original program. The residual program is persistently mutated during its execution, e.g., to add new optimized paths or to merge existing paths. Tracing compilation is thus fundamentally different than traditional static compilation. Nevertheless, despite the remarkable practical success of tracing compilation, very little is known about its theoretical foundations. We formalize tracing compilation of programs using abstract interpretation. The monitoring (viz., hot path detection) phase corresponds to an abstraction of the trace semantics that captures the most frequent occurrences of sequences of program points together with an abstraction of their corresponding stores, e.g., a type environment. The optimization (viz., residual program generation) phase corresponds to a transform of the original program that preserves its trace semantics up to a given observation as modeled by some abstraction. We provide a generic framework to express dynamic optimizations and to prove them correct. We instantiate it to prove the correctness of dynamic type specialization. We show that our framework is more general than a recent model of tracing compilation introduced in POPL~2011 by Guo and Palsberg (based on operational bisimulations). 
In our model we can naturally express hot path reentrance and common optimizations like dead-store elimination, which are either excluded or unsound in Guo and Palsberg's framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ramsay:2014:TDA, author = "Steven J. Ramsay and Robin P. Neatherway and C.-H. Luke Ong", title = "A type-directed abstraction refinement approach to higher-order model checking", journal = j-SIGPLAN, volume = "49", number = "1", pages = "61--72", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535873", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "The trivial-automaton model checking problem for higher-order recursion schemes has become a widely studied object in connection with the automatic verification of higher-order programs. The problem is formidably hard: despite considerable progress in recent years, no decision procedures have been demonstrated to scale robustly beyond recursion schemes that comprise more than a few hundred rewrite rules. We present a new, fixed-parameter polynomial time algorithm, based on a novel, type directed form of abstraction refinement in which behaviours of a scheme are distinguished by the abstraction according to the intersection types that they inhabit (the properties that they satisfy). Unlike other intersection type approaches, our algorithm reasons both about acceptance by the property automaton and acceptance by its dual, simultaneously, in order to minimize the amount of work done by converging on the solution to a problem instance from both sides. We have constructed Preface, a prototype implementation of the algorithm, and assembled an extensive body of evidence to demonstrate empirically that the algorithm readily scales to recursion schemes of several thousand rules, well beyond the capabilities of current state-of-the-art higher-order model checkers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Coughlin:2014:FTA, author = "Devin Coughlin and Bor-Yuh Evan Chang", title = "Fissile type analysis: modular checking of almost everywhere invariants", journal = j-SIGPLAN, volume = "49", number = "1", pages = "73--85", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535855", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We present a generic analysis approach to the imperative relationship update problem, in which destructive updates temporarily violate a global invariant of interest. Such invariants can be conveniently and concisely specified with dependent refinement types, which are efficient to check flow-insensitively. Unfortunately, while traditional flow-insensitive type checking is fast, it is inapplicable when the desired invariants can be temporarily broken. 
To overcome this limitation, past works have directly ratcheted up the complexity of the type analysis and associated type invariants, leading to inefficient analysis and verbose specifications. In contrast, we propose a generic lifting of modular refinement type analyses with a symbolic analysis to efficiently and effectively check concise invariants that hold almost everywhere. The result is an efficient, highly modular flow-insensitive type analysis to optimistically check the preservation of global relationship invariants that can fall back to a precise, disjunctive symbolic analysis when the optimistic assumption is violated. This technique permits programmers to temporarily break and then re-establish relationship invariants--a flexibility that is crucial for checking relationships in real-world, imperative languages. A significant challenge is selectively violating the global type consistency invariant over heap locations, which we achieve via almost type-consistent heaps. To evaluate our approach, we have encoded the problem of verifying the safety of reflective method calls in dynamic languages as a refinement type checking problem. Our analysis is capable of validating reflective call safety at interactive speeds on commonly-used Objective-C libraries and applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bodin:2014:TMJ, author = "Martin Bodin and Arthur Chargueraud and Daniele Filaretti and Philippa Gardner and Sergio Maffeis and Daiva Naudziuniene and Alan Schmitt and Gareth Smith", title = "A trusted mechanised {JavaScript} specification", journal = j-SIGPLAN, volume = "49", number = "1", pages = "87--100", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535876", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "JavaScript is the most widely used web language for client-side applications. Whilst the development of JavaScript was initially just led by implementation, there is now increasing momentum behind the ECMA standardisation process. The time is ripe for a formal, mechanised specification of JavaScript, to clarify ambiguities in the ECMA standards, to serve as a trusted reference for high-level language compilation and JavaScript implementations, and to provide a platform for high-assurance proofs of language properties. We present JSCert, a formalisation of the current ECMA standard in the Coq proof assistant, and JSRef, a reference interpreter for JavaScript extracted from Coq to OCaml. We give a Coq proof that JSRef is correct with respect to JSCert and assess JSRef using test262, the ECMA conformance test suite. Our methodology ensures that JSCert is a comparatively accurate formulation of the English standard, which will only improve as time goes on. 
We have demonstrated that modern techniques of mechanised specification can handle the complexity of JavaScript.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Krebbers:2014:OAS, author = "Robbert Krebbers", title = "An operational and axiomatic semantics for non-determinism and sequence points in {C}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "101--112", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535878", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "The C11 standard of the C programming language does not specify the execution order of expressions. Besides, to make more effective optimizations possible (e.g., delaying of side-effects and interleaving), it gives compilers in certain cases the freedom to use even more behaviors than just those of all execution orders. Widely used C compilers actually exploit this freedom given by the C standard for optimizations, so it should be taken seriously in formal verification. This paper presents an operational and axiomatic semantics (based on separation logic) for non-determinism and sequence points in C. We prove soundness of our axiomatic semantics with respect to our operational semantics. This proof has been fully formalized using the Coq proof assistant.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Anderson:2014:NSF, author = "Carolyn Jane Anderson and Nate Foster and Arjun Guha and Jean-Baptiste Jeannin and Dexter Kozen and Cole Schlesinger and David Walker", title = "{NetKAT}: semantic foundations for networks", journal = j-SIGPLAN, volume = "49", number = "1", pages = "113--126", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535862", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Recent years have seen growing interest in high-level languages for programming networks. But the design of these languages has been largely ad hoc, driven more by the needs of applications and the capabilities of network hardware than by foundational principles. The lack of a semantic foundation has left language designers with little guidance in determining how to incorporate new features, and programmers without a means to reason precisely about their code. This paper presents NetKAT, a new network programming language that is based on a solid mathematical foundation and comes equipped with a sound and complete equational theory. We describe the design of NetKAT, including primitives for filtering, modifying, and transmitting packets; union and sequential composition operators; and a Kleene star operator that iterates programs. We show that NetKAT is an instance of a canonical and well-studied mathematical structure called a Kleene algebra with tests (KAT) and prove that its equational theory is sound and complete with respect to its denotational semantics. 
Finally, we present practical applications of the equational theory including syntactic techniques for checking reachability, proving non-interference properties that ensure isolation between programs, and establishing the correctness of compilation algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sharma:2014:BVT, author = "Rahul Sharma and Aditya V. Nori and Alex Aiken", title = "Bias-variance tradeoffs in program analysis", journal = j-SIGPLAN, volume = "49", number = "1", pages = "127--137", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535853", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "It is often the case that increasing the precision of a program analysis leads to worse results. It is our thesis that this phenomenon is the result of fundamental limits on the ability to use precise abstract domains as the basis for inferring strong invariants of programs. We show that bias-variance tradeoffs, an idea from learning theory, can be used to explain why more precise abstractions do not necessarily lead to better results and also provides practical techniques for coping with such limitations. Learning theory captures precision using a combinatorial quantity called the VC dimension. We compute the VC dimension for different abstractions and report on its usefulness as a precision metric for program analyses. We evaluate cross validation, a technique for addressing bias-variance tradeoffs, on an industrial strength program verification tool called YOGI. The tool produced using cross validation has significantly better running time, finds new defects, and has fewer time-outs than the current production version. Finally, we make some recommendations for tackling bias-variance tradeoffs in program analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DSilva:2014:AS, author = "Vijay D'Silva and Leopold Haller and Daniel Kroening", title = "Abstract satisfaction", journal = j-SIGPLAN, volume = "49", number = "1", pages = "139--150", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535868", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "This article introduces an abstract interpretation framework that codifies the operations in SAT and SMT solvers in terms of lattices, transformers and fixed points. We develop the idea that a formula denotes a set of models in a universe of structures. This set of models has characterizations as fixed points of deduction, abduction and quantification transformers. A wide range of satisfiability procedures can be understood as computing and refining approximations of such fixed points. These include procedures in the DPLL family, those for preprocessing and inprocessing in SAT solvers, decision procedures for equality logics, weak arithmetics, and procedures for approximate quantification. 
Our framework provides a unified, mathematical basis for studying and combining program analysis and satisfiability procedures. A practical benefit of our work is a new, logic-agnostic architecture for implementing solvers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Farzan:2014:PC, author = "Azadeh Farzan and Zachary Kincaid and Andreas Podelski", title = "Proofs that count", journal = j-SIGPLAN, volume = "49", number = "1", pages = "151--164", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535885", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Counting arguments are among the most basic proof methods in mathematics. Within the field of formal verification, they are useful for reasoning about programs with infinite control, such as programs with an unbounded number of threads, or (concurrent) programs with recursive procedures. While counting arguments are common in informal, hand-written proofs of such programs, there are no fully automated techniques to construct counting arguments. The key questions involved in automating counting arguments are: how to decide what should be counted?, and how to decide when a counting argument is valid? In this paper, we present a technique for automatically constructing and checking counting arguments, which includes novel solutions to these questions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{deAmorim:2014:VIF, author = "Arthur Azevedo de Amorim and Nathan Collins and Andr{\'e} DeHon and Delphine Demange and Catalin Hritcu and David Pichardie and Benjamin C. Pierce and Randy Pollack and Andrew Tolmach", title = "A verified information-flow architecture", journal = j-SIGPLAN, volume = "49", number = "1", pages = "165--178", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535839", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "SAFE is a clean-slate design for a highly secure computer system, with pervasive mechanisms for tracking and limiting information flows. At the lowest level, the SAFE hardware supports fine-grained programmable tags, with efficient and flexible propagation and combination of tags as instructions are executed. The operating system virtualizes these generic facilities to present an information-flow abstract machine that allows user programs to label sensitive data with rich confidentiality policies. We present a formal, machine-checked model of the key hardware and software mechanisms used to control information flow in SAFE and an end-to-end proof of noninterference for this model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kumar:2014:CVI, author = "Ramana Kumar and Magnus O. 
Myreen and Michael Norrish and Scott Owens", title = "{CakeML}: a verified implementation of {ML}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "179--191", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535841", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We have developed and mechanically verified an ML system called CakeML, which supports a substantial subset of Standard ML. CakeML is implemented as an interactive read-eval-print loop (REPL) in x86-64 machine code. Our correctness theorem ensures that this REPL implementation prints only those results permitted by the semantics of CakeML. Our verification effort touches on a breadth of topics including lexing, parsing, type checking, incremental and dynamic compilation, garbage collection, arbitrary-precision arithmetic, and compiler bootstrapping. Our contributions are twofold. The first is simply in building a system that is end-to-end verified, demonstrating that each piece of such a verification effort can in practice be composed with the others, and ensuring that none of the pieces rely on any over-simplifying assumptions. The second is developing novel approaches to some of the more challenging aspects of the verification. In particular, our formally verified compiler can bootstrap itself: we apply the verified compiler to itself to produce a verified machine-code implementation of the compiler. Additionally, our compiler proof handles diverging input programs with a lightweight approach based on logical timeout exceptions. The entire development was carried out in the HOL4 theorem prover.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Barthe:2014:PRV, author = "Gilles Barthe and C{\'e}dric Fournet and Benjamin Gr{\'e}goire and Pierre-Yves Strub and Nikhil Swamy and Santiago Zanella-B{\'e}guelin", title = "Probabilistic relational verification for cryptographic implementations", journal = j-SIGPLAN, volume = "49", number = "1", pages = "193--205", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535847", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Relational program logics have been used for mechanizing formal proofs of various cryptographic constructions. With an eye towards scaling these successes towards end-to-end security proofs for implementations of distributed systems, we present RF*, a relational extension of F*, a general-purpose higher-order stateful programming language with a verification system based on refinement types. The distinguishing feature of F* is a relational Hoare logic for a higher-order, stateful, probabilistic language. Through careful language design, we adapt the F* typechecker to generate both classic and relational verification conditions, and to automatically discharge their proofs using an SMT solver. Thus, we are able to benefit from the existing features of F*, including its abstraction facilities for modular reasoning about program fragments. 
We evaluate RF* experimentally by programming a series of cryptographic constructions and protocols, and by verifying their security properties, ranging from information flow to unlinkability, integrity, and privacy. Moreover, we validate the design of RF* by formalizing in Coq a core probabilistic \lambda calculus and a relational refinement type system and proving the soundness of the latter against a denotational semantics of the probabilistic \lambda calculus.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chaudhuri:2014:BBQ, author = "Swarat Chaudhuri and Martin Clochard and Armando Solar-Lezama", title = "Bridging boolean and quantitative synthesis using smoothed proof search", journal = j-SIGPLAN, volume = "49", number = "1", pages = "207--220", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535859", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We present a new technique for parameter synthesis under boolean and quantitative objectives. The input to the technique is a ``sketch'' --- a program with missing numerical parameters --- and a probabilistic assumption about the program's inputs. The goal is to automatically synthesize values for the parameters such that the resulting program satisfies: (1) a {boolean specification}, which states that the program must meet certain assertions, and (2) a {quantitative specification}, which assigns a real valued rating to every program and which the synthesizer is expected to optimize. Our method --- called smoothed proof search --- reduces this task to a sequence of unconstrained smooth optimization problems that are then solved numerically. By iteratively solving these problems, we obtain parameter values that get closer and closer to meeting the boolean specification; at the limit, we obtain values that provably meet the specification. The approximations are computed using a new notion of smoothing for program abstractions, where an abstract transformer is approximated by a function that is continuous according to a metric over abstract states. We present a prototype implementation of our synthesis procedure, and experimental results on two benchmarks from the embedded control domain.
The experiments demonstrate the benefits of smoothed proof search over an approach that does not meet the boolean and quantitative synthesis goals simultaneously.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Beyene:2014:CBA, author = "Tewodros Beyene and Swarat Chaudhuri and Corneliu Popeea and Andrey Rybalchenko", title = "A constraint-based approach to solving games on infinite graphs", journal = j-SIGPLAN, volume = "49", number = "1", pages = "221--233", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535860", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We present a constraint-based approach to computing winning strategies in two-player graph games over the state space of infinite-state programs. Such games have numerous applications in program verification and synthesis, including the synthesis of infinite-state reactive programs and branching-time verification of infinite-state programs. Our method handles games with winning conditions given by safety, reachability, and general Linear Temporal Logic (LTL) properties. For each property class, we give a deductive proof rule that --- provided a symbolic representation of the game players --- describes a winning strategy for a particular player. Our rules are sound and relatively complete. We show that these rules can be automated by using an off-the-shelf Horn constraint solver that supports existential quantification in clause heads. The practical promise of the rules is demonstrated through several case studies, including a challenging ``Cinderella-Stepmother game'' that allows infinite alternation of discrete and continuous choices by two players, as well as examples derived from prior work on program repair and synthesis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Darulova:2014:SCR, author = "Eva Darulova and Viktor Kuncak", title = "Sound compilation of reals", journal = j-SIGPLAN, volume = "49", number = "1", pages = "235--248", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535874", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Writing accurate numerical software is hard because of many sources of unavoidable uncertainties, including finite numerical precision of implementations. We present a programming model where the user writes a program in a real-valued implementation and specification language that explicitly includes different types of uncertainties. We then present a compilation algorithm that generates a finite-precision implementation that is guaranteed to meet the desired precision with respect to real numbers. Our compilation performs a number of verification steps for different candidate precisions. It generates verification conditions that treat all sources of uncertainties in a unified way and encode reasoning about finite-precision roundoff errors into reasoning about real numbers. 
Such verification conditions can be used as a standardized format for verifying the precision and the correctness of numerical programs. Due to their non-linear nature, precise reasoning about these verification conditions remains difficult and cannot be handled using state-of-the art SMT solvers alone. We therefore propose a new procedure that combines exact SMT solving over reals with approximate and sound affine and interval arithmetic. We show that this approach overcomes scalability limitations of SMT solvers while providing improved precision over affine and interval arithmetic. Our implementation gives promising results on several numerical models, including dynamical systems, transcendental functions, and controller implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Huet:2014:YRD, author = "G{\'e}rard Huet and Hugo Herbelin", title = "30 years of research and development around {Coq}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "249--249", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2537848", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Brookes:2014:ER, author = "Stephen Brookes and Peter W. O'Hearn and Uday Reddy", title = "The essence of {Reynolds}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "251--255", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2537851", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "John Reynolds (1935-2013) was a pioneer of programming languages research. In this paper we pay tribute to the man, his ideas, and his influence.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kuper:2014:FAW, author = "Lindsey Kuper and Aaron Turon and Neelakantan R. Krishnaswami and Ryan R. Newton", title = "Freeze after writing: quasi-deterministic parallel programming with {LVars}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "257--270", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535842", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Deterministic-by-construction parallel programming models offer the advantages of parallel speedup while avoiding the nondeterministic, hard-to-reproduce bugs that plague fully concurrent code. A principled approach to deterministic-by-construction parallel programming with shared state is offered by LVars: shared memory locations whose semantics are defined in terms of an application-specific lattice. 
Writes to an LVar take the least upper bound of the old and new values with respect to the lattice, while reads from an LVar can observe only that its contents have crossed a specified threshold in the lattice. Although it guarantees determinism, this interface is quite limited. We extend LVars in two ways. First, we add the ability to ``freeze'' and then read the contents of an LVar directly. Second, we add the ability to attach event handlers to an LVar, triggering a callback when the LVar's value changes. Together, handlers and freezing enable an expressive and useful style of parallel programming. We prove that in a language where communication takes place through these extended LVars, programs are at worst quasi-deterministic: on every run, they either produce the same answer or raise an error. We demonstrate the viability of our approach by implementing a library for Haskell supporting a variety of LVar-based data structures, together with a case study that illustrates the programming model and yields promising parallel speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Burckhardt:2014:RDT, author = "Sebastian Burckhardt and Alexey Gotsman and Hongseok Yang and Marek Zawirski", title = "Replicated data types: specification, verification, optimality", journal = j-SIGPLAN, volume = "49", number = "1", pages = "271--284", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535848", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Geographically distributed systems often rely on replicated eventually consistent data stores to achieve availability and performance. To resolve conflicting updates at different replicas, researchers and practitioners have proposed specialized consistency protocols, called replicated data types, that implement objects such as registers, counters, sets or lists. Reasoning about replicated data types has however not been on par with comparable work on abstract data types and concurrent data types, lacking specifications, correctness proofs, and optimality results. To fill in this gap, we propose a framework for specifying replicated data types using relations over events and verifying their implementations using replication-aware simulations. We apply it to 7 existing implementations of 4 data types with nontrivial conflict-resolution strategies and optimizations (last-writer-wins register, counter, multi-value register and observed-remove set). We also present a novel technique for obtaining lower bounds on the worst-case space overhead of data type implementations and use it to prove optimality of 4 implementations. Finally, we show how to specify consistency of replicated stores with multiple objects axiomatically, in analogy to prior work on weak memory models. 
Overall, our work provides foundational reasoning tools to support research on replicated eventually consistent stores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bouajjani:2014:VEC, author = "Ahmed Bouajjani and Constantin Enea and Jad Hamza", title = "Verifying eventual consistency of optimistic replication systems", journal = j-SIGPLAN, volume = "49", number = "1", pages = "285--296", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535877", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We address the verification problem of eventual consistency of optimistic replication systems. Such systems are typically used to implement distributed data structures over large scale networks. We introduce a formal definition of eventual consistency that applies to a wide class of existing implementations, including the ones using speculative executions. Then, we reduce the problem of checking eventual consistency to reachability and model checking problems. This reduction enables the use of existing verification tools for message-passing programs in the context of verifying optimistic replication systems. Furthermore, we derive from these reductions decision procedures for checking eventual consistency of systems implemented as finite-state programs communicating through unbounded unordered channels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DalLago:2014:CEH, author = "Ugo {Dal Lago} and Davide Sangiorgi and Michele Alberti", title = "On coinductive equivalences for higher-order probabilistic functional programs", journal = j-SIGPLAN, volume = "49", number = "1", pages = "297--308", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535872", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We study bisimulation and context equivalence in a probabilistic lambda-calculus. The contributions of this paper are threefold. Firstly we show a technique for proving congruence of probabilistic applicative bisimilarity. While the technique follows Howe's method, some of the technicalities are quite different, relying on non-trivial ``disentangling'' properties for sets of real numbers. Secondly we show that, while bisimilarity is in general strictly finer than context equivalence, coincidence between the two relations is attained on pure lambda-terms. The resulting equality is that induced by Levy--Longo trees, generally accepted as the finest extensional equivalence on pure lambda-terms under a lazy regime. Finally, we derive a coinductive characterisation of context equivalence on the whole probabilistic language, via an extension in which terms akin to distributions may appear in redex position. 
Another motivation for the extension is that its operational semantics allows us to experiment with a different congruence technique, namely that of logical bisimilarity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ehrhard:2014:PCS, author = "Thomas Ehrhard and Christine Tasson and Michele Pagani", title = "Probabilistic coherence spaces are fully abstract for probabilistic {PCF}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "309--320", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535865", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Probabilistic coherence spaces (PCoh) yield a semantics of higher-order probabilistic computation, interpreting types as convex sets and programs as power series. We prove that the equality of interpretations in Pcoh characterizes the operational indistinguishability of programs in PCF with a random primitive. This is the first result of full abstraction for a semantics of probabilistic PCF. The key ingredient relies on the regularity of power series. Along the way to the theorem, we design a weighted intersection type assignment system giving a logical presentation of PCoh.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gordon:2014:TSD, author = "Andrew D. Gordon and Thore Graepel and Nicolas Rolland and Claudio Russo and Johannes Borgstrom and John Guiver", title = "{Tabular}: a schema-driven probabilistic programming language", journal = j-SIGPLAN, volume = "49", number = "1", pages = "321--334", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535850", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We propose a new kind of probabilistic programming language for machine learning. We write programs simply by annotating existing relational schemas with probabilistic model expressions. We describe a detailed design of our language, Tabular, complete with formal semantics and type system. A rich series of examples illustrates the expressiveness of Tabular. We report an implementation, and show evidence of the succinctness of our notation relative to current best practice. Finally, we describe and verify a transformation of Tabular schemas so as to predict missing values in a concrete database. 
The ability to query for missing values provides a uniform interface to a wide variety of tasks, including classification, clustering, recommendation, and ranking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sergey:2014:MHO, author = "Ilya Sergey and Dimitrios Vytiniotis and Simon Peyton Jones", title = "Modular, higher-order cardinality analysis in theory and practice", journal = j-SIGPLAN, volume = "49", number = "1", pages = "335--347", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535861", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Since the mid '80s, compiler writers for functional languages (especially lazy ones) have been writing papers about identifying and exploiting thunks and lambdas that are used only once. However it has proved difficult to achieve both power and simplicity in practice. We describe a new, modular analysis for a higher-order language, which is both simple and effective, and present measurements of its use in a full-scale, state of the art optimising compiler. The analysis finds many single-entry thunks and one-shot lambdas and enables a number of program optimisations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chang:2014:PL, author = "Stephen Chang and Matthias Felleisen", title = "Profiling for laziness", journal = j-SIGPLAN, volume = "49", number = "1", pages = "349--360", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535887", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "While many programmers appreciate the benefits of lazy programming at an abstract level, determining which parts of a concrete program to evaluate lazily poses a significant challenge for most of them. Over the past thirty years, experts have published numerous papers on the problem, but developing this level of expertise requires a significant amount of experience. We present a profiling-based technique that captures and automates this expertise for the insertion of laziness annotations into strict programs. To make this idea precise, we show how to equip a formal semantics with a metric that measures waste in an evaluation. Then we explain how to implement this metric as a dynamic profiling tool that suggests where to insert laziness into a program. 
Finally, we present evidence that our profiler's suggestions either match or improve on an expert's use of laziness in a range of real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cave:2014:FRP, author = "Andrew Cave and Francisco Ferreira and Prakash Panangaden and Brigitte Pientka", title = "Fair reactive programming", journal = j-SIGPLAN, volume = "49", number = "1", pages = "361--372", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535881", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Functional Reactive Programming (FRP) models reactive systems with events and signals, which have previously been observed to correspond to the ``eventually'' and ``always'' modalities of linear temporal logic (LTL). In this paper, we define a constructive variant of LTL with least fixed point and greatest fixed point operators in the spirit of the modal mu-calculus, and give it a proofs-as-programs interpretation as a foundational calculus for reactive programs. Previous work emphasized the propositions-as-types part of the correspondence between LTL and FRP; here we emphasize the proofs-as-programs part by employing structural proof theory. We show that the type system is expressive enough to enforce liveness properties such as the fairness of schedulers and the eventual delivery of results. We illustrate programming in this calculus using (co)iteration operators. We prove type preservation of our operational semantics, which guarantees that our programs are causal. We give also a proof of strong normalization which provides justification that our programs are productive and that they satisfy liveness properties derived from their types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Abdulla:2014:ODP, author = "Parosh Abdulla and Stavros Aronis and Bengt Jonsson and Konstantinos Sagonas", title = "Optimal dynamic partial order reduction", journal = j-SIGPLAN, volume = "49", number = "1", pages = "373--384", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535845", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Stateless model checking is a powerful technique for program verification, which however suffers from an exponential growth in the number of explored executions. A successful technique for reducing this number, while still maintaining complete coverage, is Dynamic Partial Order Reduction (DPOR). We present a new DPOR algorithm, which is the first to be provably optimal in that it always explores the minimal number of executions. It is based on a novel class of sets, called source sets, which replace the role of persistent sets in previous algorithms. First, we show how to modify an existing DPOR algorithm to work with source sets, resulting in an efficient and simple to implement algorithm. 
Second, we extend this algorithm with a novel mechanism, called wakeup trees, that allows us to achieve optimality. We have implemented both algorithms in a stateless model checking tool for Erlang programs. Experiments show that source sets significantly increase the performance and that wakeup trees incur only a small overhead in both time and space.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Itzhaky:2014:MRA, author = "Shachar Itzhaky and Anindya Banerjee and Neil Immerman and Ori Lahav and Aleksandar Nanevski and Mooly Sagiv", title = "Modular reasoning about heap paths via effectively propositional formulas", journal = j-SIGPLAN, volume = "49", number = "1", pages = "385--396", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535854", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "First order logic with transitive closure, and separation logic enable elegant interactive verification of heap-manipulating programs. However, undecidability results and high asymptotic complexity of checking validity preclude complete automatic verification of such programs, even when loop invariants and procedure contracts are specified as formulas in these logics. This paper tackles the problem of procedure-modular verification of reachability properties of heap-manipulating programs using efficient decision procedures that are complete: that is, a SAT solver must generate a counterexample whenever a program does not satisfy its specification. By (a) requiring each procedure modifies a fixed set of heap partitions and creates a bounded amount of heap sharing, and (b) restricting program contracts and loop invariants to use only deterministic paths in the heap, we show that heap reachability updates can be described in a simple manner. The restrictions force program specifications and verification conditions to lie within a fragment of first-order logic with transitive closure that is reducible to effectively propositional logic, and hence facilitate sound, complete and efficient verification. We implemented a tool atop Z3 and report on preliminary experiments that establish the correctness of several programs that manipulate linked data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chong:2014:SCA, author = "Nathan Chong and Alastair F. Donaldson and Jeroen Ketema", title = "A sound and complete abstraction for reasoning about parallel prefix sums", journal = j-SIGPLAN, volume = "49", number = "1", pages = "397--409", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535882", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Prefix sums are key building blocks in the implementation of many concurrent software applications, and recently much work has gone into efficiently implementing prefix sums to run on massively parallel graphics processing units (GPUs).
Because they lie at the heart of many GPU-accelerated applications, the correctness of prefix sum implementations is of prime importance. We introduce a novel abstraction, the interval of summations, that allows scalable reasoning about implementations of prefix sums. We present this abstraction as a monoid, and prove a soundness and completeness result showing that a generic sequential prefix sum implementation is correct for an array of length $n$ if and only if it computes the correct result for a specific test case when instantiated with the interval of summations monoid. This allows correctness to be established by running a single test where the input and result require O(n lg(n)) space. This improves upon an existing result by Sheeran where the input requires O(n lg(n)) space and the result O(n$^2$ \lg(n)) space, and is more feasible for large n than a method by Voigtlaender that uses O(n) space for the input and result but requires running O(n$^2$ ) tests. We then extend our abstraction and results to the context of data-parallel programs, developing an automated verification method for GPU implementations of prefix sums. Our method uses static verification to prove that a generic prefix sum implementation is data race-free, after which functional correctness of the implementation can be determined by running a single test case under the interval of summations abstraction. We present an experimental evaluation using four different prefix sum algorithms, showing that our method is highly automatic, scales to large thread counts, and significantly outperforms Voigtlaender's method when applied to large arrays.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Miller:2014:ADS, author = "Andrew Miller and Michael Hicks and Jonathan Katz and Elaine Shi", title = "Authenticated data structures, generically", journal = j-SIGPLAN, volume = "49", number = "1", pages = "411--423", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535851", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "An authenticated data structure (ADS) is a data structure whose operations can be carried out by an untrusted prover, the results of which a verifier can efficiently check as authentic. This is done by having the prover produce a compact proof that the verifier can check along with each operation's result. ADSs thus support outsourcing data maintenance and processing tasks to untrusted servers without loss of integrity. Past work on ADSs has focused on particular data structures (or limited classes of data structures), one at a time, often with support only for particular operations. This paper presents a generic method, using a simple extension to a ML-like functional programming language we call \lambda o (lambda-auth), with which one can program authenticated operations over any data structure defined by standard type constructors, including recursive types, sums, and products. The programmer writes the data structure largely as usual and it is compiled to code to be run by the prover and verifier. 
Using a formalization of \lambda o we prove that all well-typed \lambda o programs result in code that is secure under the standard cryptographic assumption of collision-resistant hash functions. We have implemented \lambda o as an extension to the OCaml compiler, and have used it to produce authenticated versions of many interesting data structures including binary search trees, red-black+ trees, skip lists, and more. Performance experiments show that our approach is efficient, giving up little compared to the hand-optimized data structures developed previously.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Swamy:2014:GTE, author = "Nikhil Swamy and Cedric Fournet and Aseem Rastogi and Karthikeyan Bhargavan and Juan Chen and Pierre-Yves Strub and Gavin Bierman", title = "Gradual typing embedded securely in {JavaScript}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "425--437", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535889", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "JavaScript's flexible semantics makes writing correct code hard and writing secure code extremely difficult. To address the former problem, various forms of gradual typing have been proposed, such as Closure and TypeScript. However, supporting all common programming idioms is not easy; for example, TypeScript deliberately gives up type soundness for programming convenience. In this paper, we propose a gradual type system and implementation techniques that provide important safety and security guarantees. We present TS\# , a gradual type system and source-to-source compiler for JavaScript. In contrast to prior gradual type systems, TS\# features full runtime reflection over three kinds of types: (1) simple types for higher-order functions, recursive datatypes and dictionary-based extensible records; (2) the type any, for dynamically type-safe TS\# expressions; and (3) the type un, for untrusted, potentially malicious JavaScript contexts in which TS\# is embedded. After type-checking, the compiler instruments the program with various checks to ensure the type safety of TS\# despite its interactions with arbitrary JavaScript contexts, which are free to use eval, stack walks, prototype customizations, and other offensive features. The proof of our main theorem employs a form of type-preserving compilation, wherein we prove all the runtime invariants of the translation of TS\# to JavaScript by showing that translated programs are well-typed in JS\# , a previously proposed dependently typed language for proving functional correctness of JavaScript programs. We describe a prototype compiler, a secure runtime, and sample applications for TS\#. 
Our examples illustrate how web security patterns that developers currently program in JavaScript (with much difficulty and still with dubious results) can instead be programmed naturally in TS\#, retaining a flavor of idiomatic JavaScript, while providing strong safety guarantees by virtue of typing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Long:2014:SIF, author = "Fan Long and Stelios Sidiroglou-Douskos and Deokhwan Kim and Martin Rinard", title = "Sound input filter generation for integer overflow errors", journal = j-SIGPLAN, volume = "49", number = "1", pages = "439--452", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535888", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We present a system, SIFT, for generating input filters that nullify integer overflow errors associated with critical program sites such as memory allocation or block copy sites. SIFT uses a static program analysis to generate filters that discard inputs that may trigger integer overflow errors in the computations of the sizes of allocated memory blocks or the number of copied bytes in block copy operations. Unlike all previous techniques of which we are aware, SIFT is sound --- if an input passes the filter, it will not trigger an integer overflow error at any analyzed site. Our results show that SIFT successfully analyzes (and therefore generates sound input filters for) 56 out of 58 memory allocation and block memory copy sites in analyzed input processing modules from five applications (VLC, Dillo, Swfdec, Swftools, and GIMP). These nullified errors include six known integer overflow vulnerabilities. Our results also show that applying these filters to 62895 real-world inputs produces no false positives. The analysis and filter generation times are all less than a second.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Brotherston:2014:PCS, author = "James Brotherston and Jules Villard", title = "Parametric completeness for separation theories", journal = j-SIGPLAN, volume = "49", number = "1", pages = "453--464", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535844", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "In this paper, we close the logical gap between provability in the logic BBI, which is the propositional basis for separation logic, and validity in an intended class of separation models, as employed in applications of separation logic such as program verification. An intended class of separation models is usually specified by a collection of axioms describing the specific model properties that are expected to hold, which we call a separation theory. Our main contributions are as follows. First, we show that several typical properties of separation theories are not definable in BBI. 
Second, we show that these properties become definable in a suitable hybrid extension of BBI, obtained by adding a theory of naming to BBI in the same way that hybrid logic extends normal modal logic. The binder-free extension captures most of the properties we consider, and the full extension HyBBI($\forall$) with the usual $\forall$ binder of hybrid logic covers all these properties. Third, we present an axiomatic proof system for our hybrid logic whose extension with any set of ``pure'' axioms is sound and complete with respect to the models satisfying those axioms. As a corollary of this general result, we obtain, in a parametric manner, a sound and complete axiomatic proof system for any separation theory from our considered class. To the best of our knowledge, this class includes all separation theories appearing in the published literature.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hou:2014:PSP, author = "Zh{\'e} H{\'o}u and Ranald Clouston and Rajeev Gor{\'e} and Alwen Tiu", title = "Proof search for propositional abstract separation logics via labelled sequents", journal = j-SIGPLAN, volume = "49", number = "1", pages = "465--476", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535864", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Abstract separation logics are a family of extensions of Hoare logic for reasoning about programs that mutate memory. These logics are ``abstract'' because they are independent of any particular concrete memory model. Their assertion languages, called propositional abstract separation logics, extend the logic of (Boolean) Bunched Implications (BBI) in various ways. We develop a modular proof theory for various propositional abstract separation logics using cut-free labelled sequent calculi. We first extend the cut-free labelled sequent calculus for BBI of Hou et al to handle Calcagno et al's original logic of separation algebras by adding sound rules for partial-determinism and cancellativity, while preserving cut-elimination. We prove the completeness of our calculus via a sound intermediate calculus that enables us to construct counter-models from the failure to find a proof. We then capture other propositional abstract separation logics by adding sound rules for indivisible unit and disjointness, while maintaining completeness and cut-elimination.
We present a theorem prover based on our labelled calculus for these logics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lee:2014:PSS, author = "Wonyeol Lee and Sungwoo Park", title = "A proof system for separation logic with magic wand", journal = j-SIGPLAN, volume = "49", number = "1", pages = "477--490", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535871", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Separation logic is an extension of Hoare logic which is acknowledged as an enabling technology for large-scale program verification. It features two new logical connectives, separating conjunction and separating implication, but most of the applications of separation logic have exploited only separating conjunction without considering separating implication. Nevertheless the power of separating implication has been well recognized and there is a growing interest in its use for program verification. This paper develops a proof system for full separation logic which supports not only separating conjunction but also separating implication. The proof system is developed in the style of sequent calculus and satisfies the admissibility of cut. The key challenge in the development is to devise a set of inference rules for manipulating heap structures that ensure the completeness of the proof system with respect to separation logic. We show that our proof of completeness directly translates to a proof search strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Atkey:2014:PCL, author = "Robert Atkey", title = "From parametricity to conservation laws, via {Noether}'s theorem", journal = j-SIGPLAN, volume = "49", number = "1", pages = "491--502", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535867", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Invariance is of paramount importance in programming languages and in physics. In programming languages, John Reynolds' theory of relational parametricity demonstrates that parametric polymorphic programs are invariant under change of data representation, a property that yields ``free'' theorems about programs just from their types. In physics, Emmy Noether showed that if the action of a physical system is invariant under change of coordinates, then the physical system has a conserved quantity: a quantity that remains constant for all time. Knowledge of conserved quantities can reveal deep properties of physical systems. For example, the conservation of energy is by Noether's theorem a consequence of a system's invariance under time-shifting. In this paper, we link Reynolds' relational parametricity with Noether's theorem for deriving conserved quantities. We propose an extension of System F$ \omega $ with new kinds, types and term constants for writing programs that describe classical mechanical systems in terms of their Lagrangians. 
We show, by constructing a relationally parametric model of our extension of F$ \omega $, that relational parametricity is enough to satisfy the hypotheses of Noether's theorem, and so to derive conserved quantities for free, directly from the polymorphic types of Lagrangians expressed in our system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Atkey:2014:RPM, author = "Robert Atkey and Neil Ghani and Patricia Johann", title = "A relationally parametric model of dependent type theory", journal = j-SIGPLAN, volume = "49", number = "1", pages = "503--515", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535852", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Reynolds' theory of relational parametricity captures the invariance of polymorphically typed programs under change of data representation. Reynolds' original work exploited the typing discipline of the polymorphically typed lambda-calculus System F, but there is now considerable interest in extending relational parametricity to type systems that are richer and more expressive than that of System F. This paper constructs parametric models of predicative and impredicative dependent type theory. The significance of our models is twofold. Firstly, in the impredicative variant we are able to deduce the existence of initial algebras for all indexed functors. To our knowledge, ours is the first account of parametricity for dependent types that is able to lift the useful deduction of the existence of initial algebras in parametric models of System F to the dependently typed setting. Secondly, our models offer conceptual clarity by uniformly expressing relational parametricity for dependent types in terms of reflexive graphs, which allows us to unify the interpretations of types and kinds, instead of taking the relational interpretation of types as a primitive notion. Expressing our model in terms of reflexive graphs ensures that it has canonical choices for the interpretations of the standard type constructors of dependent type theory, except for the interpretation of the universe of small types, where we formulate a refined interpretation tailored for relational parametricity. Moreover, our reflexive graph model opens the door to generalisations of relational parametricity, for example to higher-dimensional relational parametricity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Murawski:2014:GSI, author = "Andrzej S. Murawski and Nikos Tzevelekos", title = "Game semantics for interface middleweight {Java}", journal = j-SIGPLAN, volume = "49", number = "1", pages = "517--528", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535880", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We consider an object calculus in which open terms interact with the environment through interfaces.
The calculus is intended to capture the essence of contextual interactions of Middleweight Java code. Using game semantics, we provide fully abstract models for the induced notions of contextual approximation and equivalence. These are the first denotational models of this kind.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Jeannet:2014:AAG, author = "Bertrand Jeannet and Peter Schrammel and Sriram Sankaranarayanan", title = "Abstract acceleration of general linear loops", journal = j-SIGPLAN, volume = "49", number = "1", pages = "529--540", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535843", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We present abstract acceleration techniques for computing loop invariants for numerical programs with linear assignments and conditionals. Whereas abstract interpretation techniques typically over-approximate the set of reachable states iteratively, abstract acceleration captures the effect of the loop with a single, non-iterative transfer function applied to the initial states at the loop head. In contrast to previous acceleration techniques, our approach applies to any linear loop without restrictions. Its novelty lies in the use of the Jordan normal form decomposition of the loop body to derive symbolic expressions for the entries of the matrix modeling the effect of $ n \geq 0 $ iterations of the loop. The entries of such a matrix depend on $ n $ through complex polynomial, exponential and trigonometric functions. Therefore, we introduce an abstract domain for matrices that captures the linear inequality relations between these complex expressions. This results in an abstract matrix for describing the fixpoint semantics of the loop. Our approach integrates smoothly into standard abstract interpreters and can handle programs with nested loops and loops containing conditional branches. We evaluate it over small but complex loops that are commonly found in control software, comparing it with other tools for computing linear loop invariants. The loops in our benchmarks typically exhibit polynomial, exponential and oscillatory behaviors that present challenges to existing approaches. Our approach finds non-trivial invariants to prove useful bounds on the values of variables for such loops, clearly outperforming the existing approaches in terms of precision while exhibiting good performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{DAntoni:2014:MSA, author = "Loris D'Antoni and Margus Veanes", title = "Minimization of symbolic automata", journal = j-SIGPLAN, volume = "49", number = "1", pages = "541--553", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535849", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Symbolic Automata extend classical automata by using symbolic alphabets instead of finite ones.
Most of the classical automata algorithms rely on the alphabet being finite, and generalizing them to the symbolic setting is not a trivial task. In this paper we study the problem of minimizing symbolic automata. We formally define and prove the basic properties of minimality in the symbolic setting, and lift classical minimization algorithms (Huffman-Moore's and Hopcroft's algorithms) to symbolic automata. While Hopcroft's algorithm is the fastest known algorithm for DFA minimization, we show how, in the presence of symbolic alphabets, it can incur an exponential blowup. To address this issue, we introduce a new algorithm that fully benefits from the symbolic representation of the alphabet and does not suffer from the exponential blowup. We provide a comprehensive performance evaluation of all the algorithms over large benchmarks and against existing state-of-the-art implementations. The experiments show how the new symbolic algorithm is faster than previous implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chaudhuri:2014:CAD, author = "Swarat Chaudhuri and Azadeh Farzan and Zachary Kincaid", title = "Consistency analysis of decision-making programs", journal = j-SIGPLAN, volume = "49", number = "1", pages = "555--567", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535858", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Applications in many areas of computing make discrete decisions under uncertainty, for reasons such as limited numerical precision in calculations and errors in sensor-derived inputs. As a result, individual decisions made by such programs may be nondeterministic, and lead to contradictory decisions at different points of an execution. This means that an otherwise correct program may execute along paths that it would not follow under its ideal semantics, violating essential program invariants on the way. A program is said to be consistent if it does not suffer from this problem despite uncertainty in decisions. In this paper, we present a sound, automatic program analysis for verifying that a program is consistent in this sense. Our analysis proves that each decision made along a program execution is consistent with the decisions made earlier in the execution. The proof is done by generating an invariant that abstracts the set of all decisions made along executions that end at a program location l, then verifying, using a fixpoint constraint-solver, that no contradiction can be derived when these decisions are combined with new decisions made at l. We evaluate our analysis on a collection of programs implementing algorithms in computational geometry. Consistency is known to be a critical, frequently-violated, and thoroughly studied correctness property in geometry, but ours is the first attempt at automated verification of consistency of geometric algorithms. Our benchmark suite consists of implementations of convex hull computation, triangulation, and point location algorithms.
On almost all examples that are consistent (with two exceptions), our analysis is able to verify consistency within a few minutes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2014:TGD, author = "Danfeng Zhang and Andrew C. Myers", title = "Toward general diagnosis of static errors", journal = j-SIGPLAN, volume = "49", number = "1", pages = "569--581", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535870", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We introduce a general way to locate programmer mistakes that are detected by static analyses such as type checking. The program analysis is expressed in a constraint language in which mistakes result in unsatisfiable constraints. Given an unsatisfiable system of constraints, both satisfiable and unsatisfiable constraints are analyzed, to identify the program expressions most likely to be the cause of unsatisfiability. The likelihood of different error explanations is evaluated under the assumption that the programmer's code is mostly correct, so the simplest explanations are chosen, following Bayesian principles. For analyses that rely on programmer-stated assumptions, the diagnosis also identifies assumptions likely to have been omitted. The new error diagnosis approach has been implemented for two very different program analyses: type inference in OCaml and information flow checking in Jif. The effectiveness of the approach is evaluated using previously collected programs containing errors. The results show that when compared to existing compilers and other tools, the general technique identifies the location of programmer errors significantly more accurately.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2014:CFT, author = "Sheng Chen and Martin Erwig", title = "Counter-factual typing for debugging type errors", journal = j-SIGPLAN, volume = "49", number = "1", pages = "583--594", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535863", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Changing a program in response to a type error plays an important part in modern software development. However, the generation of good type error messages remains a problem for highly expressive type systems. Existing approaches often suffer from a lack of precision in locating errors and proposing remedies. Specifically, they either fail to locate the source of the type error consistently, or they report too many potential error locations. Moreover, the change suggestions offered are often incorrect. This makes the debugging process tedious and ineffective. We present an approach to the problem of type debugging that is based on generating and filtering a comprehensive set of type-change suggestions. Specifically, we generate all (program-structure-preserving) type changes that can possibly fix the type error.
These suggestions will be ranked and presented to the programmer in an iterative fashion. In some cases we also produce suggestions to change the program. In most situations, this strategy delivers the correct change suggestions quickly, and at the same time never misses any rare suggestions. The computation of the potentially huge set of type-change suggestions is efficient since it is based on a variational type inference algorithm that type checks a program with variations only once, efficiently reusing type information for shared parts. We have evaluated our method and compared it with previous approaches. Based on a large set of examples drawn from the literature, we have found that our method outperforms other approaches and provides a viable alternative.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Boker:2014:BTS, author = "Udi Boker and Thomas A. Henzinger and Arjun Radhakrishna", title = "Battery transition systems", journal = j-SIGPLAN, volume = "49", number = "1", pages = "595--606", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535875", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "The analysis of the energy consumption of software is an important goal for quantitative formal methods. Current methods, using weighted transition systems or energy games, model the energy source as an ideal resource whose status is characterized by one number, namely the amount of remaining energy. Real batteries, however, exhibit behaviors that can deviate substantially from an ideal energy resource. Based on a discretization of a standard continuous battery model, we introduce {\em battery transition systems}. In this model, a battery is viewed as consisting of two parts --- the available-charge tank and the bound-charge tank. Any charge or discharge is applied to the available-charge tank. Over time, the energy from each tank diffuses to the other tank. Battery transition systems are infinite state systems that, being not well-structured, fall into no decidable class that is known to us. Nonetheless, we are able to prove that the $ \omega $-regular model-checking problem is decidable for battery transition systems. We also present a case study on the verification of control programs for energy-constrained semi-autonomous robots.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Li:2014:SOS, author = "Yi Li and Aws Albarghouthi and Zachary Kincaid and Arie Gurfinkel and Marsha Chechik", title = "Symbolic optimization with {SMT} solvers", journal = j-SIGPLAN, volume = "49", number = "1", pages = "607--618", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535857", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "The rise in efficiency of Satisfiability Modulo Theories (SMT) solvers has created numerous uses for them in software verification, program synthesis, functional programming, refinement types, etc. 
In all of these applications, SMT solvers are used for generating satisfying assignments (e.g., a witness for a bug) or proving unsatisfiability/validity (e.g., proving that a subtyping relation holds). We are often interested in finding not just an arbitrary satisfying assignment, but one that optimizes (minimizes/maximizes) certain criteria. For example, we might be interested in detecting program executions that maximize energy usage (performance bugs), or synthesizing short programs that do not make expensive API calls. Unfortunately, none of the available SMT solvers offer such optimization capabilities. In this paper, we present SYMBA, an efficient SMT-based optimization algorithm for objective functions in the theory of linear real arithmetic (LRA). Given a formula $ \phi $ and an objective function $ t $, SYMBA finds a satisfying assignment of $ \phi $ that maximizes the value of $ t $. SYMBA utilizes efficient SMT solvers as black boxes. As a result, it is easy to implement and it directly benefits from future advances in SMT solvers. Moreover, SYMBA can optimize a set of objective functions, reusing information between them to speed up the analysis. We have implemented SYMBA and evaluated it on a large number of optimization benchmarks drawn from program analysis tasks. Our results indicate the power and efficiency of SYMBA in comparison with competing approaches, and highlight the importance of its multi-objective-function feature.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Benton:2014:AEP, author = "Nick Benton and Martin Hofmann and Vivek Nigam", title = "Abstract effects and proof-relevant logical relations", journal = j-SIGPLAN, volume = "49", number = "1", pages = "619--631", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535869", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We give a denotational semantics for a region-based effect system that supports type abstraction in the sense that only externally visible effects need to be tracked: non-observable internal modifications, such as the reorganisation of a search tree or lazy initialisation, can count as 'pure' or 'read only'. This 'fictional purity' allows clients of a module to validate soundly more effect-based program equivalences than would be possible with previous semantics. Our semantics uses a novel variant of logical relations that maps types not merely to partial equivalence relations on values, as is commonly done, but rather to a proof-relevant generalisation thereof, namely setoids. The objects of a setoid establish that values inhabit semantic types, whilst its morphisms are understood as proofs of semantic equivalence.
The transition to proof-relevance solves two awkward problems caused by na{\"\i}ve use of existential quantification in Kripke logical relations, namely failure of admissibility and spurious functional dependencies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Katsumata:2014:PEM, author = "Shin-ya Katsumata", title = "Parametric effect monads and semantics of effect systems", journal = j-SIGPLAN, volume = "49", number = "1", pages = "633--645", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535846", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "We study fundamental properties of a generalisation of monad called parametric effect monad, and apply it to the interpretation of general effect systems whose effects have sequential composition operators. We show that parametric effect monads admit analogues of the structures and concepts that exist for monads, such as Kleisli triples, the state monad and the continuation monad, Plotkin and Power's algebraic operations, and the categorical $ \top \top $-lifting. We also show a systematic method to generate both effects and a parametric effect monad from a monad morphism. Finally, we introduce two effect systems with explicit and implicit subeffecting, and discuss their denotational semantics and the soundness of effect systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Pagani:2014:AQS, author = "Michele Pagani and Peter Selinger and Beno{\^\i}t Valiron", title = "Applying quantitative semantics to higher-order quantum computing", journal = j-SIGPLAN, volume = "49", number = "1", pages = "647--658", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535879", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Finding a denotational semantics for higher order quantum computation is a long-standing problem in the semantics of quantum programming languages. Most past approaches to this problem fell short in one way or another, either limiting the language to an unusably small finitary fragment, or giving up important features of quantum physics such as entanglement.
In this paper, we propose a denotational semantics for a quantum lambda calculus with recursion and an infinite data type, using constructions from quantitative semantics of linear logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Accattoli:2014:NST, author = "Beniamino Accattoli and Eduardo Bonelli and Delia Kesner and Carlos Lombardi", title = "A nonstandard standardization theorem", journal = j-SIGPLAN, volume = "49", number = "1", pages = "659--670", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535886", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Standardization is a fundamental notion for connecting programming languages and rewriting calculi. Since both programming languages and calculi rely on substitution for defining their dynamics, explicit substitutions (ES) help further close the gap between theory and practice. This paper focuses on standardization for the linear substitution calculus, a calculus with ES capable of mimicking reduction in lambda-calculus and linear logic proof-nets. For the latter, proof-nets can be formalized by means of a simple equational theory over the linear substitution calculus. Contrary to other extant calculi with ES, our system can be equipped with a residual theory in the sense of L{\'e}vy, which is used to prove a left-to-right standardization theorem for the calculus with ES but without the equational theory. Such a theorem, however, does not lift from the calculus with ES to proof-nets, because the notion of left-to-right derivation is not preserved by the equational theory. We then relax the notion of left-to-right standard derivation, based on a total order on redexes, to a more liberal notion of standard derivation based on partial orders. Our proofs rely on Gonthier, L{\'e}vy, and Melli{\`e}s' axiomatic theory for standardization. However, we go beyond merely applying their framework, revisiting some of its key concepts: we obtain uniqueness (modulo) of standard derivations in an abstract way and we provide a coinductive characterization of their key abstract notion of external redex. This last point is then used to give a simple proof that linear head reduction --a nondeterministic strategy having a central role in the theory of linear logic-- is standard.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Eisenberg:2014:CTF, author = "Richard A. Eisenberg and Dimitrios Vytiniotis and Simon Peyton Jones and Stephanie Weirich", title = "Closed type families with overlapping equations", journal = j-SIGPLAN, volume = "49", number = "1", pages = "671--683", month = jan, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578855.2535856", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Mar 4 17:04:57 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "POPL '14 conference proceedings.", abstract = "Open, type-level functions are a recent innovation in Haskell that move Haskell towards the expressiveness of dependent types, while retaining the look and feel of a practical programming language. 
This paper shows how to increase expressiveness still further, by adding closed type functions whose equations may overlap, and may have non-linear patterns over an open type universe. Although practically useful and simple to implement, these features go beyond conventional dependent type theory in some respects, and have a subtle metatheory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lerner:2014:TRT, author = "Benjamin S. Lerner and Joe Gibbs Politz and Arjun Guha and Shriram Krishnamurthi", title = "{TeJaS}: retrofitting type systems for {JavaScript}", journal = j-SIGPLAN, volume = "49", number = "2", pages = "1--16", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508170", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "JavaScript programs vary widely in functionality, complexity, and use, and analyses of these programs must accommodate such variations. Type-based analyses are typically the simplest such analyses, but due to the language's subtle idioms and many application-specific needs --- such as ensuring general-purpose type correctness, security properties, or proper library usage --- we have found that a single type system does not suffice for all purposes. However, these varied uses still share many reusable common elements. In this paper we present TeJaS, a framework for building type systems for JavaScript. TeJaS has been engineered modularly to encourage experimentation. Its initial type environment is reified, to admit easy modeling of the various execution contexts of JavaScript programs, and its type language and typing rules are extensible, to enable variations of the type system to be constructed easily. The paper presents the base TeJaS type system, which performs traditional type-checking for JavaScript. Because JavaScript demands complex types, we explain several design decisions to improve user ergonomics. We then describe TeJaS's modular structure, and illustrate it by reconstructing the essence of a very different type system for JavaScript. Systems built from TeJaS have been applied to several real-world, third-party JavaScript programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Kashyap:2014:TRS, author = "Vineeth Kashyap and John Sarracino and John Wagner and Ben Wiedermann and Ben Hardekopf", title = "Type refinement for static analysis of {JavaScript}", journal = j-SIGPLAN, volume = "49", number = "2", pages = "17--26", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508175", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "Static analysis of JavaScript has proven useful for a variety of purposes, including optimization, error checking, security auditing, program refactoring, and more. 
We propose a technique called type refinement that can improve the precision of such static analyses for JavaScript without any discernible performance impact. Refinement is a known technique that uses the conditions in branch guards to refine the analysis information propagated along each branch path. The key insight of this paper is to recognize that JavaScript semantics include many implicit conditional checks on types, and that performing type refinement on these implicit checks provides significant benefit for analysis precision. To demonstrate the effectiveness of type refinement, we implement a static analysis tool for reporting potential type-errors in JavaScript programs. We provide an extensive empirical evaluation of type refinement using a benchmark suite containing a variety of JavaScript application domains, ranging from the standard performance benchmark suites (Sunspider and Octane), to open-source JavaScript applications, to machine-generated JavaScript via Emscripten. We show that type refinement can significantly improve analysis precision by up to 86\% without affecting the performance of the analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Allende:2014:CIS, author = "Esteban Allende and Johan Fabry and {\'E}ric Tanter", title = "Cast insertion strategies for gradually-typed objects", journal = j-SIGPLAN, volume = "49", number = "2", pages = "27--36", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508171", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "Gradual typing enables a smooth and progressive integration of static and dynamic typing. The semantics of a gradually-typed program is given by translation to an intermediate language with casts: runtime type checks that control the boundaries between statically- and dynamically-typed portions of a program. This paper studies the performance of different cast insertion strategies in the context of Gradualtalk, a gradually-typed Smalltalk. We first implement the strategy specified by Siek and Taha, which inserts casts at call sites. We then study the dual approach, which consists in performing casts in callees. Based on the observation that both strategies perform well in different scenarios, we design a hybrid strategy that combines the best of each approach. We evaluate these three strategies using both micro- and macro-benchmarks. We also discuss the impact of these strategies on memory, modularity, and inheritance. The hybrid strategy constitutes a promising cast insertion strategy for adding gradual types to existing dynamically-typed languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Kedlaya:2014:ITS, author = "Madhukar N. 
Kedlaya and Jared Roesch and Behnam Robatmili and Mehrdad Reshadi and Ben Hardekopf", title = "Improved type specialization for dynamic scripting languages", journal = j-SIGPLAN, volume = "49", number = "2", pages = "37--48", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508177", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "Type feedback and type inference are two common methods used to optimize dynamic languages such as JavaScript. Each of these methods has its own strengths and weaknesses, and we propose that each can benefit from the other if combined in the right way. We explore the interdependency between these two methods and propose two novel ways to combine them in order to significantly increase their aggregate benefit and decrease their aggregate overhead. In our proposed strategy, an initial type inference pass is applied that can reduce type feedback overhead by enabling more intelligent placement of profiling hooks. This initial type inference pass is novel in the literature. After profiling, a final type inference pass uses the type information from profiling to generate efficient code. While this second pass is not novel, we significantly improve its effectiveness in a novel way by feeding the type inference pass information about the function signature, i.e., the types of the function's arguments for a specific function invocation. Our results show significant speedups when using these low-overhead strategies, ranging from $ 1.2 \times $ to $ 4 \times $ over an implementation that does not perform type feedback or type inference based optimizations. Our experiments are carried out across a wide range of traditional benchmarks and realistic web applications. The results also show an average reduction of 23.5\% in the size of the profiled data for these benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Keil:2014:EDA, author = "Matthias Keil and Peter Thiemann", title = "Efficient dynamic access analysis using {JavaScript} proxies", journal = j-SIGPLAN, volume = "49", number = "2", pages = "49--60", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "JSConTest introduced the notions of effect monitoring and dynamic effect inference for JavaScript. It enables the description of effects with path specifications resembling regular expressions. It is implemented by an offline source code transformation. To overcome the limitations of the JSConTest implementation, we redesigned and reimplemented effect monitoring by taking advantage of JavaScript proxies. Our new design avoids all drawbacks of the prior implementation. 
It guarantees full interposition; it is not restricted to a subset of JavaScript; it is self-maintaining; and its scalability to large programs is significantly better than with JSConTest. The improved scalability has two sources. First, the reimplementation is significantly faster than the original, transformation-based implementation. Second, the reimplementation relies on the fly-weight pattern and on trace reduction to conserve memory. Only the combination of these techniques enables monitoring and inference for large programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Weiher:2014:PIU, author = "Marcel Weiher and Robert Hirschfeld", title = "Polymorphic identifiers: uniform resource access in {Objective-Smalltalk}", journal = j-SIGPLAN, volume = "49", number = "2", pages = "61--72", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508169", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "In object-oriented programming, polymorphic dispatch of operations decouples clients from specific providers of services and allows implementations to be modified or substituted without affecting clients. The Uniform Access Principle (UAP) tries to extend these qualities to resource access by demanding that access to state be indistinguishable from access to operations. Despite language features supporting the UAP, the overall goal of substitutability has not been achieved for either alternative resources such as keyed storage, files or web pages, or for alternate access mechanisms: specific kinds of resources are bound to specific access mechanisms and vice versa. Changing storage or access patterns either requires changes to both clients and service providers and trying to maintain the UAP imposes significant penalties in terms of code-duplication and/or performance overhead. We propose introducing first class identifiers as polymorphic names for storage locations to solve these problems. 
With these Polymorphic Identifiers, we show that we can provide uniform access to a wide variety of resource types as well as storage and access mechanisms, whether parametrized or direct, without affecting client code, without causing code duplication or significant performance penalties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Park:2014:AAS, author = "Changhee Park and Hongki Lee and Sukyoung Ryu", title = "All about the with statement in {JavaScript}: removing with statements in {JavaScript} applications", journal = j-SIGPLAN, volume = "49", number = "2", pages = "73--84", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508173", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "The with statement in JavaScript makes static analysis of JavaScript applications difficult by introducing a new scope at run time and thus invalidating lexical scoping. Therefore, many static approaches to JavaScript program analysis and the strict mode of ECMAScript 5 simply disallow the with statement. To justify exclusion of the with statement, we should better understand the actual usage patterns of the with statement. In this paper, we present the usage patterns of the with statement in real-world JavaScript applications currently used in the 898 most popular web sites. We investigate whether we can rewrite the with statements in each pattern to other statements not using the with statement. We show that we can rewrite all the static occurrences of the with statement that do not have any dynamic code generating functions. Even though the rewriting process is not applicable to any dynamically generated with statements, our results are still promising. Because all the static approaches that disallow the with statement also disallow dynamic code generation, such static approaches can allow the with statement using our rewriting process. We formally present our rewriting strategy, provide its implementation, and show its faithfulness using extensive testing. We believe that removing with statements will simplify JavaScript program analysis designs without considering dynamic scope introduction while imposing fewer syntactic restrictions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Lameed:2014:OMF, author = "Nurudeen A. Lameed and Laurie J. 
Hendren", title = "Optimizing {MATLAB} {\tt feval} with dynamic techniques", journal = j-SIGPLAN, volume = "49", number = "2", pages = "85--96", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508174", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "MATLAB is a popular dynamic array-based language used by engineers, scientists and students worldwide. The built-in function feval is an important MATLAB feature for certain classes of numerical programs and solvers which benefit from having functions as parameters. Programmers may pass a function name or function handle to the solver and then the solver uses feval to indirectly call the function. In this paper, we show that there are significant performance overheads for function calls via feval, in both MATLAB interpreters and JITs. The paper then proposes, implements and compares two on-the-fly mechanisms for specialization of feval calls. The first approach uses on-stack replacement technology, as supported by McVM/McOSR. The second approach specializes calls of functions with feval using a combination of runtime input argument types and values. Experimental results on seven numerical solvers show that the techniques provide good performance improvements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Yoo:2014:WRR, author = "Danny Yoo and Shriram Krishnamurthi", title = "{Whalesong}: running {Racket} in the browser", journal = j-SIGPLAN, volume = "49", number = "2", pages = "97--108", month = feb, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2578856.2508172", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 06:09:05 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "DLS '13 conference proceedings.", abstract = "JavaScript is the language of the ubiquitous Web, but it only poorly supports event-driven functional programs due to its single-threaded, asynchronous nature and lack of rich control flow operators. We present Whalesong, a compiler from Racket that generates JavaScript code that masks these problems. We discuss the implementation strategy using delimited continuations, an interface to the DOM, and an FFI for adapting JavaScript libraries to add new platform-dependent reactive features. In the process, we also describe extensions to Racket's functional event-driven programming model. 
We also briefly discuss the implementation details.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '13 conference proceedings.", } @Article{Bodik:2014:MBS, author = "Rastislav Bodik", title = "Modeling biology with solver-aided programming languages", journal = j-SIGPLAN, volume = "49", number = "3", pages = "1--2", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517229", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A good model of a biological cell exposes secrets of the cell's signaling mechanisms, explaining diseases and facilitating drug discovery. Modeling cells is fundamentally a programming problem --- it's programming because the model is a concurrent program that simulates the cell, and it's a problem because it is hard to write a program that reproduces all experimental observations of the cell faithfully. In this talk, I will introduce solver-aided programming languages and show how they ease modeling biology as well as make programming accessible to non-programmers. Solver-aided languages come with constructs that delegate part of the programming problem to a constraint solver, which can be guided to synthesize parts of the program, localize its bugs, or act as a clairvoyant oracle. I will describe our work on synthesis of stem cell models in {\em C. elegans} and then show how our framework called Rosette can rapidly implement a solver-aided language in several domains, from programming by demonstration to spatial parallel programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Erdweg:2014:FEL, author = "Sebastian Erdweg and Felix Rieger", title = "A framework for extensible languages", journal = j-SIGPLAN, volume = "49", number = "3", pages = "3--12", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Extensible programming languages such as SugarJ or Racket enable programmers to introduce customary language features as extensions of the base language. Traditionally, systems that support language extensions are either (i) agnostic to the base language or (ii) only support a single base language. In this paper, we present a framework for language extensibility that turns a non-extensible language into an extensible language featuring library-based extensible syntax, extensible static analyses, and extensible editor support. To make a language extensible, our framework only requires knowledge of the base language's grammar, the syntax for import statements (which activate extensions), and how to compile base-language programs.
We have evaluated the generality of our framework by instantiating it for Java, Haskell, Prolog, JavaScript, and System F$_{ \omega }$, and by studying existing module-system features and their support in our framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Flatt:2014:SRY, author = "Matthew Flatt", title = "Submodules in {Racket}: you want it when, again?", journal = j-SIGPLAN, volume = "49", number = "3", pages = "13--22", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517211", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In an extensible programming language, programmers write code that must run at different times --- in particular, at compile time versus run time. The module system of the Racket programming language enables a programmer to reason about programs in the face of such extensibility, because the distinction between run-time and compile-time phases is built into the language model. Submodules extend Racket's module system to make the phase-separation facet of the language extensible. That is, submodules give programmers the capability to define new phases, such as `test time' or `documentation time,' with the same reasoning and code-management benefits as the built-in distinction between run time and compile time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Dyer:2014:DVE, author = "Robert Dyer and Hridesh Rajan and Tien N. Nguyen", title = "Declarative visitors to ease fine-grained source code mining with full history on billions of {AST} nodes", journal = j-SIGPLAN, volume = "49", number = "3", pages = "23--32", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517226", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software repositories contain a vast wealth of information about software development. Mining these repositories has proven useful for detecting patterns in software development, testing hypotheses for new software engineering approaches, etc. Specifically, mining source code has yielded significant insights into software development artifacts and processes. Unfortunately, mining source code at a large-scale remains a difficult task. Previous approaches had to either limit the scope of the projects studied, limit the scope of the mining task to be more coarse-grained, or sacrifice studying the history of the code due to both human and computational scalability issues. In this paper we address the substantial challenges of mining source code: (a) at a very large scale; (b) at a fine-grained level of detail; and (c) with full history information. To address these challenges, we present domain-specific language features for source code mining.
Our language features are inspired by object-oriented visitors and provide a default depth-first traversal strategy along with two expressions for defining custom traversals. We provide an implementation of these features in the Boa infrastructure for software repository mining and describe a code generation strategy into Java code. To show the usability of our domain-specific language features, we reproduced over 40 source code mining tasks from two large-scale previous studies in just 2 person-weeks. The resulting code for these tasks show between $ 2.0 \times $--$ 4.8 \times $ reduction in code size. Finally we perform a small controlled experiment to gain insights into how easily mining tasks written using our language features can be understood, with no prior training. We show a substantial number of tasks (77\%) were understood by study participants, in about 3 minutes per task.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Solodkyy:2014:OPM, author = "Yuriy Solodkyy and Gabriel {Dos Reis} and Bjarne Stroustrup", title = "Open pattern matching for {C++}", journal = j-SIGPLAN, volume = "49", number = "3", pages = "33--42", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517222", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Pattern matching is an abstraction mechanism that can greatly simplify source code. We present functional-style pattern matching for C++ implemented as a library, called Mach7$^1$. All the patterns are user-definable, can be stored in variables, passed among functions, and allow the use of class hierarchies. As an example, we implement common patterns used in functional languages. Our approach to pattern matching is based on compile-time composition of pattern objects through concepts. This is superior (in terms of performance and expressiveness) to approaches based on run-time composition of polymorphic pattern objects. In particular, our solution allows mapping functional code based on pattern matching directly into C++ and produces code that is only a few percent slower than hand-optimized C++ code. The library uses an efficient type switch construct, further extending it to multiple scrutinees and general patterns. We compare the performance of pattern matching to that of double dispatch and open multi-methods in C++.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Martin:2014:TCR, author = "Marko Martin and Mira Mezini and Sebastian Erdweg", title = "Template constructors for reusable object initialization", journal = j-SIGPLAN, volume = "49", number = "3", pages = "43--52", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517212", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reuse of and abstraction over object initialization logic is not properly supported in mainstream object-oriented languages. 
This may result in significant amount of boilerplate code and proliferation of constructors in subclasses. It also makes it impossible for mixins to extend the initialization interface of classes they are applied to. We propose template constructors, which employ template parameters and pattern matching of them against signatures of superclass constructors to enable a one-to-many binding of super-calls. We demonstrate how template constructors solve the aforementioned problems. We present a formalization of the concept, a Java-based implementation, and use cases which exercise its strengths.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Richard-Foy:2014:EHL, author = "Julien Richard-Foy and Olivier Barais and Jean-Marc J{\'e}z{\'e}quel", title = "Efficient high-level abstractions for {Web} programming", journal = j-SIGPLAN, volume = "49", number = "3", pages = "53--60", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517227", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing large Web applications is known to be difficult. One challenge comes from the fact that the application's logic is scattered into heterogeneous clients and servers, making it difficult to share code between both sides or to move code from one side to the other. Another challenge is performance: while Web applications rely on ever more code on the client-side, they may run on smart phones with limited hardware capabilities. These two challenges raise the following problem: how to benefit from high-level languages and libraries making code complexity easier to manage and abstracting over the clients and servers differences without trading this ease of engineering for performance? This article presents high-level abstractions defined as deep embedded DSLs in Scala that can generate efficient code leveraging the characteristics of both client and server environments. We compare performance on client-side against other candidate technologies and against hand written low-level JavaScript code. Though code written with our DSL has a high level of abstraction, our benchmark on a real world application reports that it runs as fast as hand tuned low-level JavaScript code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Gerakios:2014:RTP, author = "Prodromos Gerakios and Aggelos Biboudis and Yannis Smaragdakis", title = "Reified type parameters using {Java} annotations", journal = j-SIGPLAN, volume = "49", number = "3", pages = "61--64", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517223", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java generics are compiled by-erasure: all clients reuse the same bytecode, with uses of the unknown type erased. 
C++ templates are compiled by-expansion: each type-instantiation of a template produces a different code definition. The two approaches offer trade-offs on multiple axes. We propose an extension of Java generics that allows by-expansion translation relative to selected type parameters only. This language design allows sophisticated users to get the best of both worlds at a fine granularity. Furthermore, our proposal is based on Java 8 Type Annotations (JSR 308) and the Checker Framework as an abstraction layer for controlling compilation without changes to the internals of a Java compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Schulze:2014:DDP, author = "Sandro Schulze and J{\"o}rg Liebig and Janet Siegmund and Sven Apel", title = "Does the discipline of preprocessor annotations matter?: a controlled experiment", journal = j-SIGPLAN, volume = "49", number = "3", pages = "65--74", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517215", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C preprocessor ( CPP ) is a simple and language-independent tool, widely used to implement variable software systems using conditional compilation (i.e., by including or excluding annotated code). Although CPP provides powerful means to express variability, it has been criticized for allowing arbitrary annotations that break the underlying structure of the source code. We distinguish between disciplined annotations, which align with the structure of the source code, and undisciplined annotations, which do not. Several studies suggest that especially the latter type of annotations makes it hard to (automatically) analyze the code. However, little is known about whether the type of annotations has an effect on program comprehension. We address this issue by means of a controlled experiment with human subjects. We designed similar tasks for both, disciplined and undisciplined annotations, to measure program comprehension. Then, we measured the performance of the subjects regarding correctness and response time for solving the tasks. Our results suggest that there are no differences between disciplined and undisciplined annotations from a program-comprehension perspective. Nevertheless, we observed that finding and correcting errors is a time-consuming and tedious task in the presence of preprocessor annotations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Medeiros:2014:IPB, author = "Fl{\'a}vio Medeiros and M{\'a}rcio Ribeiro and Rohit Gheyi", title = "Investigating preprocessor-based syntax errors", journal = j-SIGPLAN, volume = "49", number = "3", pages = "75--84", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517221", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C preprocessor is commonly used to implement variability in program families. 
Despite the widespread usage, some studies indicate that the C preprocessor makes variability implementation difficult and error-prone. However, we still lack studies to investigate preprocessor-based syntax errors and quantify to what extent they occur in practice. In this paper, we define a technique based on a variability-aware parser to find syntax errors in releases and commits of program families. To investigate these errors, we perform an empirical study where we use our technique in 41 program family releases, and more than 51 thousand commits of 8 program families. We find 7 and 20 syntax errors in releases and commits of program families, respectively. They are related not only to incomplete annotations, but also to complete ones. We submit 8 patches to fix errors that developers have not fixed yet, and they accept 75\% of them. Our results reveal that the time developers need to fix the errors varies from days to years in family repositories. We detect errors even in releases of well-known and widely used program families, such as Bash, CVS and Vim. We also classify the syntax errors into 6 different categories. This classification may guide developers to avoid them during development.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Kramer:2014:UDO, author = "Dean Kramer and Samia Oussena and Peter Komisarczuk and Tony Clark", title = "Using document-oriented {GUIs} in dynamic software product lines", journal = j-SIGPLAN, volume = "49", number = "3", pages = "85--94", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517214", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic Software Product Line (DSPL) Engineering has gained interest through its promise of being able to unify software adaptation whereby software adaptation can be realised at compile time and runtime. While previous work has enabled program logic adaptation by the use of language extensions and platform support, little attention has been placed on Graphical User Interface (GUI) variability. Different design patterns including the Model View Controller are commonly used in GUI implementation, with GUI documents being used for declaring the GUI. To handle dynamic GUI variability currently, the developer needs to implement GUI refinements using multiple techniques. This paper proposes a solution for dealing with GUI document variability, statically and dynamically, in a unified way. In our approach, we currently use a compile time method for producing GUI variants, and code transformations to handle these variants within the application at runtime. To avoid GUI duplicates, only GUI variants that are unique, and related to a valid product configuration, are produced. 
To validate our approach, we implemented tool support to enable this for Android-based applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Siegmund:2014:FBP, author = "Norbert Siegmund and Alexander von Rhein and Sven Apel", title = "Family-based performance measurement", journal = j-SIGPLAN, volume = "49", number = "3", pages = "95--104", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517209", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most contemporary programs are customizable. They provide many features that give rise to millions of program variants. Determining which feature selection yields an optimal performance is challenging, because of the exponential number of variants. Predicting the performance of a variant based on previous measurements proved successful, but induces a trade-off between the measurement effort and prediction accuracy. We propose the alternative approach of family-based performance measurement, to reduce the number of measurements required for identifying feature interactions and for obtaining accurate predictions. The key idea is to create a variant simulator (by translating compile-time variability to run-time variability) that can simulate the behavior of all program variants. We use it to measure performance of individual methods, trace methods to features, and infer feature interactions based on the call graph. We evaluate our approach by means of five feature-oriented programs. On average, we achieve an accuracy of 98\%, with only a single measurement per customizable program. Observations show that our approach opens avenues of future research in different domains, such as feature-interaction detection and testing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Marek:2014:SRC, author = "Luk{\'a}s Marek and Stephen Kell and Yudi Zheng and Lubom{\'\i}r Bulej and Walter Binder and Petr Tuma and Danilo Ansaloni and Aibek Sarimbekov and Andreas Sewe", title = "{ShadowVM}: robust and comprehensive dynamic program analysis for the {Java} platform", journal = j-SIGPLAN, volume = "49", number = "3", pages = "105--114", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517219", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic analysis tools are often implemented using instrumentation, particularly on managed runtimes including the Java Virtual Machine (JVM). Performing instrumentation robustly is especially complex on such runtimes: existing frameworks offer limited coverage and poor isolation, while previous work has shown that apparently innocuous instrumentation can cause deadlocks or crashes in the observed application. This paper describes ShadowVM, a system for instrumentation-based dynamic analyses on the JVM which combines a number of techniques to greatly improve both isolation and coverage.
These centre on the offload of analysis to a separate process; we believe our design is the first system to enable genuinely full bytecode coverage on the JVM. We describe a working implementation, and use a case study to demonstrate its improved coverage and to evaluate its runtime overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Kolesnikov:2014:CPB, author = "Sergiy Kolesnikov and Alexander von Rhein and Claus Hunsen and Sven Apel", title = "A comparison of product-based, feature-based, and family-based type checking", journal = j-SIGPLAN, volume = "49", number = "3", pages = "115--124", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Analyzing software product lines is difficult, due to their inherent variability. In the past, several strategies for product-line analysis have been proposed, in particular, product-based, feature-based, and family-based strategies. Despite recent attempts to conceptually and empirically compare different strategies, there is no work that empirically compares all of the three strategies in a controlled setting. We close this gap by extending a compiler for feature-oriented programming with support for product-based, feature-based, and family-based type checking. We present and discuss the results of a comparative performance evaluation that we conducted on a set of 12 feature-oriented, Java-based product lines. Most notably, we found that the family-based strategy is superior for all subject product lines: it is substantially faster, it detects all kinds of errors, and provides the most detailed information about them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Ofenbeck:2014:SST, author = "Georg Ofenbeck and Tiark Rompf and Alen Stojanov and Martin Odersky and Markus P{\"u}schel", title = "{Spiral} in {Scala}: towards the systematic construction of generators for performance libraries", journal = j-SIGPLAN, volume = "49", number = "3", pages = "125--134", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517228", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program generators for high performance libraries are an appealing solution to the recurring problem of porting and optimizing code with every new processor generation, but only few such generators exist to date. This is due to not only the difficulty of the design, but also of the actual implementation, which often results in an ad-hoc collection of standalone programs and scripts that are hard to extend, maintain, or reuse. In this paper we ask whether and which programming language concepts and features are needed to enable a more systematic construction of such generators. 
The systematic approach we advocate extrapolates from existing generators: (a) describing the problem and algorithmic knowledge using one, or several, domain-specific languages (DSLs), (b) expressing optimizations and choices as rewrite rules on DSL programs, (c) designing data structures that can be configured to control the type of code that is generated and the data representation used, and (d) using autotuning to select the best-performing alternative. As a case study, we implement a small, but representative subset of Spiral in Scala using the Lightweight Modular Staging (LMS) framework. The first main contribution of this paper is the realization of (c) using type classes to abstract over staging decisions, i.e. which pieces of a computation are performed immediately and for which pieces code is generated. Specifically, we abstract over different complex data representations jointly with different code representations including generating loops versus unrolled code with scalar replacement --- a crucial and usually tedious performance transformation. The second main contribution is to provide full support for (a) and (d) within the LMS framework: we extend LMS to support translation between different DSLs and autotuning through search.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Chapin:2014:SNT, author = "Peter Chapin and Christian Skalka and Scott Smith and Michael Watson", title = "{Scalaness\slash nesT}: type specialized staged programming for sensor networks", journal = j-SIGPLAN, volume = "49", number = "3", pages = "135--144", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517217", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming wireless embedded networks is challenging due to severe limitations on processing speed, memory, and bandwidth. Staged programming can help bridge the gap between high level code refinement techniques and efficient device level programs by allowing a first stage program to specialize device level code. Here we introduce a two stage programming system for wireless sensor networks. The first stage program is written in our extended dialect of Scala, called Scalaness, where components written in our type safe dialect of nesC, called nesT, are composed and specialized. Scalaness programs can dynamically construct TinyOS-compliant nesT device images that can be deployed to motes. A key result, called cross-stage type safety, shows that successful static type checking of a Scalaness program means no type errors will arise either during programmatic composition and specialization of WSN code, or later on the WSN itself. Scalaness has been implemented through direct modification of the Scala compiler. Implementation of a staged public-key cryptography calculation shows the sensor memory footprint can be significantly reduced by staging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Sujeeth:2014:FGH, author = "Arvind K. Sujeeth and Austin Gibbons and Kevin J. 
Brown and HyoukJoong Lee and Tiark Rompf and Martin Odersky and Kunle Olukotun", title = "Forge: generating a high performance {DSL} implementation from a declarative specification", journal = j-SIGPLAN, volume = "49", number = "3", pages = "145--154", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517220", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Domain-specific languages provide a promising path to automatically compile high-level code to parallel, heterogeneous, and distributed hardware. However, in practice high performance DSLs still require considerable software expertise to develop and force users into tool-chains that hinder prototyping and debugging. To address these problems, we present Forge, a new meta DSL for declaratively specifying high performance embedded DSLs. Forge provides DSL authors with high-level abstractions (e.g., data structures, parallel patterns, effects) for specifying their DSL in a way that permits high performance. From this high-level specification, Forge automatically generates both a na{\"\i}ve Scala library implementation of the DSL and a high performance version using the Delite DSL framework. Users of a Forge-generated DSL can prototype their application using the library version, and then switch to the Delite version to run on multicore CPUs, GPUs, and clusters without changing the application code. Forge-generated Delite DSLs perform within 2x of hand-optimized C++ and up to $ 40 \times $ better than Spark, an alternative high-level distributed programming environment. Compared to a manually implemented Delite DSL, Forge provides a factor of $3$--$ 6 \times $ reduction in lines of code and does not sacrifice any performance. Furthermore, Forge specifications can be generated from existing Scala libraries, are easy to maintain, shield DSL developers from changes in the Delite framework, and enable DSLs to be retargeted to other frameworks transparently.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Kurilova:2014:SSL, author = "Darya Kurilova and Derek Rayside", title = "On the simplicity of synthesizing linked data structure operations", journal = j-SIGPLAN, volume = "49", number = "3", pages = "155--158", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517225", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We argue that synthesizing operations on recursive linked data structures is not as hard as it appears and is, in fact, within reach of current SAT-based synthesis techniques --- with the addition of a simple approach that we describe to decompose the problem into smaller parts. To generate smaller pieces of code, i.e., shorter routines, is obviously easier than large and complex routines, and, also, there is more potential for automating the code synthesis. 
In this paper, we present a code generation algorithm for synthesizing operations of linked data structures and, as an example, describe how the proposed algorithm works to synthesize operations of an AVL tree.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Dhungana:2014:GCD, author = "Deepak Dhungana and Andreas Falkner and Alois Haselb{\"o}ck", title = "Generation of conjoint domain models for system-of-systems", journal = j-SIGPLAN, volume = "49", number = "3", pages = "159--168", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517224", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software solutions in complex environments, such as railway control systems or power plants, are assemblies of heterogeneous components, which are very large and complex systems themselves. Interplay of these systems requires a thorough design of a system-of-systems (SoS) encompassing the required interactions between the involved systems. One of the challenges lies in reconciliation of the domain data structures and runtime constraints to ensure consistency of the SoS behavior. In this paper, we present a generative approach that enables reconciliation of a common platform based on reusable domain models of the involved systems. This is comparable to a product line configuration problem where we generate a common platform model for all involved systems. We discuss the specific requirements for model composition in a SoS context and address them in our approach. In particular, our approach addresses the operational and managerial independence of the individual systems and offers appropriate modeling constructs. We report on our experiences of applying the approach in several real world projects and share the lessons learned.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Basso:2014:SLS, author = "F{\'a}bio Paulo Basso and Raquel Mainardi Pillat and Toacy Cavalcante Oliveira and Leandro Buss Becker", title = "Supporting large scale model transformation reuse", journal = j-SIGPLAN, volume = "49", number = "3", pages = "169--178", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517218", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The growth of applications developed with the support of model transformations makes reuse a required practice, especially when applied to transformation assets (e.g. transformation chains, algorithms, and configuration files). In order to promote reuse one must consider the different implementations, commonalities, and variants among these assets. In this domain, a couple of techniques have been used as solutions to adapt reusable assets for specific needs. However, so far, no work has discussed their combined use in real software projects. In this paper, we present a new tool named WCT, which can be used to adapt transformation assets.
Moreover, through lessons learned in industry, we address some reuse techniques devoted to adapting these assets.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{An:2014:MDG, author = "Kyoungho An and Takayuki Kuroda and Aniruddha Gokhale and Sumant Tambe and Andrea Sorbini", title = "Model-driven generative framework for automated {OMG DDS} performance testing in the cloud", journal = j-SIGPLAN, volume = "49", number = "3", pages = "179--182", month = mar, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2637365.2517216", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 26 05:58:25 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Object Management Group's (OMG) Data Distribution Service (DDS) provides many configurable policies which determine end-to-end quality of service (QoS) of applications. It is challenging to predict the system's performance in terms of latencies, throughput, and resource usage because diverse combinations of QoS configurations influence QoS of applications in different ways. To overcome this problem, design-time formal methods have been applied with mixed success, but lack of sufficient accuracy in prediction, tool support, and understanding of formalism has prevented wider adoption of the formal techniques. A promising approach to address this challenge is to emulate system behavior and gather data on the QoS parameters of interest by experimentation. To realize this approach, which is preferred over formal methods due to their limitations in accurately predicting QoS, we have developed a model-based automatic performance testing framework with generative capabilities to reduce manual efforts in generating a large number of relevant QoS configurations that can be deployed and tested on a cloud platform.
This paper describes our initial efforts in developing and using this technology.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '13 conference proceedings.", } @Article{Vitek:2014:SCR, author = "Jan Vitek", title = "{SIGPLAN Chair}'s report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "1--1", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641640", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gibbons:2014:SVC, author = "Jeremy Gibbons", title = "{SIGPLAN Vice-Chair}'s report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "2--2", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641641", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Black:2014:SSR, author = "Andrew Black", title = "{SIGPLAN Secretary}'s report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "3--3", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641642", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lopes:2014:STR, author = "Cristina V. Lopes", title = "{SIGPLAN Treasurer}'s report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "4--4", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641643", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Dreyer:2014:SA, author = "Derek Dreyer", title = "{SIGPLAN} awards", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "5--7", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641644", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lawall:2014:SPA, author = "Julia Lawall and Cristina V. 
Lopes", title = "{SIGPLAN Professional Activities Committee Report}", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "8--8", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641645", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hind:2014:SRH, author = "Michael Hind", title = "{SIGPLAN Research Highlights Annual Report}", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "9--9", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641646", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Sewell:2014:PPC, author = "Peter Sewell", title = "{POPL 2014 Program Chair}'s report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "10--26", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641647", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This note describes the POPL 2014 paper selection process and its rationale.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Lopes:2014:OTP, author = "Cristina V. 
Lopes", title = "The {OOPSLA} two-phase review process", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "27--32", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641648", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Boehm:2014:PP, author = "Hans Boehm and Jack Davidson and Kathleen Fisher and Cormac Flanagan and Jeremy Gibbons and Mary Hall and Graham Hutton and David Padua and Frank Tip and Jan Vitek and Philip Wadler", title = "Practices of {PLDI}", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "33--38", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641649", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Fahndrich:2014:SAS, author = "Manuel F{\"a}hndrich and Francesco Logozzo", title = "{SAS2013} artifact submission experience report", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "39--40", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641650", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Gay:2014:NLH, author = "David Gay and Philip Levis and Robert von Behren and Matt Welsh and Eric Brewer and David Culler", title = "The {nesC} language: a holistic approach to networked embedded systems", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "41--51", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641652", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present nesC, a programming language for networked embedded systems that represent a new design space for application developers. An example of a networked embedded system is a sensor network, which consists of (potentially) thousands of tiny, low-power ``motes,'' each of which execute concurrent, reactive programs that must operate with severe memory and power constraints. nesC's contribution is to support the special needs of this domain by exposing a programming model that incorporates event-driven execution, a flexible concurrency model, and component-oriented application design. Restrictions on the programming model allow the nesC compiler to perform whole-program analyses, including data-race detection (which improves reliability) and aggressive function inlining (which reduces resource consumption). nesC has been used to implement TinyOS, a small operating system for sensor networks, as well as several significant sensor applications. 
nesC and TinyOS have been adopted by a large number of sensor network research groups, and our experience and evaluation of the language shows that it is effective at supporting the complex, concurrent programming style demanded by this new class of deeply networked systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{LeBotlan:2014:MRM, author = "Didier {Le Botlan} and Didier R{\'e}my", title = "{MLF}: raising {ML} to the power of {System F}", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "52--63", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641653", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a type system MLF that generalizes ML with first-class polymorphism as in System F. Expressions may contain second-order type annotations. Every typable expression admits a principal type, which however depends on type annotations. Principal types capture all other types that can be obtained by implicit type instantiation and they can be inferred. All expressions of ML are well-typed without any annotations. All expressions of System F can be mechanically encoded into MLF by dropping all type abstractions and type applications, and injecting types of lambda-abstractions into MLF types. Moreover, only parameters of lambda-abstractions that are used polymorphically need to remain annotated.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Harris:2014:LSL, author = "Tim Harris and Keir Fraser", title = "Language support for lightweight transactions", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "64--78", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641654", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent programming is notoriously difficult. Current abstractions are intricate and make it hard to design computer systems that are reliable and scalable. We argue that these problems can be addressed by moving to a declarative style of concurrency control in which programmers directly indicate the safety properties that they require. In our scheme the programmer demarks sections of code which execute within lightweight software-based transactions that commit atomically and exactly once. These transactions can update shared data, instantiate objects, invoke library features and so on. They can also block, waiting for arbitrary boolean conditions to become true. Transactions which do not access the same shared memory locations can commit concurrently. Furthermore, in general, no performance penalty is incurred for memory accesses outside transactions. We present a detailed design of this proposal along with an implementation and evaluation. 
We argue that the resulting system (i) is easier for mainstream programmers to use, (ii) prevents lock-based priority-inversion and deadlock problems, and (iii) can offer performance advantages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Henzinger:2014:AP, author = "Thomas A. Henzinger and Ranjit Jhala and Rupak Majumdar and Kenneth L. McMillan", title = "Abstractions from proofs", journal = j-SIGPLAN, volume = "49", number = "4S", pages = "79--91", month = apr, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2641638.2641655", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:36:32 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The success of model checking for large programs depends crucially on the ability to efficiently construct parsimonious abstractions. A predicate abstraction is parsimonious if at each control location, it specifies only relationships between current values of variables, and only those which are required for proving correctness. Previous methods for automatically refining predicate abstractions until sufficient precision is obtained do not systematically construct parsimonious abstractions: predicates usually contain symbolic variables, and are added heuristically and often uniformly to many or all control locations at once. We use Craig interpolation to efficiently construct, from a given abstract error trace which cannot be concretized, a parsimonious abstraction that removes the trace. At each location of the trace, we infer the relevant predicates as an interpolant between the two formulas that define the past and the future segment of the trace. Each interpolant is a relationship between current values of program variables, and is relevant only at that particular program location. It can be found by a linear scan of the proof of infeasibility of the trace. We develop our method for programs with arithmetic and pointer expressions, and call-by-value function calls. For function calls, Craig interpolation offers a systematic way of generating relevant predicates that contain only the local variables of the function and the values of the formal parameters when the function was called. We have extended our model checker BLAST with predicate discovery by Craig interpolation, and applied it successfully to C programs with more than 130,000 lines of code, which was not possible with approaches that build less parsimonious abstractions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Kulkarni:2014:EED, author = "Prasad A. Kulkarni", title = "Energy efficient data access techniques", journal = j-SIGPLAN, volume = "49", number = "5", pages = "1--1", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2602568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy has become a first class design constraint for all types of processors. Data accesses contribute to processor energy usage and can account for up to 25\% of the total energy used in embedded processors.
Using a set-associative level-one data cache (L1 DC) organization is particularly energy inefficient as load operations access all L1 DC tag and data arrays in parallel to reduce access latency, but the data can reside in at most one way. Techniques that reduce L1 DC energy usage at the expense of degrading performance, such as filter caches, have not been adopted. In this presentation I will describe various techniques we have developed to reduce the energy usage for L1 DC accesses without adversely affecting performance. These techniques include avoiding unnecessary loads from L1 DC data arrays and a practical data filter cache design that not only significantly reduces data access energy usage, but also avoids the traditional execution time penalty associated with data filter caches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Spink:2014:ECG, author = "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke and Nigel Topham", title = "Efficient code generation in a region-based dynamic binary translator", journal = j-SIGPLAN, volume = "49", number = "5", pages = "3--12", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597810", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Region-based JIT compilation operates on translation units comprising multiple basic blocks and, possibly cyclic or conditional, control flow between these. It promises to reconcile aggressive code optimisation and low compilation latency in performance-critical dynamic binary translators. Whilst various region selection schemes and isolated code optimisation techniques have been investigated it remains unclear how to best exploit such regions for efficient code generation. Complex interactions with indirect branch tables and translation caches can have adverse effects on performance if not considered carefully. In this paper we present a complete code generation strategy for a region-based dynamic binary translator, which exploits branch type and control flow profiling information to improve code quality for the common case. We demonstrate that using our code generation strategy a competitive region-based dynamic compiler can be built on top of the LLVM JIT compilation framework. 
For the ARM-V5T target ISA and SPEC CPU 2006 benchmarks we achieve execution rates of, on average, 867 MIPS and up to 1323 MIPS on a standard X86 host machine, outperforming state-of-the-art QEMU-ARM by delivering a speedup of 264\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Lezuo:2014:COC, author = "Roland Lezuo and Philipp Paulweber and Andreas Krall", title = "{CASM}: optimized compilation of abstract state machines", journal = j-SIGPLAN, volume = "49", number = "5", pages = "13--22", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597813", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we present CASM, a language based on Abstract State Machines (ASM), and its optimizing compiler. ASM is a well-defined (formal) method based on algebraic concepts. A distinct feature of ASM is its combination of parallel and sequential execution semantics. This makes it an excellent choice to formally specify and verify micro-architectures. We present a compilation scheme and an implementation of a runtime system supporting efficient execution of ASM. After introducing novel analysis techniques we present optimizations allowing us to eliminate many costly operations. Benchmark results show that our baseline compiler is 2-3 magnitudes faster than other ASM implementations. The optimizations further increase the performance of the compiled programs up to 264\%. The achieved performance allows our ASM implementation to be used with industry-size applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Lozano:2014:CSC, author = "Roberto Casta{\~n}eda Lozano and Mats Carlsson and Gabriel Hjort Blindell and Christian Schulte", title = "Combinatorial spill code optimization and ultimate coalescing", journal = j-SIGPLAN, volume = "49", number = "5", pages = "23--32", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597815", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel combinatorial model that integrates global register allocation based on ultimate coalescing, spill code optimization, register packing, and multiple register banks with instruction scheduling (including VLIW). The model exploits alternative temporaries that hold the same value as a new concept for ultimate coalescing and spill code optimization. The paper presents Unison as a code generator based on the model and advanced solving techniques using constraint programming. Thorough experiments using MediaBench and a processor (Hexagon) that are typical for embedded systems demonstrate that Unison: is robust and scalable; generates faster code than LLVM (up to 41\% with a mean improvement of 7\%); possibly generates optimal code (for 29\% of the experiments); effortlessly supports different optimization criteria (code size on par with LLVM). 
Unison is significant as it addresses the same aspects as traditional code generation algorithms, yet is based on a simple integrated model and robustly can generate optimal code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Ballabriga:2014:CRP, author = "Cl{\'e}ment Ballabriga and Lee Kee Chong and Abhik Roychoudhury", title = "Cache-related preemption delay analysis for {FIFO} caches", journal = j-SIGPLAN, volume = "49", number = "5", pages = "33--42", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597814", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hard real-time systems are typically composed of multiple tasks, subjected to timing constraints. To guarantee that these constraints will be respected, the Worst-Case Response Time (WCRT) of each task is needed. In the presence of systems supporting preemptible tasks, we need to take into account the time lost due to task preemption. A major part of this delay is the Cache-Related Preemption Delay (CRPD), which represents the penalties due to cache block evictions by preempting tasks. Previous works on CRPD have focused on caches with Least Recently used (LRU) replacement policy. However, for many real-world processors such as ARM9 or ARM11, the use of First-in-first-out (FIFO) cache replacement policy is common. In this paper, we propose an approach to compute CRPD in the presence of instruction caches with FIFO replacement policy. We use the result of a FIFO instruction cache categorization analysis to account for single-task cache misses, and we model as an Integer Linear Programming (ILP) system the additional preemption-related cache misses. We study the effect of cache related timing anomalies, our work is the first to deal with the effect of timing anomalies in CRPD computation. We also present a WCRT computation method that takes advantage of the fact that our computed CRPD does not increase linearly with respect to the preemption count. We evaluated our method by computing the CRPD with realistic benchmarks (e.g. drone control application, robot controller application), under various cache configuration parameters. The experimentation shows that our method is able to compute tight CRPD bound for benchmark tasks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Henry:2014:HCW, author = "Julien Henry and Mihail Asavoae and David Monniaux and Claire Ma{\"\i}za", title = "How to compute worst-case execution time by optimization modulo theory and a clever encoding of program semantics", journal = j-SIGPLAN, volume = "49", number = "5", pages = "43--52", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597817", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In systems with hard real-time constraints, it is necessary to compute upper bounds on the worst-case execution time (WCET) of programs; the closer the bound to the real WCET, the better. 
This is especially the case of synchronous reactive control loops with a fixed clock; the WCET of the loop body must not exceed the clock period. We compute the WCET (or at least a close upper bound thereof) as the solution of an optimization modulo theory problem that takes into account the semantics of the program, in contrast to other methods that compute the longest path whether or not it is feasible according to these semantics. Optimization modulo theory extends satisfiability modulo theory (SMT) to maximization problems. Immediate encodings of WCET problems into SMT yield formulas intractable for all current production-grade solvers --- this is inherent to the DPLL(T) approach to SMT implemented in these solvers. By conjoining some appropriate ``cuts'' to these formulas, we considerably reduce the computation time of the SMT-solver. We experimented with our approach on a variety of control programs, using the OTAWA analyzer both as baseline and as underlying microarchitectural analysis for our analysis, and show notable improvement on the WCET bound on a variety of benchmarks and control programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Zheng:2014:WAD, author = "Wenguang Zheng and Hui Wu", title = "{WCET}-aware dynamic instruction cache locking", journal = j-SIGPLAN, volume = "49", number = "5", pages = "53--62", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597820", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Caches are widely used in embedded systems to bridge the increasing speed gap between processors and off-chip memory. However, caches make it significantly harder to compute the WCET (Worst Case Execution Time) of a task. To alleviate this problem, cache locking has been proposed. We investigate the I-cache locking problem, and propose a WCET-aware, min-cut based dynamic instruction cache locking approach for reducing the WCET of a single task. We have implemented our approach and compared it with the two state-of-the-art cache locking approaches by using a set of benchmarks from the MRTC benchmark suite. The experimental results show that our approach achieves average improvements of 41\%, 15\% and 7\% over the partial locking approach for the 256B, 512B and 1KB caches, respectively, and 7\%, 18\% and 17\% over the longest path based dynamic locking approach for the 256B, 512B and 1KB caches, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Martins:2014:ECO, author = "Luiz G. A. Martins and Ricardo Nobre and Alexandre C. B. Delbem and Eduardo Marques and Jo{\~a}o M. P.
Cardoso", title = "Exploration of compiler optimization sequences using clustering-based selection", journal = j-SIGPLAN, volume = "49", number = "5", pages = "63--72", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597821", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Due to the large number of optimizations provided in modern compilers and to compiler optimization specific opportunities, a Design Space Exploration (DSE) is necessary to search for the best sequence of compiler optimizations for a given code fragment (e.g., function). As this exploration is a complex and time consuming task, in this paper we present DSE strategies to select optimization sequences to both improve the performance of each function and reduce the exploration time. The DSE is based on a clustering approach which groups functions with similarities and then explore the reduced search space provided by the optimizations previously suggested for the functions in each group. The identification of similarities between functions uses a data mining method which is applied to a symbolic code representation of the source code. The DSE process uses the reduced set identified by clustering in two ways: as the design space or as the initial configuration. In both ways, the adoption of a pre-selection based on clustering allows the use of simple and fast DSE algorithms. Our experiments for evaluating the effectiveness of the proposed approach address the exploration of compiler optimization sequences considering 49 compilation passes and targeting a Xilinx MicroBlaze processor, and were performed aiming performance improvements for 41 functions. Experimental results reveal that the use of our new clustering-based DSE approach achieved a significant reduction on the total exploration time of the search space (18x over a Genetic Algorithm approach for DSE) at the same time that important performance speedups (43\% over the baseline) were obtained by the optimized codes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Chandramohan:2014:PDP, author = "Kiran Chandramohan and Michael F. P. O'Boyle", title = "Partitioning data-parallel programs for heterogeneous {MPSoCs}: time and energy design space exploration", journal = j-SIGPLAN, volume = "49", number = "5", pages = "73--82", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597822", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multiprocessor System-on-Chips(MPSoCs) are now widely used in embedded devices. MPSoCs typically contain a range of specialised processors. Alongside the CPU, there are microcontrollers, DSPs and other hardware accelerators. Programming these MPSoCs is difficult because of the difference in instruction-set architecture (ISA) and disjoint address spaces. In this paper we consider MPSoCs as a target for individual benchmarks. We examine how data-parallel programs can be optimally mapped to heterogeneous multicores for different criteria such as performance, power and energy. 
We investigate the partitioning of seven benchmarks taken from the DSPstone, UTDSP and Polybench suites. Based on design space exploration we show that the best partition depends on compiler optimization level, program, input size and, crucially, optimization criteria. We develop a straightforward approach that attempts to select the best partitioning for a given program. On average it achieves speedups of 2.2x and energy improvements of 1.45x on the OMAP 4430 platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Guo:2014:EED, author = "Minyi Guo", title = "Energy efficient data access and storage through {HW\slash SW} co-design", journal = j-SIGPLAN, volume = "49", number = "5", pages = "83--83", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2602569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Massive energy consumption has become a major factor for the design and implementation of datacenters. This has led to numerous academic and industrial efforts to improve the energy efficiency of datacenter infrastructures. As a result, in state-of-the-art datacenter facilities, over 80\% of power is now consumed by servers themselves. Historically, the processor has dominated energy consumption in the server. However, as processors have become more energy efficient, their contribution has been decreasing. On the contrary, energy consumed by data accesses and storage is growing, since multi- and many-core servers are requiring increased main memory bandwidth/capacity, large register files and large-scale storage systems. Accordingly, energy consumed by data accesses and storage is approaching or even surpassing that consumed by processors in many servers. For example, it has been reported that main memory contributes to as much as 40--46\% of total energy consumption in server applications. In this talk, we present our continuing efforts to improve the energy efficiency of data accesses and storage. We study a series of approaches with hardware-software cooperation to save energy consumption of on-chip memory, register file, main memory and storage devices for embedded systems, multi- and many-core servers, respectively. Experiments with a large set of workloads show the accuracy of our analytical models and the effectiveness of our optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{vonKoch:2014:EFS, author = "Tobias J. K. Edler von Koch and Bj{\"o}rn Franke and Pranav Bhandarkar and Anshuman Dasgupta", title = "Exploiting function similarity for code size reduction", journal = j-SIGPLAN, volume = "49", number = "5", pages = "85--94", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597811", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For cost-sensitive or memory-constrained embedded systems, code size is at least as important as performance.
Consequently, compact code generation has become a major focus of attention within the compiler community. In this paper we develop a pragmatic, yet effective code size reduction technique, which exploits structural similarity of functions. It avoids code duplication through merging of similar functions and targeted insertion of control flow to resolve small differences. We have implemented our purely software-based and platform-independent technique in the LLVM compiler framework and evaluated it against the SPEC CPU2006 benchmarks and three target platforms: Intel x86, ARM-based Qualcomm Krait(TM), and Qualcomm Hexagon(TM) DSP. We demonstrate that code size for SPEC CPU2006 can be reduced by more than 550KB on x86. This corresponds to an overall code size reduction of 4\%, and up to 11.5\% for individual programs. Overhead introduced by additional control flow is compensated for by better I-cache performance of the compacted programs. We also show that identifying suitable candidates and subsequent merging of functions can be implemented efficiently.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Roy:2014:AAS, author = "Pooja Roy and Rajarshi Ray and Chundong Wang and Weng Fai Wong", title = "{ASAC}: automatic sensitivity analysis for approximate computing", journal = j-SIGPLAN, volume = "49", number = "5", pages = "95--104", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597812", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The approximation-based programming paradigm is especially attractive for developing error-resilient applications targeting low-power embedded devices. It allows program data to be computed and stored approximately for better energy efficiency. Battery life in smartphones, tablets, etc. is generally more of a concern to users than an application's accuracy or fidelity beyond a certain acceptable quality of service. Therefore, relaxing accuracy to improve energy efficiency is an attractive trade-off when permissible by the application's domain. Recent works suggest source code annotations and type qualifiers to facilitate safe approximate computation and data manipulation. This requires rewriting of programs or the availability of source code for annotation. This may not be feasible as real-world applications tend to be large, with source code that is not readily available. In this paper, we propose a novel sensitivity analysis that automatically generates annotations for programs for the purpose of approximate computing. Our framework, ASAC, extracts information about the sensitivity of the output with respect to program data. We show that the program output is sensitive to only a subset of program data that we deem critical, and hence must be precise. The rest of the data can be computed and stored approximately. We evaluated our analysis on a range of applications, and achieved 86\% accuracy compared to manual annotations by programmers.
We validated our analysis by showing that the applications are within the acceptable QoS threshold if we approximate the non-critical data.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Chaudhary:2014:ESC, author = "Sandeep Chaudhary and Sebastian Fischmeister and Lin Tan", title = "{em-SPADE}: a compiler extension for checking rules extracted from processor specifications", journal = j-SIGPLAN, volume = "49", number = "5", pages = "105--114", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597823", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional compilers ignore processor specifications, thousands of pages of which are available for modern processors. To bridge this gap, em-SPADE analyzes processor specifications and creates processor-specific rules to reduce low-level programming errors. This work shows the potential of automatically analyzing processor- and other hardware specifications to detect low-level programming errors at compile time. em-SPADE is a compiler extension to automatically detect software bugs in low-level programs. From processor specifications, a preprocessor extracts target-specific rules such as register use and read-only or reserved registers. A special LLVM pass then uses these rules to detect incorrect register assignments. Our experiments with em-SPADE have correctly extracted 652 rules from 15 specifications and consequently found 20 bugs in ten software projects. The work is generalizable to other types of specifications and shows the clear prospects of using hardware specifications to enhance compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Beaugnon:2014:VVO, author = "Ulysse Beaugnon and Alexey Kravets and Sven van Haastregt and Riyadh Baghdadi and David Tweed and Javed Absar and Anton Lokhmotov", title = "{VOBLA}: a vehicle for optimized basic linear algebra", journal = j-SIGPLAN, volume = "49", number = "5", pages = "115--124", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597818", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present VOBLA, a domain-specific language designed for programming linear algebra libraries. VOBLA is compiled to PENCIL, a domain independent intermediate language designed for efficient mapping to accelerator architectures such as GPGPUs. PENCIL is compiled to efficient, platform-specific OpenCL code using techniques based on the polyhedral model. This approach addresses both the programmer productivity and performance portability concerns associated with accelerator programming. We demonstrate our approach by using VOBLA to implement a BLAS library. We have evaluated the performance of OpenCL code generated using our compilation flow on ARM Mali, AMD Radeon, and AMD Opteron platforms. 
The generated code is currently on average 1.9x slower than highly hand-optimized OpenCL code, but on average 8.1x faster than straightforward OpenCL code. Given that the VOBLA coding takes significantly less effort compared to hand-optimizing OpenCL code, we believe our approach leads to improved productivity and performance portability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Bebelis:2014:FSP, author = "Vagelis Bebelis and Pascal Fradet and Alain Girault", title = "A framework to schedule parametric dataflow applications on many-core platforms", journal = j-SIGPLAN, volume = "49", number = "5", pages = "125--134", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597819", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dataflow models, such as SDF, have been effectively used to program streaming applications while ensuring their liveness and boundedness. Yet, industrials are struggling to design the next generation of high definition video applications using these models. Such applications demand new features such as parameters to express dynamic input/output rate and topology modifications. Their implementation on modern many-core platforms is a major challenge. We tackle these problems by proposing a generic and flexible framework to schedule streaming applications designed in a parametric dataflow model of computation. We generate parallel as soon as possible (ASAP) schedules targeted to the new STHORM many-core platform of STMicroelectronics. Furthermore, these schedules can be customized using user-defined ordering and resource constraints. The parametric dataflow graph is associated with generic or user-defined specific constraints aimed at minimizing timing, buffer sizes, power consumption, or other criteria. The scheduling algorithm executes with minimal overhead and can be adapted to different scheduling policies just by adding some constraints. The safety of both the dataflow graph and constraints can be checked statically and all schedules are guaranteed to be bounded and deadlock free. We illustrate the scheduling capabilities of our approach using a real world application: the VC-1 video decoder for high definition video streaming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Lee:2014:IPL, author = "Jinyong Lee and Jongwon Lee and Jongeun Lee and Yunheung Paek", title = "Improving performance of loops on {DIAM-based} {VLIW} architectures", journal = j-SIGPLAN, volume = "49", number = "5", pages = "135--144", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597825", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent studies show that very long instruction word (VLIW) architectures, which inherently have wide datapath (e.g. 
128 or 256 bits for one VLIW instruction word), can benefit from dynamic implied addressing mode (DIAM) and can achieve lower power consumption and smaller code size with a small performance overhead. Such overhead, which is claimed to be small, is mainly caused by the execution of additionally generated special instructions for conveying information that cannot be encoded in the reduced instruction bit-width. In this paper, however, we show that the performance impact of applying DIAM to VLIW architectures cannot be overlooked, especially when applications possess a high level of instruction-level parallelism (ILP), which is mostly the case for loops as a result of aggressive code scheduling. We also propose a way to relieve the performance degradation, focusing especially on loops, since loops account for almost 90\% of total execution time in programs and tend to have high ILP. We first implement the original DIAM compilation technique in a compiler, and augment it with the proposed loop optimization scheme to show that ours can clearly alleviate the performance loss caused by the excessive number of additional instructions, with the help of slightly modified hardware. Moreover, the well-known loop unrolling scheme, which would produce denser code in loops at the cost of substantial code size bloating, is integrated into our compiler. The experimental results show that the loop unrolling technique, combined with our augmented DIAM scheme, produces far better code in terms of performance with quite an acceptable amount of code size increase.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Wingbermuehle:2014:SMS, author = "Joseph G. Wingbermuehle and Ron K. Cytron and Roger D. Chamberlain", title = "Superoptimization of memory subsystems", journal = j-SIGPLAN, volume = "49", number = "5", pages = "145--154", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597816", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The disparity in performance between processors and main memories has led computer architects to incorporate large cache hierarchies in modern computers. Because these cache hierarchies are designed to be general-purpose, they may not provide the best possible performance for a given application. In this paper, we determine a memory subsystem well suited for a given application and main memory by discovering a memory subsystem comprised of caches, scratchpads, and other components that are combined to provide better performance. We draw motivation from the superoptimization of instruction sequences, which successfully finds unusually clever instruction sequences for programs.
Targeting both ASIC and FPGA devices, we show that it is possible to discover unusual memory subsystems that provide performance improvements over a typical memory subsystem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Kim:2014:LBL, author = "Hongjune Kim and Seonmyeong Bak and Jaejin Lee", title = "Lightweight and block-level concurrent sweeping for {JavaScript} garbage collection", journal = j-SIGPLAN, volume = "49", number = "5", pages = "155--164", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597824", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript is a dynamically typed language originally developed for the purpose of giving dynamic client-side behaviors to web pages. It is mainly used in web application development, and because of its popularity and rapid development style it is now also used in other types of applications. Increasing data processing requirements and growing usage in more resource-limited environments, such as mobile devices, have created demands for JavaScript implementations to handle memory more efficiently through garbage collection. Since aggressive use of time-consuming operations in garbage collection can slow down the JavaScript application, there is a trade-off relationship between the effectiveness and the execution time of garbage collection. In this paper, we present a lightweight, block-level concurrent sweeping mechanism for a mark-and-sweep garbage collector. The sweeping process is detached to an additional thread to eagerly collect free memory blocks and recycle them. To minimize the overhead that comes from the synchronization between the mutator thread and the new sweeping thread, we have chosen a coarse-grained block-level collecting scheme for sweeping. To avoid contention that comes from object destruction, we execute the object destruction phase concurrently with the foreground marking phase. We have implemented our algorithm in the JavaScript Core (JSC) engine embedded in the WebKit browser, which uses a variant of the mark-and-sweep algorithm to manage JavaScript objects. The original garbage collection implementation performs lazy sweeping that cannot reuse the free blocks.
We evaluate our implementation on an ARM-based mobile system and show that memory utilization of the system is significantly improved without performance degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '14 conference proceedings.", } @Article{Padua:2014:WEI, author = "David Padua", title = "What exactly is inexact computation good for?", journal = j-SIGPLAN, volume = "49", number = "6", pages = "1--1", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2604001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Our willingness to deliberately trade accuracy of computing systems for significant resource savings, notably energy consumption, got a boost from two directions. First, energy (or power, the more popularly used measure) consumption started emerging as a serious hurdle to our ability to continue scaling the complexity of processors, and thus enable ever richer computing applications. This ``energy hurdle'' spanned the gamut from large data-centers to portable embedded computing systems. Second, many believed that an engine of growth that supported scaling, captured by Gordon Moore's remarkable prophecy (Moore's law), was headed towards an irrevocable cliff edge --- when this happens, our ability to produce computing systems whose hardware would support precise or exact computing would diminish greatly. In this talk which emphasizes the physical and hardware layers of abstraction where all of these troubles start (after all energy is rooted in thermodynamics), I will first review reasons that compelled and encouraged us to consider trading accuracy for energy savings deliberately resulting in inexact computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Kuper:2014:TPE, author = "Lindsey Kuper and Aaron Todd and Sam Tobin-Hochstadt and Ryan R. Newton", title = "Taming the parallel effect zoo: extensible deterministic parallelism with {LVish}", journal = j-SIGPLAN, volume = "49", number = "6", pages = "2--14", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594312", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A fundamental challenge of parallel programming is to ensure that the observable outcome of a program remains deterministic in spite of parallel execution. Language-level enforcement of determinism is possible, but existing deterministic-by-construction parallel programming models tend to lack features that would make them applicable to a broad range of problems. Moreover, they lack extensibility: it is difficult to add or change language features without breaking the determinism guarantee. The recently proposed LVars programming model, and the accompanying LVish Haskell library, took a step toward broadly-applicable guaranteed-deterministic parallel programming. 
The LVars model allows communication through shared monotonic data structures to which information can only be added, never removed, and for which the order in which information is added is not observable. LVish provides a Par monad for parallel computation that encapsulates determinism-preserving effects while allowing a more flexible form of communication between parallel tasks than previous guaranteed-deterministic models provided. While applying LVar-based programming to real problems using LVish, we have identified and implemented three capabilities that extend its reach: inflationary updates other than least-upper-bound writes; transitive task cancellation; and parallel mutation of non-overlapping memory locations. The unifying abstraction we use to add these capabilities to LVish---without suffering added complexity or cost in the core LVish implementation, or compromising determinism---is a form of monad transformer, extended to handle the Par monad. With our extensions, LVish provides the most broadly applicable guaranteed-deterministic parallel programming interface available to date. We demonstrate the viability of our approach both with traditional parallel benchmarks and with results from a real-world case study: a bioinformatics application that we parallelized using our extended version of LVish.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Surendran:2014:TDR, author = "Rishi Surendran and Raghavan Raman and Swarat Chaudhuri and John Mellor-Crummey and Vivek Sarkar", title = "Test-driven repair of data races in structured parallel programs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "15--25", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594335", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A common workflow for developing parallel software is as follows: (1) start with a sequential program, (2) identify subcomputations that should be converted to parallel tasks, (3) insert synchronization to achieve the same semantics as the sequential program, and repeat steps (2) and (3) as needed to improve performance. Though this is not the only approach to developing parallel software, it is sufficiently common to warrant special attention as parallel programming becomes ubiquitous. This paper focuses on automating step (3), which is usually the hardest step for developers who lack expertise in parallel programming. Past solutions to the problem of repairing parallel programs have used static-only or dynamic-only approaches, both of which incur significant limitations in practice. Static approaches can guarantee soundness in many cases but are limited in precision when analyzing medium or large-scale software with accesses to pointer-based data structures in multiple procedures. Dynamic approaches are more precise, but their proposed repairs are limited to a single input and are not reflected back in the original source program. In this paper, we introduce a hybrid static+dynamic test-driven approach to repairing data races in structured parallel programs. Our approach includes a novel coupling between static and dynamic analyses. 
First, we execute the program on a concrete test input and determine the set of data races for this input dynamically. Next, we compute a set of ``finish'' placements that prevent these races and also respects the static scoping rules of the program while maximizing parallelism. Empirical results on standard benchmarks and student homework submissions from a parallel computing course establish the effectiveness of our approach with respect to compile-time overhead, precision, and performance of the repaired code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Stork:2014:APB, author = "Sven Stork and Karl Naden and Joshua Sunshine and Manual Mohr and Alcides Fonseca and Paulo Marques and Jonathan Aldrich", title = "{{\AE}minium}: a permission based concurrent-by-default programming language approach", journal = j-SIGPLAN, volume = "49", number = "6", pages = "26--26", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594344", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The aim of {\AE}MINIUM is to study the implications of having a concurrent-by-default programming language. This includes language design, runtime system, performance and software engineering considerations. We conduct our study through the design of the concurrent-by-default {\AE}MINIUM programming language. {\AE}MINIUM leverages the permission flow of object and group permissions through the program to validate the program's correctness and to automatically infer a possible parallelization strategy via a dataflow graph. {\AE}MINIUM supports not only fork-join parallelism but more general dataflow patterns of parallelism. In this paper we present a formal system, called \mu {\AE}MINIUM, modeling the core concepts of {\AE}MINIUM. \mu {\AE}MINIUM's static type system is based on Featherweight Java with {\AE}MINIUM-specific extensions. Besides checking for correctness {\AE}MINIUM's type system it also uses the permission flow to compute a potential parallel execution strategy for the program. \mu {\AE}MINIUM's dynamic semantics use a concurrent-by-default evaluation approach. Along with the formal system we present its soundness proof. We provide a full description of the implementation along with the description of various optimization techniques we used. We implemented {\AE}MINIUM as an extension of the Plaid programming language, which has first-class support for permissions built-in. The {\AE}MINIUM implementation and all case studies are publicly available under the General Public License. We use various case studies to evaluate {\AE}MINIUM's applicability and to demonstrate that {\AE}MINIUM parallelized code has performance improvements compared to its sequential counterpart. We chose to use case studies from common domains or problems that are known to benefit from parallelization, to show that {\AE}MINIUM is powerful enough to encode them. We demonstrate through a webserver application, which evaluates {\AE}MINIUM's impact on latency-bound applications, that {\AE}MINIUM can achieve a 70\% performance improvement over the sequential counterpart. 
In another case study we chose to implement a dictionary function to evaluate {\AE}MINIUM's capabilities to express essential data structures. Our evaluation demonstrates that {\AE}MINIUM can be used to express parallelism in such data-structures and that the performance benefits scale with the amount of annotation effort which is put into the implementation. We chose an integral computationally example to evaluate pure functional programming and computational intensive use cases. Our experiments show that {\AE}MINIUM is capable of extracting parallelism from functional code and achieving performance improvements up to the limits of Plaid's inherent performance bounds. Overall, we hope that the work helps to advance concurrent programming in modern programming environments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Jagannathan:2014:ARV, author = "Suresh Jagannathan and Vincent Laporte and Gustavo Petri and David Pichardie and Jan Vitek", title = "Atomicity refinement for verified compilation", journal = j-SIGPLAN, volume = "49", number = "6", pages = "27--27", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594346", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the verified compilation of high-level managed languages like Java or C\# whose intermediate representations provide support for shared-memory synchronization and automatic memory management. In this environment, the interactions between application threads and the language runtime (e.g., the garbage collector) are regulated by compiler-injected code snippets. Example of snippets include allocation fast paths among others. In our TOPLAS paper we propose a refinement-based proof methodology that precisely relates concurrent code expressed at different abstraction levels, cognizant throughout of the relaxed memory semantics of the underlying processor. Our technique allows the compiler writer to reason compositionally about the atomicity of low-level concurrent code used to implement managed services. We illustrate our approach with examples taken from the verification of a concurrent garbage collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Biswas:2014:DES, author = "Swarnendu Biswas and Jipeng Huang and Aritra Sengupta and Michael D. Bond", title = "{DoubleChecker}: efficient sound and precise atomicity checking", journal = j-SIGPLAN, volume = "49", number = "6", pages = "28--39", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594323", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Atomicity is a key correctness property that allows programmers to reason about code regions in isolation. 
However, programs often fail to enforce atomicity correctly, leading to atomicity violations that are difficult to detect. Dynamic program analysis can detect atomicity violations based on an atomicity specification, but existing approaches slow programs substantially. This paper presents DoubleChecker, a novel sound and precise atomicity checker whose key insight lies in its use of two new cooperating dynamic analyses. Its imprecise analysis tracks cross-thread dependences soundly but imprecisely with significantly better performance than a fully precise analysis. Its precise analysis is more expensive but only needs to process a subset of the execution identified as potentially involved in atomicity violations by the imprecise analysis. If DoubleChecker operates in single-run mode, the two analyses execute in the same program run, which guarantees soundness and precision but requires logging program accesses to pass from the imprecise to the precise analysis. In multi-run mode, the first program run executes only the imprecise analysis, and a second run executes both analyses. Multi-run mode trades accuracy for performance; each run of multi-run mode outperforms single-run mode, but can potentially miss violations. We have implemented DoubleChecker and an existing state-of-the-art atomicity checker called Velodrome in a high-performance Java virtual machine. DoubleChecker's single-run mode significantly outperforms Velodrome, while still providing full soundness and precision. DoubleChecker's multi-run mode improves performance further, without significantly impacting soundness in practice. These results suggest that DoubleChecker's approach is a promising direction for improving the performance of dynamic atomicity checking over prior work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Alglave:2014:HCM, author = "Jade Alglave and Luc Maranget and Michael Tautschnig", title = "Herding cats: modelling, simulation, testing, and data-mining for weak memory", journal = j-SIGPLAN, volume = "49", number = "6", pages = "40--40", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594347", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There is a joke where a physicist and a mathematician are asked to herd cats. The physicist starts with an infinitely large pen which he reduces until it is of reasonable diameter yet contains all the cats. The mathematician builds a fence around himself and declares the outside to be the inside. Defining memory models is akin to herding cats: both the physicist's or mathematician's attitudes are tempting, but neither can go without the other.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Rompf:2014:SPJ, author = "Tiark Rompf and Arvind K. Sujeeth and Kevin J. 
Brown and HyoukJoong Lee and Hassan Chafi and Kunle Olukotun", title = "Surgical precision {JIT} compilers", journal = j-SIGPLAN, volume = "49", number = "6", pages = "41--52", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594316", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Just-in-time (JIT) compilation of running programs provides more optimization opportunities than offline compilation. Modern JIT compilers, such as those in virtual machines like Oracle's HotSpot for Java or Google's V8 for JavaScript, rely on dynamic profiling as their key mechanism to guide optimizations. While these JIT compilers offer good average performance, their behavior is a black box and the achieved performance is highly unpredictable. In this paper, we propose to turn JIT compilation into a precision tool by adding two essential and generic metaprogramming facilities: First, allow programs to invoke JIT compilation explicitly. This enables controlled specialization of arbitrary code at run-time, in the style of partial evaluation. It also enables the JIT compiler to report warnings and errors to the program when it is unable to compile a code path in the demanded way. Second, allow the JIT compiler to call back into the program to perform compile-time computation. This lets the program itself define the translation strategy for certain constructs on the fly and gives rise to a powerful JIT macro facility that enables ``smart'' libraries to supply domain-specific compiler optimizations or safety checks. We present Lancet, a JIT compiler framework for Java bytecode that enables such a tight, two-way integration with the running program. Lancet itself was derived from a high-level Java bytecode interpreter: staging the interpreter using LMS (Lightweight Modular Staging) produced a simple bytecode compiler. Adding abstract interpretation turned the simple compiler into an optimizing compiler. This fact provides compelling evidence for the scalability of the staged-interpreter approach to compiler construction. In the case of Lancet, JIT macros also provide a natural interface to existing LMS-based toolchains such as the Delite parallelism and DSL framework, which can now serve as accelerator macros for arbitrary JVM bytecode.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Schkufza:2014:SOF, author = "Eric Schkufza and Rahul Sharma and Alex Aiken", title = "Stochastic optimization of floating-point programs with tunable precision", journal = j-SIGPLAN, volume = "49", number = "6", pages = "53--64", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594302", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The aggressive optimization of floating-point computations is an important problem in high-performance computing. 
Unfortunately, floating-point instruction sets have complicated semantics that often force compilers to preserve programs as written. We present a method that treats floating-point optimization as a stochastic search problem. We demonstrate the ability to generate reduced precision implementations of Intel's handwritten C numeric library which are up to 6 times faster than the original code, and achieve end-to-end speedups of over 30\% on a direct numeric simulation and a ray tracer by optimizing kernels that can tolerate a loss of precision while still remaining correct. Because these optimizations are mostly not amenable to formal verification using the current state of the art, we present a stochastic search technique for characterizing maximum error. The technique comes with an asymptotic guarantee and provides strong evidence of correctness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Stock:2014:FED, author = "Kevin Stock and Martin Kong and Tobias Grosser and Louis-No{\"e}l Pouchet and Fabrice Rastello and J. Ramanujam and P. Sadayappan", title = "A framework for enhancing data reuse via associative reordering", journal = j-SIGPLAN, volume = "49", number = "6", pages = "65--76", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594342", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The freedom to reorder computations involving associative operators has been widely recognized and exploited in designing parallel algorithms and to a more limited extent in optimizing compilers. In this paper, we develop a novel framework utilizing the associativity and commutativity of operations in regular loop computations to enhance register reuse. Stencils represent a particular class of important computations where the optimization framework can be applied to enhance performance. We show how stencil operations can be implemented to better exploit register reuse and reduce load/stores. We develop a multi-dimensional retiming formalism to characterize the space of valid implementations in conjunction with other program transformations. Experimental results demonstrate the effectiveness of the framework on a collection of high-order stencils.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{DeVito:2014:FCR, author = "Zachary DeVito and Daniel Ritchie and Matt Fisher and Alex Aiken and Pat Hanrahan", title = "First-class runtime generation of high-performance types using exotypes", journal = j-SIGPLAN, volume = "49", number = "6", pages = "77--88", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594307", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce exotypes, user-defined types that combine the flexibility of meta-object protocols in dynamically-typed languages with the performance control of low-level languages. 
Like objects in dynamic languages, exotypes are defined programmatically at run-time, allowing behavior based on external data such as a database schema. To achieve high performance, we use staged programming to define the behavior of an exotype during a runtime compilation step and implement exotypes in Terra, a low-level staged programming language. We show how exotype constructors compose, and use exotypes to implement high-performance libraries for serialization, dynamic assembly, automatic differentiation, and probabilistic programming. Each exotype achieves expressiveness similar to libraries written in dynamically-typed languages but implements optimizations that exceed the performance of existing libraries written in low-level statically-typed languages. Though each implementation is significantly shorter, our serialization library is 11 times faster than Kryo, and our dynamic assembler is 3--20 times faster than Google's Chrome assembler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Greenman:2014:GFB, author = "Ben Greenman and Fabian Muehlboeck and Ross Tate", title = "Getting {F}-bounded polymorphism into shape", journal = j-SIGPLAN, volume = "49", number = "6", pages = "89--99", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594308", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a way to restrict recursive inheritance without sacrificing the benefits of F-bounded polymorphism. In particular, we distinguish two new concepts, materials and shapes, and demonstrate through a survey of 13.5 million lines of open-source generic-Java code that these two concepts never actually overlap in practice. With this Material-Shape Separation, we prove that even na{\"\i}ve type-checking algorithms are sound and complete, some of which address problems that were unsolvable even under the existing proposals for restricting inheritance. We illustrate how the simplicity of our design reflects the design intuitions employed by programmers and potentially enables new features coming into demand for upcoming programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Simon:2014:OIF, author = "Axel Simon", title = "Optimal inference of fields in row-polymorphic records", journal = j-SIGPLAN, volume = "49", number = "6", pages = "100--111", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594313", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Flexible records are a powerful concept in type systems that form the basis of, for instance, objects in dynamically typed languages. One caveat of using flexible records is that a program may try to access a record field that does not exist. We present a type inference algorithm that checks for these runtime errors. 
The novelty of our algorithm is that it satisfies a clear notion of completeness: The inferred types are optimal in the sense that type annotations cannot increase the set of typeable programs. Under certain assumptions, our algorithm guarantees the following stronger property: it rejects a program if and only if it contains a path from an empty record to a field access on which the field has not been added. We derive this optimal algorithm by abstracting a semantics to types. The derived inference rules use a novel combination of type terms and Boolean functions that retains the simplicity of unification-based type inference but adds the ability of Boolean functions to express implications, thereby addressing the challenge of combining implications and types. By following our derivation method, we show how various operations such as record concatenation and branching if a field exists lead to Boolean satisfiability problems of different complexity. Analogously, we show that more expressive type systems give rise to SMT problems. On the practical side, we present an implementation of the select and update operations and give practical evidence that these are sufficient in real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Sampson:2014:EVP, author = "Adrian Sampson and Pavel Panchekha and Todd Mytkowicz and Kathryn S. McKinley and Dan Grossman and Luis Ceze", title = "Expressing and verifying probabilistic assertions", journal = j-SIGPLAN, volume = "49", number = "6", pages = "112--122", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594294", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional assertions express correctness properties that must hold on every program execution. However, many applications have probabilistic outcomes and consequently their correctness properties are also probabilistic (e.g., they identify faces in images, consume sensor data, or run on unreliable hardware). Traditional assertions do not capture these correctness properties. This paper proposes that programmers express probabilistic correctness properties with probabilistic assertions and describes a new probabilistic evaluation approach to efficiently verify these assertions. Probabilistic assertions are Boolean expressions that express the probability that a property will be true in a given execution rather than asserting that the property must always be true. Given either specific inputs or distributions on the input space, probabilistic evaluation verifies probabilistic assertions by first performing distribution extraction to represent the program as a Bayesian network. Probabilistic evaluation then uses statistical properties to simplify this representation to efficiently compute assertion probabilities directly or with sampling. Our approach is a mix of both static and dynamic analysis: distribution extraction statically builds and optimizes the Bayesian network representation and sampling dynamically interprets this representation. We implement our approach in a tool called Mayhap for C and C++ programs. 
We evaluate expressiveness, correctness, and performance of Mayhap on programs that use sensors, perform approximate computation, and obfuscate data for privacy. Our case studies demonstrate that probabilistic assertions describe useful correctness properties and that Mayhap efficiently verifies them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Borges:2014:CSS, author = "Mateus Borges and Antonio Filieri and Marcelo d'Amorim and Corina S. Pasareanu and Willem Visser", title = "Compositional solution space quantification for probabilistic software analysis", journal = j-SIGPLAN, volume = "49", number = "6", pages = "123--132", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594329", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Probabilistic software analysis aims at quantifying how likely a target event is to occur during program execution. Current approaches rely on symbolic execution to identify the conditions to reach the target event and try to quantify the fraction of the input domain satisfying these conditions. Precise quantification is usually limited to linear constraints, while only approximate solutions can be provided in general through statistical approaches. However, statistical approaches may fail to converge to an acceptable accuracy within a reasonable time. We present a compositional statistical approach for the efficient quantification of solution spaces for arbitrarily complex constraints over bounded floating-point domains. The approach leverages interval constraint propagation to improve the accuracy of the estimation by focusing the sampling on the regions of the input domain containing the sought solutions. Preliminary experiments show significant improvement on previous approaches both in results accuracy and analysis time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Hur:2014:SPP, author = "Chung-Kil Hur and Aditya V. Nori and Sriram K. Rajamani and Selva Samuel", title = "Slicing probabilistic programs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "133--144", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594303", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Probabilistic programs use familiar notation of programming languages to specify probabilistic models. Suppose we are interested in estimating the distribution of the return expression r of a probabilistic program P. We are interested in slicing the probabilistic program P and obtaining a simpler program Sli( P ) which retains only those parts of P that are relevant to estimating r, and elides those parts of P that are not relevant to estimating r. We desire that the Sli transformation be both correct and efficient. By correct, we mean that P and Sli( P ) have identical estimates on r. By efficient, we mean that estimation over Sli( P ) be as fast as possible. 
We show that the usual notion of program slicing, which traverses control and data dependencies backward from the return expression r, is unsatisfactory for probabilistic programs, since it produces incorrect slices on some programs and sub-optimal ones on others. Our key insight is that in addition to the usual notions of control dependence and data dependence that are used to slice non-probabilistic programs, a new kind of dependence called observe dependence arises naturally due to observe statements in probabilistic programs. We propose a new definition of Sli( P ) which is both correct and efficient for probabilistic programs, by including observe dependence in addition to control and data dependences for computing slices. We prove correctness mathematically, and we demonstrate efficiency empirically. We show that by applying the Sli transformation as a pre-pass, we can improve the efficiency of probabilistic inference, not only in our own inference tool R2, but also in other systems for performing inference such as Church and Infer.NET.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Cai:2014:TCH, author = "Yufei Cai and Paolo G. Giarrusso and Tillmann Rendel and Klaus Ostermann", title = "A theory of changes for higher-order languages: incrementalizing $ \lambda $-calculi by static differentiation", journal = j-SIGPLAN, volume = "49", number = "6", pages = "145--155", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594304", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "If the result of an expensive computation is invalidated by a small change to the input, the old result should be updated incrementally instead of reexecuting the whole computation. We incrementalize programs through their derivative. A derivative maps changes in the program's input directly to changes in the program's output, without reexecuting the original program. We present a program transformation taking programs to their derivatives, which is fully static and automatic, supports first-class functions, and produces derivatives amenable to standard optimization. We prove the program transformation correct in Agda for a family of simply-typed $ \lambda $-calculi, parameterized by base types and primitives. A precise interface specifies what is required to incrementalize the chosen primitives. We investigate performance by a case study: We implement in Scala the program transformation, a plugin and improve performance of a nontrivial program by orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Hammer:2014:ACD, author = "Matthew A. Hammer and Khoo Yit Phang and Michael Hicks and Jeffrey S. 
Foster", title = "{Adapton}: composable, demand-driven incremental computation", journal = j-SIGPLAN, volume = "49", number = "6", pages = "156--166", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594324", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many researchers have proposed programming languages that support incremental computation (IC), which allows programs to be efficiently re-executed after a small change to the input. However, existing implementations of such languages have two important drawbacks. First, recomputation is oblivious to specific demands on the program output; that is, if a program input changes, all dependencies will be recomputed, even if an observer no longer requires certain outputs. Second, programs are made incremental as a unit, with little or no support for reusing results outside of their original context, e.g., when reordered. To address these problems, we present $ \lambda_{ic}^{cdd} $, a core calculus that applies a demand-driven semantics to incremental computation, tracking changes in a hierarchical fashion in a novel demanded computation graph. $ \lambda_{ic}^{cdd} $ also formalizes an explicit separation between inner, incremental computations and outer observers. This combination ensures $ \lambda_{ic}^{cdd} $ programs only recompute computations as demanded by observers, and allows inner computations to be reused more liberally. We present Adapton, an OCaml library implementing $ \lambda_{ic}^{cdd} $. We evaluated Adapton on a range of benchmarks, and found that it provides reliable speedups, and in many cases dramatically outperforms state-of-the-art IC approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Aung:2014:SS, author = "Min Aung and Susan Horwitz and Rich Joiner and Thomas Reps", title = "Specialization slicing", journal = j-SIGPLAN, volume = "49", number = "6", pages = "167--167", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594345", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we investigate opportunities to be gained from broadening the definition of program slicing. A major inspiration for our work comes from the field of partial evaluation, in which a wide repertoire of techniques have been developed for specializing programs. While slicing can also be harnessed for specializing programs, the kind of specialization obtainable via slicing has heretofore been quite restricted, compared to the kind of specialization allowed in partial evaluation. In particular, most slicing algorithms are what the partial-evaluation community calls monovariant: each program element of the original program generates at most one element in the answer. In contrast, partial-evaluation algorithms can be polyvariant, i.e., one program element in the original program may correspond to more than one element in the specialized program. 
The full paper appears in ACM TOPLAS 36 (2), 2014.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Hoare:2014:LCP, author = "Tony Hoare", title = "Laws of concurrent programming", journal = j-SIGPLAN, volume = "49", number = "6", pages = "168--168", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2604002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The talk extends the Laws of Programming [1] by four laws governing concurrent composition of programs. This operator is associative and commutative and distributive through union; and it has the same unit (do nothing) as sequential composition. Furthermore, sequential and concurrent composition distribute through each other, in accordance with an exchange law; this permits an implementation of concurrency by partial interleaving.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Sridharan:2014:AEP, author = "Srinath Sridharan and Gagan Gupta and Gurindar S. Sohi", title = "Adaptive, efficient, parallel execution of parallel programs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "169--180", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594292", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Future multicore processors will be heterogeneous, be increasingly less reliable, and operate in dynamically changing operating conditions. Such environments will result in a constantly varying pool of hardware resources which can greatly complicate the task of efficiently exposing a program's parallelism onto these resources. Coupled with this uncertainty is the diverse set of efficiency metrics that users may desire. This paper proposes Varuna, a system that dynamically, continuously, rapidly and transparently adapts a program's parallelism to best match the instantaneous capabilities of the hardware resources while satisfying different efficiency metrics. Varuna is applicable to both multithreaded and task-based programs and can be seamlessly inserted between the program and the operating system without needing to change the source code of either. We demonstrate Varuna's effectiveness in diverse execution environments using unaltered C/C++ parallel programs from various benchmark suites. Regardless of the execution environment, Varuna always outperformed the state-of-the-art approaches for the efficiency metrics considered.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Gupta:2014:GPR, author = "Gagan Gupta and Srinath Sridharan and Gurindar S. 
Sohi", title = "Globally precise-restartable execution of parallel programs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "181--192", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594306", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging trends in computer design and use are likely to make exceptions, once rare, the norm, especially as the system size grows. Due to exceptions, arising from hardware faults, approximate computing, dynamic resource management, etc., successful and error-free execution of programs may no longer be assured. Yet, designers will want to tolerate the exceptions so that the programs execute completely, efficiently and without external intervention. Modern computers easily handle exceptions in sequential programs, using precise interrupts. But they are ill-equipped to handle exceptions in parallel programs, which are growing in prevalence. In this work we introduce the notion of globally precise-restartable execution of parallel programs, analogous to precise-interruptible execution of sequential programs. We present a software runtime recovery system based on the approach to handle exceptions in suitably-written parallel programs. Qualitative and quantitative analyses show that the proposed system scales with the system size, especially when exceptions are frequent, unlike the conventional checkpoint-and-recovery method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Mitra:2014:AAP, author = "Subrata Mitra and Ignacio Laguna and Dong H. Ahn and Saurabh Bagchi and Martin Schulz and Todd Gamblin", title = "Accurate application progress analysis for large-scale parallel debugging", journal = j-SIGPLAN, volume = "49", number = "6", pages = "193--203", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594336", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging large-scale parallel applications is challenging. In most HPC applications, parallel tasks progress in a coordinated fashion, and thus a fault in one task can quickly propagate to other tasks, making it difficult to debug. Finding the least-progressed tasks can significantly reduce the effort to identify the task where the fault originated. However, existing approaches for detecting them suffer low accuracy and large overheads; either they use imprecise static analysis or are unable to infer progress dependence inside loops. We present a loop-aware progress-dependence analysis tool, Prodometer, which determines relative progress among parallel tasks via dynamic analysis. Our fault-injection experiments suggest that its accuracy and precision are over 90\% for most cases and that it scales well up to 16,384 MPI tasks. 
Further, our case study shows that it significantly helped diagnosing a perplexing error in MPI, which only manifested at large scale.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Tavarageri:2014:CAD, author = "Sanket Tavarageri and Sriram Krishnamoorthy and P. Sadayappan", title = "Compiler-assisted detection of transient memory errors", journal = j-SIGPLAN, volume = "49", number = "6", pages = "204--215", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594298", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The probability of bit flips in hardware memory systems is projected to increase significantly as memory systems continue to scale in size and complexity. Effective hardware-based error detection and correction require that the complete data path, involving all parts of the memory system, be protected with sufficient redundancy. First, this may be costly to employ on commodity computing platforms, and second, even on high-end systems, protection against multi-bit errors may be lacking. Therefore, augmenting hardware error detection schemes with software techniques is of considerable interest. In this paper, we consider software-level mechanisms to comprehensively detect transient memory faults. We develop novel compile-time algorithms to instrument application programs with checksum computation codes to detect memory errors. Unlike prior approaches that employ checksums on computational and architectural states, our scheme verifies every data access and works by tracking variables as they are produced and consumed. Experimental evaluation demonstrates that the proposed comprehensive error detection solution is viable as a completely software-only scheme. We also demonstrate that with limited hardware support, overheads of error detection can be further reduced.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Le:2014:CVE, author = "Vu Le and Mehrdad Afshari and Zhendong Su", title = "Compiler validation via equivalence modulo inputs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "216--226", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594334", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce equivalence modulo inputs (EMI), a simple, widely applicable methodology for validating optimizing compilers. Our key insight is to exploit the close interplay between (1) dynamically executing a program on some test inputs and (2) statically compiling the program to work on all possible inputs. Indeed, the test inputs induce a natural collection of the original program's EMI variants, which can help differentially test any compiler and specifically target the difficult-to-find miscompilations. To create a practical implementation of EMI for validating C compilers, we profile a program's test executions and stochastically prune its unexecuted code. 
Our extensive testing in eleven months has led to 147 confirmed, unique bug reports for GCC and LLVM alone. The majority of those bugs are miscompilations, and more than 100 have already been fixed. Beyond testing compilers, EMI can be adapted to validate program transformation and analysis systems in general. This work opens up this exciting, new direction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Long:2014:ARE, author = "Fan Long and Stelios Sidiroglou-Douskos and Martin Rinard", title = "Automatic runtime error repair and containment via recovery shepherding", journal = j-SIGPLAN, volume = "49", number = "6", pages = "227--238", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594337", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a system, RCV, for enabling software applications to survive divide-by-zero and null-dereference errors. RCV operates directly on off-the-shelf, production, stripped x86 binary executables. RCV implements recovery shepherding, which attaches to the application process when an error occurs, repairs the execution, tracks the repair effects as the execution continues, contains the repair effects within the application process, and detaches from the process after all repair effects are flushed from the process state. RCV therefore incurs negligible overhead during the normal execution of the application. We evaluate RCV on all divide-by-zero and null-dereference errors available in the CVE database [2] from January 2011 to March 2013 that (1) provide publicly-available inputs that trigger the error which (2) we were able to use to trigger the reported error in our experimental environment. We collected a total of 18 errors in seven real world applications, Wireshark, the FreeType library, Claws Mail, LibreOffice, GIMP, the PHP interpreter, and Chromium. For 17 of the 18 errors, RCV enables the application to continue to execute to provide acceptable output and service to its users on the error-triggering inputs. For 13 of the 18 errors, the continued RCV execution eventually flushes all of the repair effects and RCV detaches to restore the application to full clean functionality. 
We perform a manual analysis of the source code relevant to our benchmark errors, which indicates that for 11 of the 18 errors the RCV and later patched versions produce identical or equivalent results on all inputs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Zhang:2014:ARP, author = "Xin Zhang and Ravi Mangal and Radu Grigore and Mayur Naik and Hongseok Yang", title = "On abstraction refinement for program analyses in {Datalog}", journal = j-SIGPLAN, volume = "49", number = "6", pages = "239--248", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594327", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A central task for a program analysis concerns how to efficiently find a program abstraction that keeps only information relevant for proving properties of interest. We present a new approach for finding such abstractions for program analyses written in Datalog. Our approach is based on counterexample-guided abstraction refinement: when a Datalog analysis run fails using an abstraction, it seeks to generalize the cause of the failure to other abstractions, and pick a new abstraction that avoids a similar failure. Our solution uses a boolean satisfiability formulation that is general, complete, and optimal: it is independent of the Datalog solver, it generalizes the failure of an abstraction to as many other abstractions as possible, and it identifies the cheapest refined abstraction to try next. We show the performance of our approach on a pointer analysis and a typestate analysis, on eight real-world Java benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Zhang:2014:HTB, author = "Xin Zhang and Ravi Mangal and Mayur Naik and Hongseok Yang", title = "Hybrid top-down and bottom-up interprocedural analysis", journal = j-SIGPLAN, volume = "49", number = "6", pages = "249--258", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594328", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interprocedural static analyses are broadly classified into top-down and bottom-up, depending upon how they compute, instantiate, and reuse procedure summaries. Both kinds of analyses are challenging to scale: top-down analyses are hindered by ineffective reuse of summaries whereas bottom-up analyses are hindered by inefficient computation and instantiation of summaries. This paper presents a hybrid approach Swift that combines top-down and bottom-up analyses in a manner that gains their benefits without suffering their drawbacks. Swift is general in that it is parametrized by the top-down and bottom-up analyses it combines. We show an instantiation of Swift on a type-state analysis and evaluate it on a suite of 12 Java programs of size 60-250 KLOC each.
Swift outperforms both conventional approaches, finishing on all the programs while both of those approaches fail on the larger programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Arzt:2014:FPC, author = "Steven Arzt and Siegfried Rasthofer and Christian Fritz and Eric Bodden and Alexandre Bartel and Jacques Klein and Yves {Le Traon} and Damien Octeau and Patrick McDaniel", title = "{FlowDroid}: precise context, flow, field, object-sensitive and lifecycle-aware taint analysis for {Android} apps", journal = j-SIGPLAN, volume = "49", number = "6", pages = "259--269", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594299", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's smartphones are a ubiquitous source of private and confidential data. At the same time, smartphone users are plagued by carelessly programmed apps that leak important data by accident, and by malicious apps that exploit their given privileges to copy such data intentionally. While existing static taint-analysis approaches have the potential of detecting such data leaks ahead of time, all approaches for Android use a number of coarse-grain approximations that can yield high numbers of missed leaks and false alarms. In this work we thus present FlowDroid, a novel and highly precise static taint analysis for Android applications. A precise model of Android's lifecycle allows the analysis to properly handle callbacks invoked by the Android framework, while context, flow, field and object-sensitivity allows the analysis to reduce the number of false alarms. Novel on-demand algorithms help FlowDroid maintain high efficiency and precision at the same time. We also propose DroidBench, an open test suite for evaluating the effectiveness and accuracy of taint-analysis tools specifically for Android apps. As we show through a set of experiments using SecuriBench Micro, DroidBench, and a set of well-known Android test applications, FlowDroid finds a very high fraction of data leaks while keeping the rate of false positives low. On DroidBench, FlowDroid achieves 93\% recall and 86\% precision, greatly outperforming the commercial tools IBM AppScan Source and Fortify SCA. 
FlowDroid successfully finds leaks in a subset of 500 apps from Google Play and about 1,000 malware apps from the VirusShare project.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Carbonneaux:2014:EEV, author = "Quentin Carbonneaux and Jan Hoffmann and Tahina Ramananandro and Zhong Shao", title = "End-to-end verification of stack-space bounds for {C} programs", journal = j-SIGPLAN, volume = "49", number = "6", pages = "270--281", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594301", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Verified compilers guarantee the preservation of semantic properties and thus enable formal verification of programs at the source level. However, important quantitative properties such as memory and time usage still have to be verified at the machine level where interactive proofs tend to be more tedious and automation is more challenging. This article describes a framework that enables the formal verification of stack-space bounds of compiled machine code at the C level. It consists of a verified CompCert-based compiler that preserves quantitative properties, a verified quantitative program logic for interactive stack-bound development, and a verified stack analyzer that automatically derives stack bounds during compilation. The framework is based on event traces that record function calls and returns. The source language is CompCert Clight and the target language is x86 assembly. The compiler is implemented in the Coq Proof Assistant and it is proved that crucial properties of event traces are preserved during compilation. A novel quantitative Hoare logic is developed to verify stack-space bounds at the CompCert Clight level. The quantitative logic is implemented in Coq and proved sound with respect to event traces generated by the small-step semantics of CompCert Clight. Stack-space bounds can be proved at the source level without taking into account low-level details that depend on the implementation of the compiler. The compiler fills in these low-level details during compilation and generates a concrete stack-space bound that applies to the produced machine code. The verified stack analyzer is guaranteed to automatically derive bounds for code with non-recursive functions. It generates a derivation in the quantitative logic to ensure soundness as well as interoperability with interactively developed stack bounds. In an experimental evaluation, the developed framework is used to obtain verified stack-space bounds for micro benchmarks as well as real system code. The examples include the verified operating-system kernel CertiKOS, parts of the MiBench embedded benchmark suite, and programs from the CompCert benchmarks. 
The derived bounds are close to the measured stack-space usage of executions of the compiled programs on a Linux x86 system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Ball:2014:VTV, author = "Thomas Ball and Nikolaj Bj{\o}rner and Aaron Gember and Shachar Itzhaky and Aleksandr Karbyshev and Mooly Sagiv and Michael Schapira and Asaf Valadarsky", title = "{VeriCon}: towards verifying controller programs in software-defined networks", journal = j-SIGPLAN, volume = "49", number = "6", pages = "282--293", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594317", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined networking (SDN) is a new paradigm for operating and managing computer networks. SDN enables logically-centralized control over network devices through a ``controller'' software that operates independently from the network hardware, and can be viewed as the network operating system. Network operators can run both inhouse and third-party SDN programs (often called applications) on top of the controller, e.g., to specify routing and access control policies. SDN opens up the possibility of applying formal methods to prove the correctness of computer networks. Indeed, recently much effort has been invested in applying finite state model checking to check that SDN programs behave correctly. However, in general, scaling these methods to large networks is challenging and, moreover, they cannot guarantee the absence of errors. We present VeriCon, the first system for verifying that an SDN program is correct on all admissible topologies and for all possible (infinite) sequences of network events. VeriCon either confirms the correctness of the controller program on all admissible network topologies or outputs a concrete counterexample. VeriCon uses first-order logic to specify admissible network topologies and desired network-wide invariants, and then implements classical Floyd-Hoare-Dijkstra deductive verification using Z3. Our preliminary experience indicates that VeriCon is able to rapidly verify correctness, or identify bugs, for a large repertoire of simple core SDN programs. VeriCon is compositional, in the sense that it verifies the correctness of execution of any single network event w.r.t. the specified invariant, and can thus scale to handle large programs. To relieve the burden of specifying inductive invariants from the programmer, VeriCon includes a separate procedure for inferring invariants, which is shown to be effective on simple controller programs. We view VeriCon as a first step en route to practical mechanisms for verifying network-wide invariants of SDN programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Logozzo:2014:VMV, author = "Francesco Logozzo and Shuvendu K. 
Lahiri and Manuel F{\"a}hndrich and Sam Blackshear", title = "Verification modulo versions: towards usable verification", journal = j-SIGPLAN, volume = "49", number = "6", pages = "294--304", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594326", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce Verification Modulo Versions (VMV), a new static analysis technique for reducing the number of alarms reported by static verifiers while providing sound semantic guarantees. First, VMV extracts semantic environment conditions from a base program P. Environmental conditions can either be sufficient conditions (implying the safety of P) or necessary conditions (implied by the safety of P). Then, VMV instruments a new version of the program, P', with the inferred conditions. We prove that we can use (i) sufficient conditions to identify abstract regressions of P' w.r.t. P; and (ii) necessary conditions to prove the relative correctness of P' w.r.t. P. We show that the extraction of environmental conditions can be performed at a hierarchy of abstraction levels (history, state, or call conditions) with each subsequent level requiring a less sophisticated matching of the syntactic changes between P' and P. Call conditions are particularly useful because they only require the syntactic matching of entry points and callee names across program versions. We have implemented VMV in a widely used static analysis and verification tool. We report our experience on two large code bases and demonstrate a substantial reduction in alarms while additionally providing relative correctness guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Dimitrov:2014:CRD, author = "Dimitar Dimitrov and Veselin Raychev and Martin Vechev and Eric Koskinen", title = "Commutativity race detection", journal = j-SIGPLAN, volume = "49", number = "6", pages = "305--315", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594322", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces the concept of a commutativity race. A commutativity race occurs in a given execution when two library method invocations can happen concurrently yet they do not commute. Commutativity races are an elegant concept enabling reasoning about concurrent interaction at the library interface. We present a dynamic commutativity race detector. Our technique is based on a novel combination of vector clocks and a structural representation automatically obtained from a commutativity specification. Conceptually, our work can be seen as generalizing classical read-write race detection. We also present a new logical fragment for specifying commutativity conditions. This fragment is expressive, yet guarantees a constant number of comparisons per method invocation rather than linear with unrestricted specifications. We implemented our analyzer and evaluated it on real-world applications. 
Experimental results indicate that our analysis is practical: it discovered harmful commutativity races with overhead comparable to state-of-the-art, low-level race detectors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Maiya:2014:RDA, author = "Pallavi Maiya and Aditya Kanade and Rupak Majumdar", title = "Race detection for {Android} applications", journal = j-SIGPLAN, volume = "49", number = "6", pages = "316--325", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594311", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming environments for smartphones expose a concurrency model that combines multi-threading and asynchronous event-based dispatch. While this enables the development of efficient and feature-rich applications, unforeseen thread interleavings coupled with non-deterministic reorderings of asynchronous tasks can lead to subtle concurrency errors in the applications. In this paper, we formalize the concurrency semantics of the Android programming model. We further define the happens-before relation for Android applications, and develop a dynamic race detection technique based on this relation. Our relation generalizes the so far independently studied happens-before relations for multi-threaded programs and single-threaded event-driven programs. Additionally, our race detection technique uses a model of the Android runtime environment to reduce false positives. We have implemented a tool called DroidRacer. It generates execution traces by systematically testing Android applications and detects data races by computing the happens-before relation on the traces. We analyzed 15 Android applications including popular applications such as Facebook, Twitter and K-9 Mail. Our results indicate that data races are prevalent in Android applications, and that DroidRacer is an effective tool to identify data races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Hsiao:2014:RDE, author = "Chun-Hung Hsiao and Jie Yu and Satish Narayanasamy and Ziyun Kong and Cristiano L. Pereira and Gilles A. Pokam and Peter M. Chen and Jason Flinn", title = "Race detection for event-driven mobile applications", journal = j-SIGPLAN, volume = "49", number = "6", pages = "326--336", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594330", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mobile systems commonly support an event-based model of concurrent programming. This model, used in popular platforms such as Android, naturally supports mobile devices that have a rich array of sensors and user input modalities. Unfortunately, most existing tools for detecting concurrency errors of parallel programs focus on a thread-based model of concurrency. 
If one applies such tools directly to an event-based program, they work poorly because they infer false dependencies between unrelated events handled sequentially by the same thread. In this paper we present a race detection tool named CAFA for event-driven mobile systems. CAFA uses the causality model that we have developed for the Android event-driven system. A novel contribution of our model is that it accounts for the causal order due to the event queues, which are not accounted for in past data race detectors. Detecting races based on low-level races between memory accesses leads to a large number of false positives. CAFA overcomes this problem by checking for races between high-level operations. We discuss our experience in using CAFA for finding and understanding a number of known and unknown harmful races in open-source Android applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Huang:2014:MSP, author = "Jeff Huang and Patrick O'Neil Meredith and Grigore Rosu", title = "Maximal sound predictive race detection with control flow abstraction", journal = j-SIGPLAN, volume = "49", number = "6", pages = "337--348", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594315", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the numerous static and dynamic program analysis techniques in the literature, data races remain one of the most common bugs in modern concurrent software. Further, the techniques that do exist either have limited detection capability or are unsound, meaning that they report false positives. We present a sound race detection technique that achieves a provably higher detection capability than existing sound techniques. A key insight of our technique is the inclusion of abstracted control flow information into the execution model, which increases the space of the causal model permitted by classical happens-before or causally-precedes based detectors. By encoding the control flow and a minimal set of feasibility constraints as a group of first-order logic formulae, we formulate race detection as a constraint solving problem. Moreover, we formally prove that our formulation achieves the maximal possible detection capability for any sound dynamic race detector with respect to the same input trace under the sequential consistency memory model. We demonstrate via extensive experimentation that our technique detects more races than the other state-of-the-art sound race detection techniques, and that it is scalable to executions of real world concurrent applications with tens of millions of critical events. These experiments also revealed several previously unknown races in real systems (e.g., Eclipse) that have been confirmed or fixed by the developers. 
Our tool is also adopted by Eclipse developers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{David:2014:TBC, author = "Yaniv David and Eran Yahav", title = "Tracelet-based code search in executables", journal = j-SIGPLAN, volume = "49", number = "6", pages = "349--360", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594343", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the problem of code search in executables. Given a function in binary form and a large code base, our goal is to statically find similar functions in the code base. Towards this end, we present a novel technique for computing similarity between functions. Our notion of similarity is based on decomposition of functions into tracelets: continuous, short, partial traces of an execution. To establish tracelet similarity in the face of low-level compiler transformations, we employ a simple rewriting engine. This engine uses constraint solving over alignment constraints and data dependencies to match registers and memory addresses between tracelets, bridging the gap between tracelets that are otherwise similar. We have implemented our approach and applied it to find matches in over a million binary functions. We compare tracelet matching to approaches based on n-grams and graphlets and show that tracelet matching obtains dramatically better precision and recall.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Pombrio:2014:RLE, author = "Justin Pombrio and Shriram Krishnamurthi", title = "Resugaring: lifting evaluation sequences through syntactic sugar", journal = j-SIGPLAN, volume = "49", number = "6", pages = "361--371", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594319", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Syntactic sugar is pervasive in language technology. It is used to shrink the size of a core language; to define domain-specific languages; and even to let programmers extend their language. Unfortunately, syntactic sugar is eliminated by transformation, so the resulting programs become unfamiliar to authors. Thus, it comes at a price: it obscures the relationship between the user's source program and the program being evaluated. We address this problem by showing how to compute reduction steps in terms of the surface syntax. Each step in the surface language emulates one or more steps in the core language. The computed steps hide the transformation, thus maintaining the abstraction provided by the surface language. We make these statements about emulation and abstraction precise, prove that they hold in our formalism, and verify part of the system in Coq. 
We have implemented this work and applied it to three very different languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{vonHanxleden:2014:SSC, author = "Reinhard von Hanxleden and Bj{\"o}rn Duderstadt and Christian Motika and Steven Smyth and Michael Mendler and Joaqu{\'\i}n Aguado and Stephen Mercer and Owen O'Brien", title = "{SCCharts}: sequentially constructive statecharts for safety-critical applications: {HW\slash SW}-synthesis for a conservative extension of synchronous statecharts", journal = j-SIGPLAN, volume = "49", number = "6", pages = "372--383", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594310", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new visual language, SCCharts, designed for specifying safety-critical reactive systems. SCCharts use a statechart notation and provide determinate concurrency based on a synchronous model of computation (MoC), without restrictions common to previous synchronous MoCs. Specifically, we lift earlier limitations on sequential accesses to shared variables, by leveraging the sequentially constructive MoC. The semantics and key features of SCCharts are defined by a very small set of elements, the Core SCCharts, consisting of state machines plus fork/join concurrency. We also present a compilation chain that allows efficient synthesis of software and hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{DAntoni:2014:FTB, author = "Loris D'Antoni and Margus Veanes and Benjamin Livshits and David Molnar", title = "{Fast}: a transducer-based language for tree manipulation", journal = j-SIGPLAN, volume = "49", number = "6", pages = "384--394", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594309", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tree automata and tree transducers are used in a wide range of applications in software engineering, from XML processing to language type-checking. While these formalisms are of immense practical use, they can only model finite alphabets, and since many real-world applications operate over infinite domains such as integers, this is often a limitation. To overcome this problem we augment tree automata and transducers with symbolic alphabets represented as parametric theories. Admitting infinite alphabets makes these models more general and succinct than their classical counterparts. Despite this, we show how the main operations, such as composition and language equivalence, remain computable given a decision procedure for the alphabet theory. We introduce a high-level language called Fast that acts as a front-end for the above formalisms. Fast supports symbolic alphabets through tight integration with state-of-the-art satisfiability modulo theory (SMT) solvers. 
We demonstrate our techniques on practical case studies, covering a wide range of applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Misra:2014:PPC, author = "Jayadev Misra", title = "A personal perspective on concurrency", journal = j-SIGPLAN, volume = "49", number = "6", pages = "395--395", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2604003", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This talk will describe a view of concurrency, the author's own, as it has evolved since the late 1970s. Early notions of concurrency were intimately tied with physical hardware and speeding up of computations, which proved to be an impediment to the development of a logical theory of concurrency. In collaboration with K. Mani Chandy, the author developed a theory called UNITY that combined a programming notation with a verification logic to describe a large class of fundamental concurrent algorithms arising in operating systems, communication protocols and distributed systems. Several model checkers, including Murphi, developed by David Dill, are based on UNITY. A limitation of UNITY was a lack of adequate structuring mechanism. While this was not a major problem in low-level applications, the current wide-spread use of concurrency requires theories that go beyond managing infrastructure to the level of massive applications. Our current research, a programming model called Orc, introduces mechanisms to organize the communication, synchronization and coordination in programs that run on wide-area networks. Orc includes constructs to orchestrate the concurrent invocation of services to achieve a goal --- while managing time-outs, priorities, and failure of sites or communication.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Phothilimthana:2014:CSA, author = "Phitchaya Mangpo Phothilimthana and Tikhon Jelvis and Rohin Shah and Nishant Totla and Sarah Chasins and Rastislav Bodik", title = "{Chlorophyll}: synthesis-aided compiler for low-power spatial architectures", journal = j-SIGPLAN, volume = "49", number = "6", pages = "396--407", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594339", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We developed Chlorophyll, a synthesis-aided programming model and compiler for the GreenArrays GA144, an extremely minimalist low-power spatial architecture that requires partitioning the program into fragments of no more than 256 instructions and 64 words of data. This processor is 100-times more energy efficient than its competitors, but currently can only be programmed using a low-level stack-based language. The Chlorophyll programming model allows programmers to provide human insight by specifying partial partitioning of data and computation. 
The Chlorophyll compiler relies on synthesis, sidestepping the need to develop classical optimizations, which may be challenging given the unusual architecture. To scale synthesis to real problems, we decompose the compilation into smaller synthesis subproblems---partitioning, layout, and code generation. We show that the synthesized programs are no more than 65\% slower than highly optimized expert-written programs and are faster than programs produced by a heuristic, non-synthesizing version of our compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Perelman:2014:TDS, author = "Daniel Perelman and Sumit Gulwani and Dan Grossman and Peter Provost", title = "Test-driven synthesis", journal = j-SIGPLAN, volume = "49", number = "6", pages = "408--418", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594297", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming-by-example technologies empower end-users to create simple programs merely by providing input/output examples. Existing systems are designed around solvers specialized for a specific set of data types or domain-specific language (DSL). We present a program synthesizer which can be parameterized by an arbitrary DSL that may contain conditionals and loops and therefore is able to synthesize programs in any domain. In order to use our synthesizer, the user provides a sequence of increasingly sophisticated input/output examples along with an expert-written DSL definition. These two inputs correspond to the two key ideas that allow our synthesizer to work in arbitrary domains. First, we developed a novel iterative synthesis technique inspired by test-driven development---which also gives our technique the name of test-driven synthesis---where the input/output examples are consumed one at a time as the program is refined. Second, the DSL allows our system to take an efficient component-based approach to enumerating possible programs. We present applications of our synthesis methodology to end-user programming for transformations over strings, XML, and table layouts. We compare our synthesizer on these applications to state-of-the-art DSL-specific synthesizers as well as to the general-purpose synthesizer Sketch.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Raychev:2014:CCS, author = "Veselin Raychev and Martin Vechev and Eran Yahav", title = "Code completion with statistical language models", journal = j-SIGPLAN, volume = "49", number = "6", pages = "419--428", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594321", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the problem of synthesizing code completions for programs using APIs. Given a program with holes, we synthesize completions for holes with the most likely sequences of method calls.
Our main idea is to reduce the problem of code completion to a natural-language processing problem of predicting probabilities of sentences. We design a simple and scalable static analysis that extracts sequences of method calls from a large codebase, and index these into a statistical language model. We then employ the language model to find the highest ranked sentences, and use them to synthesize a code completion. Our approach is able to synthesize sequences of calls across multiple objects together with their arguments. Experiments show that our approach is fast and effective. Virtually all computed completions typecheck, and the desired completion appears in the top 3 results in 90\% of the cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Greenaway:2014:DSS, author = "David Greenaway and Japheth Lim and June Andronick and Gerwin Klein", title = "Don't sweat the small stuff: formal verification of {C} code without the pain", journal = j-SIGPLAN, volume = "49", number = "6", pages = "429--439", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594296", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach for automatically generating provably correct abstractions from C source code that are useful for practical implementation verification. The abstractions are easier for a human verification engineer to reason about than the implementation and increase the productivity of interactive code proof. We guarantee soundness by automatically generating proofs that the abstractions are correct. In particular, we show two key abstractions that are critical for verifying systems-level C code: automatically turning potentially overflowing machine-word arithmetic into ideal integers, and transforming low-level C pointer reasoning into separate abstract heaps. Previous work carrying out such transformations has either done so using unverified translations, or required significant proof engineering effort. We implement these abstractions in an existing proof-producing specification transformation framework named AutoCorres, developed in Isabelle/HOL, and demonstrate its effectiveness in a number of case studies. We show scalability on multiple OS microkernels, and we show how our changes to AutoCorres improve productivity for total correctness by porting an existing high-level verification of the Schorr--Waite algorithm to a low-level C implementation with minimal effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Pek:2014:NPD, author = "Edgar Pek and Xiaokang Qiu and P. 
Madhusudan", title = "Natural proofs for data structure manipulation in {C} using separation logic", journal = j-SIGPLAN, volume = "49", number = "6", pages = "440--451", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594325", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The natural proof technique for heap verification developed by Qiu et al. [32] provides a platform for powerful sound reasoning for specifications written in a dialect of separation logic called Dryad. Natural proofs are proof tactics that enable automated reasoning exploiting recursion, mimicking common patterns found in human proofs. However, these proofs are known to work only for a simple toy language [32]. In this work, we develop a framework called VCDryad that extends the Vcc framework [9] to provide an automated deductive framework against separation logic specifications for C programs based on natural proofs. We develop several new techniques to build this framework, including (a) a novel tool architecture that allows encoding natural proofs at a higher level in order to use the existing Vcc framework (including its intricate memory model, the underlying type-checker, and the SMT-based verification infrastructure), and (b) a synthesis of ghost-code annotations that captures natural proof tactics, in essence forcing Vcc to find natural proofs using primarily decidable theories. We evaluate our tool extensively, on more than 150 programs, ranging from code manipulating standard data structures, well-known open source library routines (Glib, OpenBSD), Linux kernel routines, customized OS data structures, etc. We show that all these C programs can be fully automatically verified using natural proofs (given pre/post conditions and loop invariants) without any user-provided proof tactics. VCDryad is perhaps the first deductive verification framework for heap-manipulating programs in a real language that can prove such a wide variety of programs automatically.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Ricketts:2014:AFP, author = "Daniel Ricketts and Valentin Robert and Dongseok Jang and Zachary Tatlock and Sorin Lerner", title = "Automating formal proofs for reactive systems", journal = j-SIGPLAN, volume = "49", number = "6", pages = "452--462", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594338", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Implementing systems in proof assistants like Coq and proving their correctness in full formal detail has consistently demonstrated promise for making extremely strong guarantees about critical software, ranging from compilers and operating systems to databases and web browsers. Unfortunately, these verifications demand such heroic manual proof effort, even for a single system, that the approach has not been widely adopted. 
We demonstrate a technique to eliminate the manual proof burden for verifying many properties within an entire class of applications, in our case reactive systems, while only expending effort comparable to the manual verification of a single system. A crucial insight of our approach is simultaneously designing both (1) a domain-specific language (DSL) for expressing reactive systems and their correctness properties and (2) proof automation which exploits the constrained language of both programs and properties to enable fully automatic, pushbutton verification. We apply this insight in a deeply embedded Coq DSL, dubbed Reflex, and illustrate Reflex's expressiveness by implementing and automatically verifying realistic systems including a modern web browser, an SSH server, and a web server. Using Reflex radically reduced the proof burden: in previous, similar versions of our benchmarks written in Coq by experts, proofs accounted for over 80\% of the code base; our versions require no manual proofs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Xiao:2014:PPI, author = "Xiao Xiao and Qirun Zhang and Jinguo Zhou and Charles Zhang", title = "Persistent pointer information", journal = j-SIGPLAN, volume = "49", number = "6", pages = "463--474", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594314", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Pointer information, indispensable for static analysis tools, is expensive to compute and query. We provide a query-efficient persistence technique, Pestrie, to mitigate the costly computation and slow querying of precise pointer information. Leveraging equivalence and hub properties, Pestrie can compress pointer information and answers pointer related queries very efficiently. The experiment shows that Pestrie produces 10.5X and 17.5X smaller persistent files than the traditional bitmap and BDD encodings. Meanwhile, Pestrie is 2.9X to 123.6X faster than traditional demand-driven approaches for serving points-to related queries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Oh:2014:SCS, author = "Hakjoo Oh and Wonchan Lee and Kihong Heo and Hongseok Yang and Kwangkeun Yi", title = "Selective context-sensitivity guided by impact pre-analysis", journal = j-SIGPLAN, volume = "49", number = "6", pages = "475--484", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594318", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method for selectively applying context-sensitivity during interprocedural program analysis. Our method applies context-sensitivity only when and where doing so is likely to improve the precision that matters for resolving given queries. 
The idea is to use a pre-analysis to estimate the impact of context-sensitivity on the main analysis's precision, and to use this information to find out when and where the main analysis should turn on or off its context-sensitivity. We formalize this approach and prove that the analysis always benefits from the pre-analysis-guided context-sensitivity. We implemented this selective method for an existing industrial-strength interval analyzer for full C. The method reduced the number of (false) alarms by 24.4\%, while increasing the analysis cost by 27.8\% on average. The use of the selective method is not limited to context-sensitivity. We demonstrate this generality by following the same principle and developing a selective relational analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Smaragdakis:2014:IAC, author = "Yannis Smaragdakis and George Kastrinis and George Balatsouras", title = "Introspective analysis: context-sensitivity, across the board", journal = j-SIGPLAN, volume = "49", number = "6", pages = "485--495", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594320", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Context-sensitivity is the primary approach for adding more precision to a points-to analysis, while hopefully also maintaining scalability. An oft-reported problem with context-sensitive analyses, however, is that they are bi-modal: either the analysis is precise enough that it manipulates only manageable sets of data, and thus scales impressively well, or the analysis gets quickly derailed at the first sign of imprecision and becomes orders-of-magnitude more expensive than would be expected given the program's size. There is currently no approach that makes precise context-sensitive analyses (of any flavor: call-site-, object-, or type-sensitive) scale across the board at a level comparable to that of a context-insensitive analysis. To address this issue, we propose introspective analysis: a technique for uniformly scaling context-sensitive analysis by eliminating its performance-detrimental behavior, at a small precision expense. Introspective analysis consists of a common adaptivity pattern: first perform a context-insensitive analysis, then use the results to selectively refine (i.e., analyze context-sensitively) program elements that will not cause explosion in the running time or space. The technical challenge is to appropriately identify such program elements. We show that a simple but principled approach can be remarkably effective, achieving scalability (often with dramatic speedup) for benchmarks previously completely out-of-reach for deep context-sensitive analyses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Ahn:2014:IJP, author = "Wonsun Ahn and Jiho Choi and Thomas Shull and Mar{\'\i}a J. 
Garzar{\'a}n and Josep Torrellas", title = "Improving {JavaScript} performance by deconstructing the type system", journal = j-SIGPLAN, volume = "49", number = "6", pages = "496--507", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594332", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increased focus on JavaScript performance has resulted in vast performance improvements for many benchmarks. However, for actual code used in websites, the attained improvements often lag far behind those for popular benchmarks. This paper shows that the main reason behind this short-fall is how the compiler understands types. JavaScript has no concept of types, but the compiler assigns types to objects anyway for ease of code generation. We examine the way that the Chrome V8 compiler defines types, and identify two design decisions that are the main reasons for the lack of improvement: (1) the inherited prototype object is part of the current object's type definition, and (2) method bindings are also part of the type definition. These requirements make types very unpredictable, which hinders type specialization by the compiler. Hence, we modify V8 to remove these requirements, and use it to compile the JavaScript code assembled by JSBench from real websites. On average, we reduce the execution time of JSBench by 36\%, and the dynamic instruction count by 49\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Vilk:2014:DBB, author = "John Vilk and Emery D. Berger", title = "{Doppio}: breaking the browser language barrier", journal = j-SIGPLAN, volume = "49", number = "6", pages = "508--518", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594293", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web browsers have become a de facto universal operating system, and JavaScript its instruction set. Unfortunately, running other languages in the browser is not generally possible. Translation to JavaScript is not enough because browsers are a hostile environment for other languages. Previous approaches are either non-portable or require extensive modifications for programs to work in a browser. This paper presents Doppio, a JavaScript-based runtime system that makes it possible to run unaltered applications written in general-purpose languages directly inside the browser. Doppio provides a wide range of runtime services, including a file system that enables local and external (cloud-based) storage, an unmanaged heap, sockets, blocking I/O, and multiple threads. We demonstrate DOPPIO's usefulness with two case studies: we extend Emscripten with Doppio, letting it run an unmodified C++ application in the browser with full functionality, and present DoppioJVM, an interpreter that runs unmodified JVM programs directly in the browser. 
While substantially slower than a native JVM (between 24X and 42X slower on CPU-intensive benchmarks in Google Chrome), DoppioJVM makes it feasible to directly reuse existing, non compute-intensive code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Lu:2014:DED, author = "Li Lu and Weixing Ji and Michael L. Scott", title = "Dynamic enforcement of determinism in a parallel scripting language", journal = j-SIGPLAN, volume = "49", number = "6", pages = "519--529", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594300", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Determinism is an appealing property for parallel programs, as it simplifies understanding, reasoning and debugging. It is particularly appealing in dynamic (scripting) languages, where ease of programming is a dominant design goal. Some existing parallel languages use the type system to enforce determinism statically, but this is not generally practical for dynamic languages. In this paper, we describe how determinism can be obtained---and dynamically enforced/verified---for appropriate extensions to a parallel scripting language. Specifically, we introduce the constructs of Deterministic Parallel Ruby (DPR), together with a run-time system (Tardis) that verifies properties required for determinism, including correct usage of reductions and commutative operators, and the mutual independence (data-race freedom) of concurrent tasks. Experimental results confirm that DPR can provide scalable performance on multicore machines and that the overhead of Tardis is low enough for practical testing. In particular, Tardis significantly outperforms alternative data-race detectors with comparable functionality. We conclude with a discussion of future directions in the dynamic enforcement of determinism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Torlak:2014:LSV, author = "Emina Torlak and Rastislav Bodik", title = "A lightweight symbolic virtual machine for solver-aided host languages", journal = j-SIGPLAN, volume = "49", number = "6", pages = "530--541", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594340", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Solver-aided domain-specific languages (SDSLs) are an emerging class of computer-aided programming systems. They ease the construction of programs by using satisfiability solvers to automate tasks such as verification, debugging, synthesis, and non-deterministic execution. But reducing programming tasks to satisfiability problems involves translating programs to logical constraints, which is an engineering challenge even for domain-specific languages. We have previously shown that translation to constraints can be avoided if SDSLs are implemented by (traditional) embedding into a host language that is itself solver-aided. 
This paper describes how to implement a symbolic virtual machine (SVM) for such a host language. Our symbolic virtual machine is lightweight because it compiles to constraints only a small subset of the host's constructs, while allowing SDSL designers to use the entire language, including constructs for DSL embedding. This lightweight compilation employs a novel symbolic execution technique with two key properties: it produces compact encodings, and it enables concrete evaluation to strip away host constructs that are outside the subset compilable to constraints. Our symbolic virtual machine architecture is at the heart of Rosette, a solver-aided language that is host to several new SDSLs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Le:2014:FFD, author = "Vu Le and Sumit Gulwani", title = "{FlashExtract}: a framework for data extraction by examples", journal = j-SIGPLAN, volume = "49", number = "6", pages = "542--553", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594333", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Various document types that combine model and view (e.g., text files, webpages, spreadsheets) make it easy to organize (possibly hierarchical) data, but make it difficult to extract raw data for any further manipulation or querying. We present a general framework FlashExtract to extract relevant data from semi-structured documents using examples. It includes: (a) an interaction model that allows end-users to give examples to extract various fields and to relate them in a hierarchical organization using structure and sequence constructs. (b) an inductive synthesis algorithm to synthesize the intended program from few examples in any underlying domain-specific language for data extraction that has been built using our specified algebra of few core operators (map, filter, merge, and pair). We describe instantiation of our framework to three different domains: text files, webpages, and spreadsheets. On our benchmark comprising 75 documents, FlashExtract is able to extract intended data using an average of 2.36 examples in 0.84 seconds per field.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Sousa:2014:CQU, author = "Marcelo Sousa and Isil Dillig and Dimitrios Vytiniotis and Thomas Dillig and Christos Gkantsidis", title = "Consolidation of queries with user-defined functions", journal = j-SIGPLAN, volume = "49", number = "6", pages = "554--564", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594305", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Motivated by streaming and data analytics scenarios where many queries operate on the same data and perform similar computations, we propose program consolidation for merging multiple user-defined functions (UDFs) that operate on the same input. 
Program consolidation exploits common computations between UDFs to generate an equivalent optimized function whose execution cost is often much smaller (and never greater) than the sum of the costs of executing each function individually. We present a sound consolidation calculus and an effective algorithm for consolidating multiple UDFs. Our approach is purely static and uses symbolic SMT-based techniques to identify shared or redundant computations. We have implemented the proposed technique on top of the Naiad data processing system. Our experiments show that our algorithm dramatically improves overall job completion time when executing user-defined filters that operate on the same data and perform similar computations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Luu:2014:MCC, author = "Loi Luu and Shweta Shinde and Prateek Saxena and Brian Demsky", title = "A model counter for constraints over unbounded strings", journal = j-SIGPLAN, volume = "49", number = "6", pages = "565--576", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594331", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Model counting is the problem of determining the number of solutions that satisfy a given set of constraints. Model counting has numerous applications in the quantitative analyses of program execution time, information flow, combinatorial circuit designs as well as probabilistic reasoning. We present a new approach to model counting for structured data types, specifically strings in this work. The key ingredient is a new technique that leverages generating functions as a basic primitive for combinatorial counting. Our tool SMC which embodies this approach can model count for constraints specified in an expressive string language efficiently and precisely, thereby outperforming previous finite-size analysis tools. SMC is expressive enough to model constraints arising in real-world JavaScript applications and UNIX C utilities. We demonstrate the practical feasibility of performing quantitative analyses arising in security applications, such as determining the comparative strengths of password strength meters and determining the information leakage via side channels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Niu:2014:MCF, author = "Ben Niu and Gang Tan", title = "Modular control-flow integrity", journal = j-SIGPLAN, volume = "49", number = "6", pages = "577--587", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594295", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Control-Flow Integrity (CFI) is a software-hardening technique. It inlines checks into a program so that its execution always follows a predetermined Control-Flow Graph (CFG). 
As a result, CFI is effective at preventing control-flow hijacking attacks. However, past fine-grained CFI implementations do not support separate compilation, which hinders its adoption. We present Modular Control-Flow Integrity (MCFI), a new CFI technique that supports separate compilation. MCFI allows modules to be independently instrumented and linked statically or dynamically. The combined module enforces a CFG that is a combination of the individual modules' CFGs. One challenge in supporting dynamic linking in multithreaded code is how to ensure a safe transition from the old CFG to the new CFG when libraries are dynamically linked. The key technique we use is to have the CFG represented in a runtime data structure and have reads and updates of the data structure wrapped in transactions to ensure thread safety. Our evaluation on SPECCPU2006 benchmarks shows that MCFI supports separate compilation, incurs low overhead of around 5\%, and enhances security.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Yang:2014:DSL, author = "Edward Z. Yang and David Mazi{\`e}res", title = "Dynamic space limits for {Haskell}", journal = j-SIGPLAN, volume = "49", number = "6", pages = "588--598", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594341", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe the semantics and implementation of a space limits system for Haskell, which allows programmers to create resource containers that enforce bounded resident memory usage at runtime. Our system is distinguished by a clear allocator-pays semantics drawn from previous experience with profiling in Haskell and an implementation strategy which uses a block-structured heap to organize containers, allowing us to enforce limits with high accuracy. To deal with the problem of deallocating data in a garbage collected heap, we propose a novel taint-based mechanism that unifies the existing practices of revocable pointers and killing threads in order to reclaim memory. Our system is implemented in GHC, a production-strength compiler for Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", received = "PLDI '14 conference proceedings.", } @Article{Tsafrir:2014:ELV, author = "Dan Tsafrir", title = "Experiences in the land of virtual abstractions", journal = j-SIGPLAN, volume = "49", number = "7", pages = "1--2", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576215", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "The Microsoft Research Drawbridge Project began with a simple question: Is it possible to achieve the benefits of hardware virtual machines without the overheads? Following that question, we have built a line of exploratory prototypes. These prototypes range from an ARM-based phone that runs x86 Windows binaries to new forms of secure computation. 
In this talk, I'll briefly describe our various prototypes and the evidence we have accumulated that our first question can be answered in the affirmative.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Hizver:2014:RTD, author = "Jennia Hizver and Tzi-cker Chiueh", title = "Real-time deep virtual machine introspection and its applications", journal = j-SIGPLAN, volume = "49", number = "7", pages = "3--14", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtual Machine Introspection (VMI) provides the ability to monitor virtual machines (VM) in an agentless fashion by gathering VM execution states from the hypervisor and analyzing those states to extract information about a running operating system (OS) without installing an agent inside the VM. VMI's main challenge lies in the difficulty in converting low-level byte string values into high-level semantic states of the monitored VM's OS. In this work, we tackle this challenge by developing a real-time kernel data structure monitoring (RTKDSM) system that leverages the rich OS analysis capabilities of Volatility, an open source computer forensics framework, to significantly simplify and automate analysis of VM execution states. The RTKDSM system is designed as an extensible software framework that is meant to be extended to perform application-specific VM state analysis. In addition, the RTKDSM system is able to perform real-time monitoring of any changes made to the extracted OS states of guest VMs. This real-time monitoring capability is especially important for VMI-based security applications. To minimize the performance overhead associated with real-time kernel data structure monitoring, the RTKDSM system has incorporated several optimizations whose effectiveness is reported in this paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Arya:2014:TRG, author = "Kapil Arya and Yury Baskakov and Alex Garthwaite", title = "Tesseract: reconciling guest {I/O} and hypervisor swapping in a {VM}", journal = j-SIGPLAN, volume = "49", number = "7", pages = "15--28", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576198", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Double-paging is an often-cited, if unsubstantiated, problem in multi-level scheduling of memory between virtual machines (VMs) and the hypervisor. This problem occurs when both a virtualized guest and the hypervisor overcommit their respective physical address-spaces. 
When the guest pages out memory previously swapped out by the hypervisor, it initiates an expensive sequence of steps causing the contents to be read in from the hypervisor swapfile only to be written out again, significantly lengthening the time to complete the guest I/O request. As a result, performance rapidly drops. We present Tesseract, a system that directly and transparently addresses the double-paging problem. Tesseract tracks when guest and hypervisor I/O operations are redundant and modifies these I/Os to create indirections to existing disk blocks containing the page contents. Although our focus is on reconciling I/Os between the guest disks and hypervisor swap, our technique is general and can reconcile, or deduplicate, I/Os for guest pages read or written by the VM. Deduplication of disk blocks for file contents accessed in a common manner is well-understood. One challenge that our approach faces is that the locality of guest I/Os (reflecting the guest's notion of disk layout) often differs from that of the blocks in the hypervisor swap. This loss of locality through indirection results in significant performance loss on subsequent guest reads. We propose two alternatives to recovering this lost locality, each based on the idea of asynchronously reorganizing the indirected blocks in persistent storage. We evaluate our system and show that it can significantly reduce the costs of double-paging. We focus our experiments on a synthetic benchmark designed to highlight its effects. In our experiments we observe Tesseract can improve our benchmark's throughput by as much as 200\% when using traditional disks and by as much as 30\% when using SSD. At the same time worst case application responsiveness can be improved by a factor of 5.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Kim:2014:VAM, author = "Hwanju Kim and Sangwook Kim and Jinkyu Jeong and Joonwon Lee", title = "Virtual asymmetric multiprocessor for interactive performance of consolidated desktops", journal = j-SIGPLAN, volume = "49", number = "7", pages = "29--40", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576199", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "This paper presents virtual asymmetric multiprocessor, a new scheme of virtual desktop scheduling on multi-core processors for user-interactive performance. The proposed scheme enables virtual CPUs to be dynamically performance-asymmetric based on their hosted workloads. To enhance user experience on consolidated desktops, our scheme provides interactive workloads with fast virtual CPUs, which have more computing power than those hosting background workloads in the same virtual machine. To this end, we devise a hypervisor extension that transparently classifies background tasks from potentially interactive workloads. In addition, we introduce a guest extension that manipulates the scheduling policy of an operating system in favor of our hypervisor-level scheme so that interactive performance can be further improved. 
Our evaluation shows that the proposed scheme significantly improves interactive performance of application launch, Web browsing, and video playback applications when CPU-intensive workloads highly disturb the interactive workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Ben-Yehuda:2014:GMD, author = "Orna Agmon Ben-Yehuda and Eyal Posener and Muli Ben-Yehuda and Assaf Schuster and Ahuva Mu'alem", title = "{Ginseng}: market-driven memory allocation", journal = j-SIGPLAN, volume = "49", number = "7", pages = "41--52", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576197", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Physical memory is the scarcest resource in today's cloud computing platforms. Cloud providers would like to maximize their clients' satisfaction by renting precious physical memory to those clients who value it the most. But real-world cloud clients are selfish: they will only tell their providers the truth about how much they value memory when it is in their own best interest to do so. How can real-world cloud providers allocate memory efficiently to those (selfish) clients who value it the most? We present Ginseng, the first market-driven cloud system that allocates memory efficiently to selfish cloud clients. Ginseng incentivizes selfish clients to bid their true value for the memory they need when they need it. Ginseng continuously collects client bids, finds an efficient memory allocation, and re-allocates physical memory to the clients that value it the most. Ginseng achieves a 6.2$ \times $--15.8x improvement (83\%--100\% of the optimum) in aggregate client satisfaction when compared with state-of-the-art approaches for cloud memory allocation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Hwang:2014:MFG, author = "Jinho Hwang and Ahsen Uppal and Timothy Wood and Howie Huang", title = "{Mortar}: filling the gaps in data center memory", journal = j-SIGPLAN, volume = "49", number = "7", pages = "53--64", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576203", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Data center servers are typically overprovisioned, leaving spare memory and CPU capacity idle to handle unpredictable workload bursts by the virtual machines running on them. While this allows for fast hotspot mitigation, it is also wasteful. Unfortunately, making use of spare capacity without impacting active applications is particularly difficult for memory since it typically must be allocated in coarse chunks over long timescales. In this work we propose repurposing the poorly utilized memory in a data center to store a volatile data store that is managed by the hypervisor. 
We present two uses for our Mortar framework: as a cache for prefetching disk blocks, and as an application-level distributed cache that follows the memcached protocol. Both prototypes use the framework to ask the hypervisor to store useful, but recoverable data within its free memory pool. This allows the hypervisor to control eviction policies and prioritize access to the cache. We demonstrate the benefits of our prototypes using realistic web applications and disk benchmarks, as well as memory traces gathered from live servers in our university's IT department. By expanding and contracting the data store size based on the free memory available, Mortar improves average response time of a web application by up to 35\% compared to a fixed size memcached deployment, and improves overall video streaming performance by 45\% through prefetching.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Chen:2014:CCB, author = "Licheng Chen and Zhipeng Wei and Zehan Cui and Mingyu Chen and Haiyang Pan and Yungang Bao", title = "{CMD}: classification-based memory deduplication through page access characteristics", journal = j-SIGPLAN, volume = "49", number = "7", pages = "65--76", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576204", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Limited main memory size is considered as one of the major bottlenecks in virtualization environments. Content-Based Page Sharing (CBPS) is an efficient memory deduplication technique to reduce server memory requirements, in which pages with same content are detected and shared into a single copy. As the widely used implementation of CBPS, Kernel Samepage Merging (KSM) maintains the whole memory pages into two global comparison trees (a stable tree and an unstable tree). To detect page sharing opportunities, each tracked page needs to be compared with pages already in these two large global trees. However since the vast majority of compared pages have different content with it, that will induce massive futility comparisons and thus heavy overhead. In this paper, we propose a lightweight page Classification-based Memory Deduplication approach named CMD to reduce futile page comparison overhead meanwhile to detect page sharing opportunities efficiently. The main innovation of CMD is that pages are grouped into different classifications based on page access characteristics. Pages with similar access characteristics are suggested to have higher possibility with same content, thus they are grouped into the same classification. In CMD, the large global comparison trees are divided into multiple small trees with dedicated local ones in each page classification. Page comparisons are performed just in the same classification, and pages from different classifications are never compared (since they probably result in futile comparisons). 
The experimental results show that CMD can efficiently reduce page comparisons (by about 68.5\%) meanwhile detect nearly the same (by more than 98\%) or even more page sharing opportunities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Robatmili:2014:MRL, author = "Behnam Robatmili and Calin Cascaval and Mehrdad Reshadi and Madhukar N. Kedlaya and Seth Fowler and Vrajesh Bhavsar and Michael Weber and Ben Hardekopf", title = "{MuscalietJS}: rethinking layered dynamic web runtimes", journal = j-SIGPLAN, volume = "49", number = "7", pages = "77--88", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576211", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Layered JavaScript engines, in which the JavaScript runtime is built on top another managed runtime, provide better extensibility and portability compared to traditional monolithic engines. In this paper, we revisit the design of layered JavaScript engines and propose a layered architecture, called MuscalietJS2, that splits the responsibilities of a JavaScript engine between a high-level, JavaScript-specific component and a low-level, language-agnostic .NET VM. To make up for the performance loss due to layering, we propose a two pronged approach: high-level JavaScript optimizations and exploitation of low-level VM features that produce very efficient code for hot functions. We demonstrate the validity of the MuscalietJS design through a comprehensive evaluation using both the Sunspider benchmarks and a set of web workloads. We demonstrate that our approach outperforms other layered engines such as IronJS and Rhino engines while providing extensibility, adaptability and portability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Kalibera:2014:FAS, author = "Tomas Kalibera and Petr Maj and Floreal Morandat and Jan Vitek", title = "A fast abstract syntax tree interpreter for {R}", journal = j-SIGPLAN, volume = "49", number = "7", pages = "89--102", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576205", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Dynamic languages have been gaining popularity to the point that their performance is starting to matter. The effort required to develop a production-quality, high-performance runtime is, however, staggering and the expertise required to do so is often out of reach of the community maintaining a particular language. Many domain specific languages remain stuck with naive implementations, as they are easy to write and simple to maintain for domain scientists. 
In this paper, we try to see how far one can push a naive implementation while remaining portable and not requiring expertise in compilers and runtime systems. We choose the R language, a dynamic language used in statistics, as the target of our experiment and adopt the simplest possible implementation strategy, one based on evaluation of abstract syntax trees. We build our interpreter on top of a Java virtual machine and use only facilities available to all Java programmers. We compare our results to other implementations of R.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Kedlaya:2014:DDL, author = "Madhukar N. Kedlaya and Behnam Robatmili and Calin Cascaval and Ben Hardekopf", title = "Deoptimization for dynamic language {JITs} on typed, stack-based virtual machines", journal = j-SIGPLAN, volume = "49", number = "7", pages = "103--114", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576209", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We are interested in implementing dynamic language runtimes on top of language-level virtual machines. Type specialization is a critical optimization for dynamic language runtimes: generic code that handles any type of data is replaced with specialized code for particular types observed during execution. However, types can change, and the runtime must recover whenever unexpected types are encountered. The state-of-the-art recovery mechanism is called deoptimization. Deoptimization is a well-known technique for dynamic language runtimes implemented in low-level languages like C. However, no dynamic language runtime implemented on top of a virtual machine such as the Common Language Runtime (CLR) or the Java Virtual Machine (JVM) uses deoptimization, because the implementation thereof used in low-level languages is not possible. In this paper we propose a novel technique that enables deoptimization for dynamic language runtimes implemented on top of typed, stack-based virtual machines. Our technique does not require any changes to the underlying virtual machine. We implement our proposed technique in a JavaScript language implementation, MCJS, running on top of the Mono runtime (CLR). We evaluate our implementation against the current state-of-the-art recovery mechanism for virtual machine-based runtimes, as implemented both in MCJS and in IronJS.
We show that deoptimization provides significant performance benefits, even for runtimes running on top of a virtual machine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Vitek:2014:CTR, author = "Jan Vitek", title = "The case for the three {R}'s of systems research: repeatability, reproducibility and rigor", journal = j-SIGPLAN, volume = "49", number = "7", pages = "115--116", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576216", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Computer systems research spans sub-disciplines that include embedded systems, programming languages, networking, and operating systems. In this talk my contention is that a number of structural factors inhibit quality systems research. Symptoms of the problem include unrepeatable and unreproduced results as well as results that are either devoid of meaning or that measure the wrong thing. I will illustrate the impact of these issues on our research output with examples from the development and empirical evaluation of the Schism real-time garbage collection algorithm that is shipped with the FijiVM --- a Java virtual machine for embedded and mobile devices. I will argue that our field should foster: repetition of results, independent reproduction, as well as rigorous evaluation. I will outline some baby steps taken by several computer conferences. In particular I will focus on the introduction of Artifact Evaluation Committees or AECs to ECOOP, OOPSLA, PLDI and soon POPL. The goal of the AECs is to encourage authors to package the software artifacts that they used to support the claims made in their paper and to submit these artifacts for evaluation. AECs were carefully designed to provide positive feedback to the authors that take the time to create repeatable research.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Chang:2014:EMV, author = "Chao-Jui Chang and Jan-Jan Wu and Wei-Chung Hsu and Pangfeng Liu and Pen-Chung Yew", title = "Efficient memory virtualization for {Cross-ISA} system mode emulation", journal = j-SIGPLAN, volume = "49", number = "7", pages = "117--128", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576201", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Cross-ISA system-mode emulation has many important applications. For example, Cross-ISA system-mode emulation helps computer architects and OS developers trace and debug kernel execution-flow efficiently by emulating a slower platform (such as ARM) on a more powerful platform (such as an x86 machine). Cross-ISA system-mode emulation also enables workload consolidation in data centers with platforms of different instruction-set architectures (ISAs).
However, system-mode emulation is much slower. One major overhead in system-mode emulation is the multi-level memory address translation that maps guest virtual address to host physical address. Shadow page tables (SPT) have been used to reduce such overheads, but primarily for same-ISA virtualization. In this paper we propose a novel approach called embedded shadow page tables (ESPT). ESPT embeds a shadow page table into the address space of a cross-ISA dynamic binary translation (DBT) and uses hardware memory management unit in the CPU to translate memory addresses, instead of software translation in a current DBT emulator like QEMU. We also use the larger address space on modern 64-bit CPUs to accommodate our DBT emulator so that it will not interfere with the guest operating system. We incorporate our new scheme into QEMU, a popular, retargetable cross-ISA system emulator. SPEC CINT2006 benchmark results indicate that our technique achieves an average speedup of 1.51 times in system mode when emulating ARM on x86, and a 1.59 times speedup for emulating IA32 on x86_64.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Zhang:2014:PSS, author = "Mingwei Zhang and Rui Qiao and Niranjan Hasabnis and R. Sekar", title = "A platform for secure static binary instrumentation", journal = j-SIGPLAN, volume = "49", number = "7", pages = "129--140", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576208", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program instrumentation techniques form the basis of many recent software security defenses, including defenses against common exploits and security policy enforcement. As compared to source-code instrumentation, binary instrumentation is easier to use and more broadly applicable due to the ready availability of binary code. Two key features needed for security instrumentations are (a) it should be applied to all application code, including code contained in various system and application libraries, and (b) it should be non-bypassable. So far, dynamic binary instrumentation (DBI) techniques have provided these features, whereas static binary instrumentation (SBI) techniques have lacked them. These features, combined with ease of use, have made DBI the de facto choice for security instrumentations. However, DBI techniques can incur high overheads in several common usage scenarios, such as application startups, system-calls, and many real-world applications. We therefore develop a new platform for secure static binary instrumentation (PSI) that overcomes these drawbacks of DBI techniques, while retaining the security, robustness and ease-of-use features. We illustrate the versatility of PSI by developing several instrumentation applications: basic block counting, shadow stack defense against control-flow hijack and return-oriented programming attacks, and system call and library policy enforcement.
While being competitive with the best DBI tools on CPU-intensive SPEC 2006 benchmark, PSI provides an order of magnitude reduction in overheads on a collection of real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Lyu:2014:DER, author = "Yi-Hong Lyu and Ding-Yong Hong and Tai-Yi Wu and Jan-Jan Wu and Wei-Chung Hsu and Pangfeng Liu and Pen-Chung Yew", title = "{DBILL}: an efficient and retargetable dynamic binary instrumentation framework using {LLVM} backend", journal = j-SIGPLAN, volume = "49", number = "7", pages = "141--152", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic Binary Instrumentation (DBI) is a core technology for building debugging and profiling tools for application executables. Most state-of-the-art DBI systems have focused on the same instruction set architecture (ISA) where the guest binary and the host binary have the same ISA. It is uncommon to have a cross-ISA DBI system, such as a system that instruments ARM executables to run on x86 machines. We believe cross-ISA DBI systems are increasingly more important, since ARM executables could be more productively analyzed on x86 based machines such as commonly available PCs and servers. In this paper, we present DBILL, a cross-ISA and retargetable dynamic binary instrumentation framework that builds on both QEMU and LLVM. The DBILL framework enables LLVM-based static instrumentation tools to become DBI ready, and deployable to different target architectures. Using address sanitizer and memory sanitizer as implementation examples, we show DBILL is an efficient, versatile and easy to use cross-ISA retargetable DBI framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Zheng:2014:CCM, author = "Jie Zheng and Tze Sing Eugene Ng and Kunwadee Sripanidkulchai and Zhaolei Liu", title = "{COMMA}: coordinating the migration of multi-tier applications", journal = j-SIGPLAN, volume = "49", number = "7", pages = "153--164", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576200", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Multi-tier applications are widely deployed in today's virtualized cloud computing environments. At the same time, management operations in these virtualized environments, such as load balancing, hardware maintenance, workload consolidation, etc., often make use of live virtual machine (VM) migration to control the placement of VMs. Although existing solutions are able to migrate a single VM efficiently, little attention has been devoted to migrating related VMs in multi-tier applications. Ignoring the relatedness of VMs during migration can lead to serious application performance degradation. 
This paper formulates the multi-tier application migration problem, and presents a new communication-impact-driven coordinated approach, as well as a system called COMMA that realizes this approach. Through extensive testbed experiments, numerical analyses, and a demonstration of COMMA on Amazon EC2, we show that this approach is highly effective in minimizing migration's impact on multi-tier applications' performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Kumar:2014:FBE, author = "Vivek Kumar and Stephen M. Blackburn and David Grove", title = "Friendly barriers: efficient work-stealing with return barriers", journal = j-SIGPLAN, volume = "49", number = "7", pages = "165--176", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576207", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper addresses the problem of efficiently supporting parallelism within a managed runtime. A popular approach for exploiting software parallelism on parallel hardware is task parallelism, where the programmer explicitly identifies potential parallelism and the runtime then schedules the work. Work-stealing is a promising scheduling strategy that a runtime may use to keep otherwise idle hardware busy while relieving overloaded hardware of its burden. However, work-stealing comes with substantial overheads. Recent work identified sequential overheads of work-stealing, those that occur even when no stealing takes place, as a significant source of overhead. That work was able to reduce sequential overheads to just 15\%. In this work, we turn to dynamic overheads, those that occur each time a steal takes place. We show that the dynamic overhead is dominated by introspection of the victim's stack when a steal takes place. We exploit the idea of a low overhead return barrier to reduce the dynamic overhead by approximately half, resulting in total performance improvements of as much as 20\%. Because, unlike prior work, we attack the overheads directly due to stealing and therefore attack the overheads that grow as parallelism grows, we improve the scalability of work-stealing applications. This result is complementary to recent work addressing the sequential overheads of work-stealing. 
This work therefore substantially relieves work-stealing of the increasing pressure due to increasing intra-node hardware parallelism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Horie:2014:SDJ, author = "Michihiro Horie and Kazunori Ogata and Kiyokuni Kawachiya and Tamiya Onodera", title = "String deduplication for {Java}-based middleware in virtualized environments", journal = j-SIGPLAN, volume = "49", number = "7", pages = "177--188", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "To increase the memory efficiency in physical servers is a significant concern for increasing the number of virtual machines (VM) in them. When similar web application service runs in each guest VM, many string data with the same values are created in every guest VMs. These duplications of string data are redundant from the viewpoint of memory efficiency in the host OS. This paper proposes two approaches to reduce the duplication in Java string in a single Java VM (JVM) and across JVMs. The first approach is to share string objects cross JVMs by using a read-only memory-mapped file. The other approach is to selectively unify string objects created at runtime in the web applications. This paper evaluates our approach by using the Apache DayTrader and the DaCapo benchmark suite. Our prototype implementation achieved 7\% to 12\% reduction in the total size of the objects allocated over the lifetime of the programs. In addition, we observed the performance of DayTrader was maintained even under a situation of high density guest VMs in a KVM host machine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Stecklina:2014:SHO, author = "Julian Stecklina", title = "Shrinking the hypervisor one subsystem at a time: a userspace packet switch for virtual machines", journal = j-SIGPLAN, volume = "49", number = "7", pages = "189--200", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576202", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Efficient and secure networking between virtual machines is crucial in a time where a large share of the services on the Internet and in private datacenters run in virtual machines. To achieve this efficiency, virtualization solutions, such as Qemu/KVM, move toward a monolithic system architecture in which all performance critical functionality is implemented directly in the hypervisor in privileged mode. This is an attack surface in the hypervisor that can be used from compromised VMs to take over the virtual machine host and all VMs running on it. 
We show that it is possible to implement an efficient network switch for virtual machines as an unprivileged userspace component running in the host system including the driver for the upstream network adapter. Our network switch relies on functionality already present in the KVM hypervisor and requires no changes to Linux, the host operating system, and the guest. Our userspace implementation compares favorably to the existing in-kernel implementation with respect to throughput and latency. We reduced per-packet overhead by using a run-to-completion model and are able to outperform the unmodified system for VM-to-VM traffic by a large margin when packet rates are high.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Li:2014:VSK, author = "Ye Li and Richard West and Eric Missimer", title = "A virtualized separation kernel for mixed criticality systems", journal = j-SIGPLAN, volume = "49", number = "7", pages = "201--212", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576206", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Multi- and many-core processors are becoming increasingly popular in embedded systems. Many of these processors now feature hardware virtualization capabilities, such as the ARM Cortex A15, and x86 processors with Intel VT-x or AMD-V support. Hardware virtualization offers opportunities to partition physical resources, including processor cores, memory and I/O devices amongst guest virtual machines. Mixed criticality systems and services can then co-exist on the same platform in separate virtual machines. However, traditional virtual machine systems are too expensive because of the costs of trapping into hypervisors to multiplex and manage machine physical resources on behalf of separate guests. For example, hypervisors are needed to schedule separate VMs on physical processor cores. In this paper, we discuss the design of the Quest-V separation kernel, which partitions services of different criticalities in separate virtual machines, or sandboxes. Each sandbox encapsulates a subset of machine physical resources that it manages without requiring intervention of a hypervisor.
Moreover, a hypervisor is not needed for normal operation, except to bootstrap the system and establish communication channels between sandboxes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Johnson:2014:CML, author = "David Johnson and Mike Hibler and Eric Eide", title = "Composable multi-level debugging with {Stackdb}", journal = j-SIGPLAN, volume = "49", number = "7", pages = "213--226", month = jul, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2674025.2576212", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:29:50 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtual machine introspection (VMI) allows users to debug software that executes within a virtual machine. To support rich, whole-system analyses, a VMI tool must inspect and control systems at multiple levels of the software stack. Traditional debuggers enable inspection and control, but they limit users to treating a whole system as just one kind of target: e.g., just a kernel, or just a process, but not both. We created Stackdb, a debugging library with VMI support that allows one to monitor and control a whole system through multiple, coordinated targets. A target corresponds to a particular level of the system's software stack; multiple targets allow a user to observe a VM guest at several levels of abstraction simultaneously. For example, with Stackdb, one can observe a PHP script running in a Linux process in a Xen VM via three coordinated targets at the language, process, and kernel levels. Within Stackdb, higher-level targets are components that utilize lower-level targets; a key contribution of Stackdb is its API that supports multi-level and flexible ``stacks'' of targets. This paper describes the challenges we faced in creating Stackdb, presents the solutions we devised, and evaluates Stackdb through its application to a security-focused, whole-system case study.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '14 conference proceedings.", } @Article{Hill:2014:CCA, author = "Mark D. Hill", title = "21st century computer architecture", journal = j-SIGPLAN, volume = "49", number = "8", pages = "1--2", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2558890", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This talk has two parts. The first part will discuss possible directions for computer architecture research, including architecture as infrastructure, energy first, impact of new technologies, and cross-layer opportunities. This part is based on a 2012 Computing Community Consortium (CCC) whitepaper effort led by Hill, as well as other recent National Academy and ISAT studies. See: \url{http://cra.org/ccc/docs/init/21stcenturyarchitecturewhitepaper.pdf}. The second part of the talk will discuss one or more examples of cross-layer research advocated in the first part. 
For example, our analysis shows that many ``big-memory'' server workloads, such as databases, in-memory caches, and graph analytics, pay a high cost for page-based virtual memory: up to 50\% of execution time wasted. Via small changes to the operating system (Linux) and hardware (x86-64 MMU), this work reduces execution time these workloads waste to less than 0.5\%. The key idea is to map part of a process's linear virtual address space with a new incarnation of segmentation, while providing compatibility by mapping the rest of the virtual address space with paging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Liu:2014:PPF, author = "Tongping Liu and Chen Tian and Ziang Hu and Emery D. Berger", title = "{PREDATOR}: predictive false sharing detection", journal = j-SIGPLAN, volume = "49", number = "8", pages = "3--14", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555244", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "False sharing is a notorious problem for multithreaded applications that can drastically degrade both performance and scalability. Existing approaches can precisely identify the sources of false sharing, but only report false sharing actually observed during execution; they do not generalize across executions. Because false sharing is extremely sensitive to object layout, these detectors can easily miss false sharing problems that can arise due to slight differences in memory allocation order or object placement decisions by the compiler. In addition, they cannot predict the impact of false sharing on hardware with different cache line sizes. This paper presents PREDATOR, a predictive software-based false sharing detector. PREDATOR generalizes from a single execution to precisely predict false sharing that is latent in the current execution. PREDATOR tracks accesses within a range that could lead to false sharing given different object placement. It also tracks accesses within virtual cache lines, contiguous memory ranges that span actual hardware cache lines, to predict sharing on hardware platforms with larger cache line sizes. For each, it reports the exact program location of predicted false sharing problems, ranked by their projected impact on performance. We evaluate PREDATOR across a range of benchmarks and actual applications. PREDATOR identifies problems undetectable with previous tools, including two previously-unknown false sharing problems, with no false positives. PREDATOR is able to immediately locate false sharing problems in MySQL and the Boost library that had eluded detection for years.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Thomson:2014:CTU, author = "Paul Thomson and Alastair F. 
Donaldson and Adam Betts", title = "Concurrency testing using schedule bounding: an empirical study", journal = j-SIGPLAN, volume = "49", number = "8", pages = "15--28", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555260", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the first independent empirical study on schedule bounding techniques for systematic concurrency testing (SCT). We have gathered 52 buggy concurrent software benchmarks, drawn from public code bases, which we call SCTBench. We applied a modified version of an existing concurrency testing tool to SCTBench to attempt to answer several research questions, including: How effective are the two main schedule bounding techniques, preemption bounding and delay bounding, at bug finding? What challenges are associated with applying SCT to existing code? How effective is schedule bounding compared to a naive random scheduler at finding bugs? Our findings confirm that delay bounding is superior to preemption bounding and that schedule bounding is more effective at finding bugs than unbounded depth-first search. The majority of bugs in SCTBench can be exposed using a small bound (1-3), supporting previous claims, but there is at least one benchmark that requires 5 preemptions. Surprisingly, we found that a naive random scheduler is at least as effective as schedule bounding for finding bugs. We have made SCTBench and our tools publicly available for reproducibility and use in future work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Samak:2014:TDD, author = "Malavika Samak and Murali Krishna Ramanathan", title = "Trace driven dynamic deadlock detection and reproduction", journal = j-SIGPLAN, volume = "49", number = "8", pages = "29--42", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555262", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic analysis techniques have been proposed to detect potential deadlocks. Analyzing and comprehending each potential deadlock to determine whether the deadlock is feasible in a real execution requires significant programmer effort. Moreover, empirical evidence shows that existing analyses are quite imprecise. This imprecision of the analyses further voids the manual effort invested in reasoning about non-existent defects. In this paper, we address the problems of imprecision of existing analyses and the subsequent manual effort necessary to reason about deadlocks. We propose a novel approach for deadlock detection by designing a dynamic analysis that intelligently leverages execution traces. To reduce the manual effort, we replay the program by making the execution follow a schedule derived based on the observed trace. For a real deadlock, its feasibility is automatically verified if the replay causes the execution to deadlock. We have implemented our approach as part of WOLF and have analyzed many large (up to 160 KLoC) Java programs. 
Our experimental results show that we are able to identify 74\% of the reported defects as true (or false) positives automatically, leaving very few defects for manual analysis. The overhead of our approach is negligible, making it a compelling tool for practical adoption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Chiang:2014:ESI, author = "Wei-Fan Chiang and Ganesh Gopalakrishnan and Zvonimir Rakamaric and Alexey Solovyev", title = "Efficient search for inputs causing high floating-point errors", journal = j-SIGPLAN, volume = "49", number = "8", pages = "43--52", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555265", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tools for floating-point error estimation are fundamental to program understanding and optimization. In this paper, we focus on tools for determining the input settings to a floating point routine that maximizes its result error. Such tools can help support activities such as precision allocation, performance optimization, and auto-tuning. We benchmark current abstraction-based precision analysis methods, and show that they often do not work at scale, or generate highly pessimistic error estimates, often caused by non-linear operators or complex input constraints that define the set of legal inputs. We show that while concrete-testing-based error estimation methods based on maintaining shadow values at higher precision can search out higher error-inducing inputs, suitable heuristic search guidance is key to finding higher errors. We develop a heuristic search algorithm called Binary Guided Random Testing (BGRT). In 45 of the 48 total benchmarks, including many real-world routines, BGRT returns higher guaranteed errors. We also evaluate BGRT against two other heuristic search methods called ILS and PSO, obtaining better results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Tardieu:2014:XAP, author = "Olivier Tardieu and Benjamin Herta and David Cunningham and David Grove and Prabhanjan Kambadur and Vijay Saraswat and Avraham Shinnar and Mikio Takeuchi and Mandana Vaziri", title = "{X10} and {APGAS} at Petascale", journal = j-SIGPLAN, volume = "49", number = "8", pages = "53--66", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555245", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "X10 is a high-performance, high-productivity programming language aimed at large-scale distributed and shared-memory parallel applications. It is based on the Asynchronous Partitioned Global Address Space (APGAS) programming model, supporting the same fine-grained concurrency mechanisms within and across shared-memory nodes. 
We demonstrate that X10 delivers solid performance at petascale by running (weak scaling) eight application kernels on an IBM Power 775 supercomputer utilizing up to 55,680 Power7 cores (for 1.7 Pflop/s of theoretical peak performance). We detail our advances in distributed termination detection, distributed load balancing, and use of high-performance interconnects that enable X10 to scale out to tens of thousands of cores. For the four HPC Class 2 Challenge benchmarks, X10 achieves 41\% to 87\% of the system's potential at scale (as measured by IBM's HPCC Class 1 optimized runs). We also implement K-Means, Smith-Waterman, Betweenness Centrality, and Unbalanced Tree Search (UTS) for geometric trees. Our UTS implementation is the first to scale to petaflop systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Cunningham:2014:RXE, author = "David Cunningham and David Grove and Benjamin Herta and Arun Iyengar and Kiyokuni Kawachiya and Hiroki Murata and Vijay Saraswat and Mikio Takeuchi and Olivier Tardieu", title = "Resilient {X10}: efficient failure-aware programming", journal = j-SIGPLAN, volume = "49", number = "8", pages = "67--80", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555248", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scale-out programs run on multiple processes in a cluster. In scale-out systems, processes can fail. Computations using traditional libraries such as MPI fail when any component process fails. The advent of Map Reduce, Resilient Data Sets and MillWheel has shown dramatic improvements in productivity are possible when a high-level programming framework handles scale-out and resilience automatically. We are concerned with the development of general-purpose languages that support resilient programming. In this paper we show how the X10 language and implementation can be extended to support resilience. In Resilient X10, places may fail asynchronously, causing loss of the data and tasks at the failed place. Failure is exposed through exceptions. We identify a {\em Happens Before Invariance Principle} and require the runtime to automatically repair the global control structure of the program to maintain this principle. We show this reduces much of the burden of resilient programming. The programmer is only responsible for continuing execution with fewer computational resources and the loss of part of the heap, and can do so while taking advantage of domain knowledge. We build a complete implementation of the language, capable of executing benchmark applications on hundreds of nodes. We describe the algorithms required to make the language runtime resilient. We then give three applications, each with a different approach to fault tolerance (replay, decimation, and domain-level checkpointing). These can be executed at scale and survive node failure. We show that for these programs the overhead of resilience is a small fraction of overall runtime by comparing to equivalent non-resilient X10 programs. 
On one program we show end-to-end performance of Resilient X10 is ~100x faster than Hadoop.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Yang:2014:PMI, author = "Chaoran Yang and Wesley Bland and John Mellor-Crummey and Pavan Balaji", title = "Portable, {MPI}-interoperable {Coarray Fortran}", journal = j-SIGPLAN, volume = "49", number = "8", pages = "81--92", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555270", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The past decade has seen the advent of a number of parallel programming models such as Coarray Fortran (CAF), Unified Parallel C, X10, and Chapel. Despite the productivity gains promised by these models, most parallel scientific applications still rely on MPI as their data movement model. One reason for this trend is that it is hard for users to incrementally adopt these new programming models in existing MPI applications. Because each model uses its own runtime system, they duplicate resources and are potentially error-prone. Such independent runtime systems were deemed necessary because MPI was considered insufficient in the past to play this role for these languages. The recently released MPI-3, however, adds several new capabilities that now provide all of the functionality needed to act as a runtime, including a much more comprehensive one-sided communication framework. In this paper, we investigate how MPI-3 can form a runtime system for one example programming model, CAF, with a broader goal of enabling a single application to use both MPI and CAF with the highest level of interoperability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Yang:2014:CNR, author = "Yi Yang and Huiyang Zhou", title = "{CUDA-NP}: realizing nested thread-level parallelism in {GPGPU} applications", journal = j-SIGPLAN, volume = "49", number = "8", pages = "93--106", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555254", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallel programs consist of series of code sections with different thread-level parallelism (TLP). As a result, it is rather common that a thread in a parallel program, such as a GPU kernel in CUDA programs, still contains both sequential code and parallel loops. In order to leverage such parallel loops, the latest Nvidia Kepler architecture introduces dynamic parallelism, which allows a GPU thread to start another GPU kernel, thereby reducing the overhead of launching kernels from a CPU. However, with dynamic parallelism, a parent thread can only communicate with its child threads through global memory and the overhead of launching GPU kernels is non-trivial even within GPUs. In this paper, we first study a set of GPGPU benchmarks that contain parallel loops, and highlight that these benchmarks do not have a very high loop count or high degrees of TLP. 
Consequently, the benefits of leveraging such parallel loops using dynamic parallelism are too limited to offset its overhead. We then present our proposed solution to exploit nested parallelism in CUDA, referred to as CUDA-NP. With CUDA-NP, we initially enable a high number of threads when a GPU program starts, and use control flow to activate different numbers of threads for different code sections. We implemented our proposed CUDA-NP framework using a directive-based compiler approach. For a GPU kernel, an application developer only needs to add OpenMP-like pragmas for parallelizable code sections. Then, our CUDA-NP compiler automatically generates the optimized GPU kernels. It supports both the reduction and the scan primitives, explores different ways to distribute parallel loop iterations into threads, and efficiently manages on-chip resources. Our experiments show that for a set of GPGPU benchmarks, which have already been optimized and contain nested parallelism, our proposed CUDA-NP framework further improves the performance by up to 6.69 times and 2.18 times on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Yan:2014:YYA, author = "Shengen Yan and Chao Li and Yunquan Zhang and Huiyang Zhou", title = "{yaSpMV}: yet another {SpMV} framework on {GPUs}", journal = j-SIGPLAN, volume = "49", number = "8", pages = "107--118", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555255", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "SpMV is a key linear algebra algorithm and has been widely used in many important application domains. As a result, numerous attempts have been made to optimize SpMV on GPUs to leverage their massive computational throughput. Although the previous work has shown impressive progress, load imbalance and high memory bandwidth remain the critical performance bottlenecks for SpMV. In this paper, we present our novel solutions to these problems. First, we devise a new SpMV format, called blocked compressed common coordinate (BCCOO), which uses bit flags to store the row indices in a blocked common coordinate (COO) format so as to alleviate the bandwidth problem. We further improve this format by partitioning the matrix into vertical slices to enhance the cache hit rates when accessing the vector to be multiplied. Second, we revisit the segmented scan approach for SpMV to address the load imbalance problem. We propose a highly efficient matrix-based segmented sum/scan for SpMV and further improve it by eliminating global synchronization. Then, we introduce an auto-tuning framework to choose optimization parameters based on the characteristics of input sparse matrices and target hardware platforms. 
Our experimental results on GTX680 GPUs and GTX480 GPUs show that our proposed framework achieves significant performance improvement over the vendor tuned CUSPARSE V5.0 (up to 229\% and 65\% on average on GTX680 GPUs, up to 150\% and 42\% on average on GTX480 GPUs) and some most recently proposed schemes (e.g., up to 195\% and 70\% on average over clSpMV on GTX680 GPUs, up to 162\% and 40\% on average over clSpMV on GTX480 GPUs).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Bauer:2014:SLW, author = "Michael Bauer and Sean Treichler and Alex Aiken", title = "{Singe}: leveraging warp specialization for high performance on {GPUs}", journal = j-SIGPLAN, volume = "49", number = "8", pages = "119--130", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555258", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Singe, a Domain Specific Language (DSL) compiler for combustion chemistry that leverages warp specialization to produce high performance code for GPUs. Instead of relying on traditional GPU programming models that emphasize data-parallel computations, warp specialization allows compilers like Singe to partition computations into sub-computations which are then assigned to different warps within a thread block. Fine-grain synchronization between warps is performed efficiently in hardware using producer-consumer named barriers. Partitioning computations using warp specialization allows Singe to deal efficiently with the irregularity in both data access patterns and computation. Furthermore, warp-specialized partitioning of computations allows Singe to fit extremely large working sets into on-chip memories. Finally, we describe the architecture and general compilation techniques necessary for constructing a warp-specializing compiler. We show that the warp-specialized code emitted by Singe is up to 3.75X faster than previously optimized data-parallel GPU kernels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Odaira:2014:EGI, author = "Rei Odaira and Jose G. Castanos and Hisanobu Tomari", title = "Eliminating global interpreter locks in {Ruby} through hardware transactional memory", journal = j-SIGPLAN, volume = "49", number = "8", pages = "131--142", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555247", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many scripting languages use a Global Interpreter Lock (GIL) to simplify the internal designs of their interpreters, but this kind of lock severely lowers the multi-thread performance on multi-core machines. This paper presents our first results eliminating the GIL in Ruby using Hardware Transactional Memory (HTM) in the IBM zEnterprise EC12 and Intel 4th Generation Core processors. Though prior prototypes replaced a GIL with HTM, we tested realistic programs, the Ruby NAS Parallel Benchmarks (NPB), the WEBrick HTTP server, and Ruby on Rails. 
We devised a new technique to dynamically adjust the transaction lengths on a per-bytecode basis, so that we can optimize the likelihood of transaction aborts against the relative overhead of the instructions to begin and end the transactions. Our results show that HTM achieved 1.9- to 4.4-fold speedups in the NPB programs over the GIL with 12 threads, and 1.6- and 1.2-fold speedups in WEBrick and Ruby on Rails, respectively. The dynamic transaction-length adjustment chose the best transaction lengths for any number of threads and applications with sufficiently long running times.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Petrovic:2014:LHM, author = "Darko Petrovi{\'c} and Thomas Ropars and Andr{\'e} Schiper", title = "Leveraging hardware message passing for efficient thread synchronization", journal = j-SIGPLAN, volume = "49", number = "8", pages = "143--154", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555251", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As the level of parallelism in manycore processors keeps increasing, providing efficient mechanisms for thread synchronization in concurrent programs is becoming a major concern. On cache-coherent shared-memory processors, synchronization efficiency is ultimately limited by the performance of the underlying cache coherence protocol. This paper studies how hardware support for message passing can improve synchronization performance. Considering the ubiquitous problem of mutual exclusion, we adapt two state-of-the-art solutions used on shared-memory processors, namely the server approach and the combining approach, to leverage the potential of hardware message passing. We propose HybComb, a novel combining algorithm that uses both message passing and shared memory features of emerging hybrid processors. We also introduce MP-Server, a straightforward adaptation of the server approach to hardware message passing. Evaluation on Tilera's TILE-Gx processor shows that MP-Server can execute contended critical sections with unprecedented throughput, as stalls related to cache coherence are removed from the critical path. HybComb can achieve comparable performance, while avoiding the need to dedicate server cores. 
Consequently, our queue and stack implementations, based on MP-Server and HybComb, largely outperform their most efficient pure-shared-memory counterparts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Herlihy:2014:WSF, author = "Maurice Herlihy and Zhiyu Liu", title = "Well-structured futures and cache locality", journal = j-SIGPLAN, volume = "49", number = "8", pages = "155--166", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555257", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In fork-join parallelism, a sequential program is split into a directed acyclic graph of tasks linked by directed dependency edges, and the tasks are executed, possibly in parallel, in an order consistent with their dependencies. A popular and effective way to extend fork-join parallelism is to allow threads to create {\em futures}. A thread creates a future to hold the results of a computation, which may or may not be executed in parallel. That result is returned when some thread touches that future, blocking if necessary until the result is ready. Recent research has shown that while futures can, of course, enhance parallelism in a structured way, they can have a deleterious effect on cache locality. In the worst case, futures can incur $\Omega(P T_\infty + t T_\infty)$ deviations, which implies $\Omega(C P T_\infty + C t T_\infty)$ additional cache misses, where $C$ is the number of cache lines, $P$ is the number of processors, $t$ is the number of touches, and $T_\infty$ is the computation span. Since cache locality has a large impact on software performance on modern multicores, this result is troubling. In this paper, however, we show that if futures are used in a simple, disciplined way, then the situation is much better: if each future is touched only once, either by the thread that created it, or by a later descendant of the thread that created it, then parallel executions with work stealing can incur at most $O(C P T_\infty^2)$ additional cache misses, a substantial improvement. This structured use of futures is characteristic of many (but not all) parallel applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Diegues:2014:TWL, author = "Nuno Diegues and Paolo Romano", title = "{Time-Warp}: lightweight abort minimization in transactional memory", journal = j-SIGPLAN, volume = "49", number = "8", pages = "167--178", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555259", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The notion of permissiveness in Transactional Memory (TM) translates to only aborting a transaction when it cannot be accepted in any history that guarantees correctness criterion. This property is neglected by most TMs, which, in order to maximize implementation's efficiency, resort to aborting transactions under overly conservative conditions. 
In this paper we seek to identify a sweet spot between permissiveness and efficiency by introducing the Time-Warp Multi-version algorithm (TWM). TWM is based on the key idea of allowing an update transaction that has performed stale reads (i.e., missed the writes of concurrently committed transactions) to be serialized by committing it in the past, which we call a time-warp commit. At its core, TWM uses a novel, lightweight validation mechanism with little computational overhead. TWM also guarantees that read-only transactions can never be aborted. Further, TWM guarantees Virtual World Consistency, a safety property that is deemed as particularly relevant in the context of TM. We demonstrate the practicality of this approach through an extensive experimental study, where we compare TWM with four other TMs, and show an average performance improvement of 65\% in high concurrency scenarios.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Olukotun:2014:BPP, author = "Kunle Olukotun", title = "Beyond parallel programming with domain specific languages", journal = j-SIGPLAN, volume = "49", number = "8", pages = "179--180", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2557966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today, almost all computer architectures are parallel and heterogeneous; a combination of multiple CPUs, GPUs and specialized processors. This creates a challenging problem for application developers who want to develop high performance programs without the effort required to use low-level, architecture specific parallel programming models (e.g., OpenMP for CMPs, CUDA for GPUs, MPI for clusters). Domain-specific languages (DSLs) are a promising solution to this problem because they can provide an avenue for high-level application-specific abstractions with implicit parallelism to be mapped directly to low level architecture-specific programming models; providing both high programmer productivity and high execution performance. In this talk I will describe an approach to building high performance DSLs, which is based on DSL embedding in a general purpose programming language, metaprogramming and a DSL infrastructure called Delite. I will describe how we transform DSL programs into efficient first-order low-level code using domain specific optimization, parallelism and locality optimization with parallel patterns, and architecture-specific code generation. All optimizations and transformations are implemented in Delite: an extensible DSL compiler infrastructure that significantly reduces the effort required to develop new DSLs. Delite DSLs for machine learning, data querying, graph analysis, and scientific computing all achieve performance competitive with manually parallelized C++ code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Song:2014:DAT, author = "Sukhyun Song and Jeffrey K. 
Hollingsworth", title = "Designing and auto-tuning parallel {$3$-D FFT} for computation-communication overlap", journal = j-SIGPLAN, volume = "49", number = "8", pages = "181--192", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555249", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a method to design and auto-tune a new parallel 3-D FFT code using the non-blocking MPI all-to-all operation. We achieve high performance by optimizing computation-communication overlap. Our code performs fully asynchronous communication without any support from special hardware. We also improve cache performance through loop tiling. To cope with the complex trade-off regarding our optimization techniques, we parameterize our code and auto-tune the parameters efficiently in a large parameter space. Experimental results from two systems confirm that our code achieves a speedup of up to 1.76x over the FFTW library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Catanzaro:2014:DPM, author = "Bryan Catanzaro and Alexander Keller and Michael Garland", title = "A decomposition for in-place matrix transposition", journal = j-SIGPLAN, volume = "49", number = "8", pages = "193--206", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555253", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a decomposition for in-place matrix transposition, with applications to Array of Structures memory accesses on SIMD processors. Traditional approaches to in-place matrix transposition involve cycle following, which is difficult to parallelize, and on matrices of dimension $m$ by $n$ require $O(mn \log mn)$ work when limited to less than $O(mn)$ auxiliary space. Our decomposition allows the rows and columns to be operated on independently during in-place transposition, reducing work complexity to $O(mn)$, given $O(\max(m, n))$ auxiliary space. This decomposition leads to an efficient and naturally parallel algorithm: we have measured median throughput of 19.5 GB/s on an NVIDIA Tesla K20c processor. An implementation specialized for the skinny matrices that arise when converting Arrays of Structures to Structures of Arrays yields median throughput of 34.3 GB/s, and a maximum throughput of 51 GB/s. Because of the simple structure of this algorithm, it is particularly suited for implementation using SIMD instructions to transpose the small arrays that arise when SIMD processors load from or store to Arrays of Structures. Using this algorithm to cooperatively perform accesses to Arrays of Structures, we measure 180 GB/s throughput on the K20c, which is up to 45 times faster than compiler-generated Array of Structures accesses. 
In this paper, we explain the algorithm, prove its correctness and complexity, and explain how it can be instantiated efficiently for solving various transpose problems on both CPUs and GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Sung:2014:PTR, author = "I-Jui Sung and Juan G{\'o}mez-Luna and Jos{\'e} Mar{\'\i}a Gonz{\'a}lez-Linares and Nicol{\'a}s Guil and Wen-Mei W. Hwu", title = "In-place transposition of rectangular matrices on accelerators", journal = j-SIGPLAN, volume = "49", number = "8", pages = "207--218", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555266", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Matrix transposition is an important algorithmic building block for many numeric algorithms such as FFT. It has also been used to convert the storage layout of arrays. With more and more algebra libraries offloaded to GPUs, a high performance in-place transposition becomes necessary. Intuitively, in-place transposition should be a good fit for GPU architectures due to limited available on-board memory capacity and high throughput. However, direct application of CPU in-place transposition algorithms lacks the amount of parallelism and locality required by GPUs to achieve good performance. In this paper we present the first known in-place matrix transposition approach for the GPUs. Our implementation is based on a novel 3-stage transposition algorithm where each stage is performed using an elementary tiled-wise transposition. Additionally, when transposition is done as part of the memory transfer between GPU and host, our staged approach allows hiding transposition overhead by overlap with PCIe transfer. We show that the 3-stage algorithm allows larger tiles and achieves 3X speedup over a traditional 4-stage algorithm, with both algorithms based on our high-performance elementary transpositions on the GPU. We also show our proposed low-level optimizations improve the sustained throughput to more than 20 GB/s. Finally, we propose an asynchronous execution scheme that allows CPU threads to delegate in-place matrix transposition to GPU, achieving a throughput of more than 3.4 GB/s (including data transfers costs), and improving current multithreaded implementations of in-place transposition on CPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Maleki:2014:PDP, author = "Saeed Maleki and Madanlal Musuvathi and Todd Mytkowicz", title = "Parallelizing dynamic programming through rank convergence", journal = j-SIGPLAN, volume = "49", number = "8", pages = "219--232", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555264", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes an efficient parallel algorithm for an important class of dynamic programming problems that includes Viterbi, Needleman-Wunsch, Smith-Waterman, and Longest Common Subsequence. 
In dynamic programming, the subproblems that do not depend on each other, and thus can be computed in parallel, form stages or wavefronts. The algorithm presented in this paper provides additional parallelism allowing multiple stages to be computed in parallel despite dependences among them. The correctness and the performance of the algorithm relies on rank convergence properties of matrix multiplication in the tropical semiring, formed with plus as the multiplicative operation and max as the additive operation. This paper demonstrates the efficiency of the parallel algorithm by showing significant speed ups on a variety of important dynamic programming problems. In particular, the parallel Viterbi decoder is up-to 24x faster (with 64 processors) than a highly optimized commercial baseline.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Mehta:2014:RLF, author = "Sanyam Mehta and Pei-Hung Lin and Pen-Chung Yew", title = "Revisiting loop fusion in the polyhedral framework", journal = j-SIGPLAN, volume = "49", number = "8", pages = "233--246", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555250", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Loop fusion is an important compiler optimization for improving memory hierarchy performance through enabling data reuse. Traditional compilers have approached loop fusion in a manner decoupled from other high-level loop optimizations, missing several interesting solutions. Recently, the polyhedral compiler framework with its ability to compose complex transformations, has proved to be promising in performing loop optimizations for small programs. However, our experiments with large programs using state-of-the-art polyhedral compiler frameworks reveal suboptimal fusion partitions in the transformed code. We trace the reason for this to be lack of an effective cost model to choose a good fusion partitioning among the possible choices, which increase exponentially with the number of program statements. In this paper, we propose a fusion algorithm to choose good fusion partitions with two objective functions --- achieving good data reuse and preserving parallelism inherent in the source code. These objectives, although targeted by previous work in traditional compilers, pose new challenges within the polyhedral compiler framework and have thus not been addressed. In our algorithm, we propose several heuristics that work effectively within the polyhedral compiler framework and allow us to achieve the proposed objectives. 
Experimental results show that our fusion algorithm achieves performance comparable to the existing polyhedral compilers for small kernel programs, and significantly outperforms them for large benchmark programs such as those in the SPEC benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Rodrigues:2014:TPS, author = "Christopher Rodrigues and Thomas Jablin and Abdul Dakkak and Wen-Mei Hwu", title = "{Triolet}: a programming system that unifies algorithmic skeleton interfaces for high-performance cluster computing", journal = j-SIGPLAN, volume = "49", number = "8", pages = "247--258", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555268", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional algorithmic skeletons promise a high-level programming interface for distributed-memory clusters that free developers from concerns of task decomposition, scheduling, and communication. Unfortunately, prior distributed functional skeleton frameworks do not deliver performance comparable to that achievable in a low-level distributed programming model such as C with MPI and OpenMP, even when used in concert with high-performance array libraries. There are several causes: they do not take advantage of shared memory on each cluster node; they impose a fixed partitioning strategy on input data; and they have limited ability to fuse loops involving skeletons that produce a variable number of outputs per input. We address these shortcomings in the Triolet programming language through a modular library design that separates concerns of parallelism, loop nesting, and data partitioning. We show how Triolet substantially improves the parallel performance of algorithms involving array traversals and nested, variable-size loops over what is achievable in Eden, a distributed variant of Haskell. We further demonstrate how Triolet can substantially simplify parallel programming relative to C with MPI and OpenMP while achieving 23--100\% of its performance on a 128-core cluster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Liu:2014:TAP, author = "Xu Liu and John Mellor-Crummey", title = "A tool to analyze the performance of multithreaded programs on {NUMA} architectures", journal = j-SIGPLAN, volume = "49", number = "8", pages = "259--272", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555271", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Almost all of today's microprocessors contain memory controllers and directly attach to memory. Modern multiprocessor systems support non-uniform memory access (NUMA): it is faster for a microprocessor to access memory that is directly attached than it is to access memory attached to another processor. Without careful distribution of computation and data, a multithreaded program running on such a system may have high average memory access latency. 
To use multiprocessor systems efficiently, programmers need performance tools to guide the design of NUMA-aware codes. To address this need, we enhanced the HPCToolkit performance tools to support measurement and analysis of performance problems on multiprocessor systems with multiple NUMA domains. With these extensions, HPCToolkit helps pinpoint, quantify, and analyze NUMA bottlenecks in executions of multithreaded programs. It computes derived metrics to assess the severity of bottlenecks, analyzes memory accesses, and provides a wealth of information to guide NUMA optimization, including information about how to distribute data to reduce access latency and minimize contention. This paper describes the design and implementation of our extensions to HPCToolkit. We demonstrate their utility by describing case studies in which we use these capabilities to diagnose NUMA bottlenecks in four multithreaded applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Rao:2014:TFE, author = "Jia Rao and Xiaobo Zhou", title = "Towards fair and efficient {SMP} virtual machine scheduling", journal = j-SIGPLAN, volume = "49", number = "8", pages = "273--286", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555246", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "As multicore processors become prevalent in modern computer systems, there is a growing need for increasing hardware utilization and exploiting the parallelism of such platforms. With virtualization technology, hardware utilization is improved by encapsulating independent workloads into virtual machines (VMs) and consolidating them onto the same machine. SMP virtual machines have been widely adopted to exploit parallelism. For virtualized systems, such as a public cloud, fairness between tenants and the efficiency of running their applications are keys to success. However, we find that existing virtualization platforms fail to enforce fairness between VMs with different number of virtual CPUs (vCPU) that run on multiple CPUs. We attribute the unfairness to the use of per-CPU schedulers and the load imbalance on these CPUs that incur inaccurate CPU allocations. Unfortunately, existing approaches to reduce unfairness, e.g., dynamic load balancing and CPU capping, introduce significant inefficiencies to parallel workloads. In this paper, we present Flex, a vCPU scheduling scheme that enforces fairness at VM-level and improves the efficiency of hosted parallel applications. Flex centers on two key designs: (1) dynamically adjusting vCPU weights (FlexW) on multiple CPUs to achieve VM-level fairness and (2) flexibly scheduling vCPUs (FlexS) to minimize wasted busy-waiting time. We have implemented Flex in Xen and performed comprehensive evaluations with various parallel workloads. Results show that Flex is able to achieve CPU allocations with on average no more than 5\% error compared to the ideal fair allocation. 
Further, Flex outperforms Xen's credit scheduler and two representative co-scheduling approaches by as much as $ 10 \times $ for parallel applications using busy-waiting or blocking synchronization methods.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Lu:2014:EDM, author = "Kai Lu and Xu Zhou and Tom Bergan and Xiaoping Wang", title = "Efficient deterministic multithreading without global barriers", journal = j-SIGPLAN, volume = "49", number = "8", pages = "287--300", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555252", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multithreaded programs execute nondeterministically on conventional architectures and operating systems. This complicates many tasks, including debugging and testing. Deterministic multithreading (DMT) makes the output of a multithreaded program depend on its inputs only, which can totally solve the above problem. However, current DMT implementations suffer from a common inefficiency: they use frequent global barriers to enforce a deterministic ordering on memory accesses. In this paper, we eliminate that inefficiency using an execution model we call deterministic lazy release consistency (DLRC). Our execution model uses the Kendo algorithm to enforce a deterministic ordering on synchronization, and it uses a deterministic version of the lazy release consistency memory model to propagate memory updates across threads. Our approach guarantees that programs execute deterministically even when they contain data races. We implemented a DMT system based on these ideas (RFDet) and evaluated it using 16 parallel applications. Our implementation targets C/C++ programs that use POSIX threads. Results show that RFDet gains nearly 2x speedup compared with DThreads, a state-of-the-art DMT system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Eslamimehr:2014:RDS, author = "Mahdi Eslamimehr and Jens Palsberg", title = "Race directed scheduling of concurrent programs", journal = j-SIGPLAN, volume = "49", number = "8", pages = "301--314", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555263", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Detection of data races in Java programs remains a difficult problem. The best static techniques produce many false positives, and also the best dynamic techniques leave room for improvement. We present a new technique called race directed scheduling that for a given race candidate searches for an input and a schedule that lead to the race. The search iterates a combination of concolic execution and schedule improvement, and turns out to find useful inputs and schedules efficiently. We use an existing technique to produce a manageable number of race candidates. Our experiments on 23 Java programs found 72 real races that were missed by the best existing dynamic techniques. 
Among those 72 races, 31 races were found with schedules that have between 1 million and 108 million events, which suggests that they are rare and hard-to-find races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Rubin:2014:HCW, author = "Norm Rubin", title = "Heterogeneous computing: what does it mean for compiler research?", journal = j-SIGPLAN, volume = "49", number = "8", pages = "315--316", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2558891", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The current trend in computer architecture is to increase the number of cores, to create specialized types of cores within a single machine, and to network such machines together in very fluid web/cloud computing arrangements. Compilers have traditionally focused on optimizations to code that improve performance, but is that the right target to speed up real applications? Consider loading a web page (like starting GMAIL) the page is transferred to the client, any JavaScript is compiled, the JavaScript executes, and the page gets displayed. The classic compiler model (which was first developed in the late 50's) was a great fit for single core machines but has fallen behind architecture, and language. For example how do you compile a single program for a machine that has both a CPU and a graphics coprocessor (a GPU) with a very different programming and memory model? Together with the changes in architecture there have been changes in programming languages. Dynamic languages are used more, static languages are used less. How does this effect compiler research? In this talk, I'll review a number of traditional compiler research challenges that have (or will) become burning issues and will describe some new problems areas that were not considered in the past. For example language specifications are large complex technical documents that are difficult for non-experts to follow. Application programmers are often not willing to read these documents; can a compiler bridge the gap?", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Natarajan:2014:FCL, author = "Aravind Natarajan and Neeraj Mittal", title = "Fast concurrent lock-free binary search trees", journal = j-SIGPLAN, volume = "49", number = "8", pages = "317--328", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555256", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new lock-free algorithm for concurrent manipulation of a binary search tree in an asynchronous shared memory system that supports search, insert and delete operations. In addition to read and write instructions, our algorithm uses (single-word) compare-and-swap (CAS) and bit-test-and-set (SETB) atomic instructions, both of which are commonly supported by many modern processors including Intel~64 and AMD64. 
In contrast to existing lock-free algorithms for a binary search tree, our algorithm is based on marking edges rather than nodes. As a result, when compared to other lock-free algorithms, modify (insert and delete) operations in our algorithm work on a smaller portion of the tree, thereby reducing conflicts, and execute fewer atomic instructions (one for insert and three for delete). Our experiments indicate that our lock-free algorithm significantly outperforms all other algorithms for a concurrent binary search tree in many cases, especially when contention is high, by as much as 100\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Brown:2014:GTN, author = "Trevor Brown and Faith Ellen and Eric Ruppert", title = "A general technique for non-blocking trees", journal = j-SIGPLAN, volume = "49", number = "8", pages = "329--342", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555267", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a general technique for obtaining provably correct, non-blocking implementations of a large class of tree data structures where pointers are directed from parents to children. Updates are permitted to modify any contiguous portion of the tree atomically. Our non-blocking algorithms make use of the LLX, SCX and VLX primitives, which are multi-word generalizations of the standard LL, SC and VL primitives and have been implemented from single-word CAS. To illustrate our technique, we describe how it can be used in a fairly straightforward way to obtain a non-blocking implementation of a chromatic tree, which is a relaxed variant of a red-black tree. The height of the tree at any time is O(c + log n), where n is the number of keys and c is the number of updates in progress. We provide an experimental performance analysis which demonstrates that our Java implementation of a chromatic tree rivals, and often significantly outperforms, other leading concurrent dictionaries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Drachsler:2014:PCB, author = "Dana Drachsler and Martin Vechev and Eran Yahav", title = "Practical concurrent binary search trees via logical ordering", journal = j-SIGPLAN, volume = "49", number = "8", pages = "343--356", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555269", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present practical, concurrent binary search tree (BST) algorithms that explicitly maintain logical ordering information in the data structure, permitting clean separation from its physical tree layout. We capture logical ordering using intervals, with the property that an item belongs to the tree if and only if the item is an endpoint of some interval. We are thus able to construct efficient, synchronization-free and intuitive lookup operations. 
We present (i) a concurrent non-balanced BST with a lock-free lookup, and (ii) a concurrent AVL tree with a lock-free lookup that requires no synchronization with any mutating operations, including balancing operations. Our algorithms apply on-time deletion; that is, every request for removal of a node results in its immediate removal from the tree. This new feature did not exist in previous concurrent internal tree algorithms. We implemented our concurrent BST algorithms and evaluated them against several state-of-the-art concurrent tree algorithms. Our experimental results show that our algorithms with lock-free contains and on-time deletion are practical and often comparable to the state-of-the-art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Timnat:2014:PWF, author = "Shahar Timnat and Erez Petrank", title = "A practical wait-free simulation for lock-free data structures", journal = j-SIGPLAN, volume = "49", number = "8", pages = "357--368", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555261", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lock-free data structures guarantee overall system progress, whereas wait-free data structures guarantee the progress of each and every thread, providing the desirable non-starvation guarantee for concurrent data structures. While practical lock-free implementations are known for various data structures, wait-free data structure designs are rare. Wait-free implementations have been notoriously hard to design and often inefficient. In this work we present a transformation of lock-free algorithms to wait-free ones, allowing even a non-expert to transform a lock-free data structure into a practical wait-free one. The transformation requires that the lock-free data structure is given in a normalized form defined in this work. Using the new method, we have designed and implemented a wait-free linked list, skiplist, and tree, and we measured their performance. It turns out that for all these data structures the wait-free implementations are only a few percent slower than their lock-free counterparts, while still guaranteeing non-starvation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Pusukuri:2014:LCA, author = "Kishore Kumar Pusukuri and Rajiv Gupta and Laxmi Narayan Bhuyan", title = "Lock contention aware thread migrations", journal = j-SIGPLAN, volume = "49", number = "8", pages = "369--370", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555273", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "On a cache-coherent multicore multiprocessor system, the performance of a multithreaded application with high lock contention is very sensitive to the distribution of application threads across multiple processors.
This is because the distribution of threads impacts the frequency of lock transfers between processors, which in turn impacts the frequency of last-level cache (LLC) misses that lie on the critical path of execution. Inappropriate distribution of threads across processors increases LLC misses in the critical path and significantly degrades performance of multithreaded programs. To alleviate the above problem, this paper overviews a thread migration technique, which migrates threads of a multithreaded program across multicore processors so that threads seeking locks are more likely to find the locks on the same processor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Lee:2014:IFL, author = "Kyu Hyung Lee and Dohyeong Kim and Xiangyu Zhang", title = "Infrastructure-free logging and replay of concurrent execution on multiple cores", journal = j-SIGPLAN, volume = "49", number = "8", pages = "371--372", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555274", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop a logging and replay technique for real concurrent execution on multiple cores. Our technique directly works on binaries and does not require any hardware or complex software infrastructure support. We focus on minimizing logging overhead: the technique logs only a subset of system calls and thread spawns. Replay is on a single core. During replay, our technique first tries to follow only the event order in the log. However, due to schedule differences, replay may fail. An exploration process is then triggered to search for a schedule that allows the replay to make progress. Exploration is performed within a window preceding the point of replay failure. During exploration, our technique first tries to reorder synchronized blocks. If that does not lead to progress, it further reorders shared variable accesses. The exploration is facilitated by a sophisticated caching mechanism. Our experiments on real-world programs and real workloads show that the proposed technique has very low logging overhead (2.6\% on average) and fast schedule reconstruction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Aguston:2014:PHC, author = "Cfir Aguston and Yosi Ben Asher and Gadi Haber", title = "Parallelization hints via code skeletonization", journal = j-SIGPLAN, volume = "49", number = "8", pages = "373--374", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555275", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tools that provide optimization hints for program developers face severe obstacles and are often unable to provide meaningful guidance on how to parallelize real--life applications. The main reason is the high complexity and large size of commercially valuable code. Such code is often rich with pointers, heavily nested conditional statements, nested while--based loops, function calls, etc.
These constructs prevent existing compiler analyses from extracting the full parallelization potential. We propose a new paradigm to overcome this issue by automatically transforming the code into a much simpler skeleton-like form that is more conducive to auto-parallelization. We then apply existing tools of source--level automatic parallelization on the skeletonized code in order to expose possible parallelization patterns. The skeleton code, along with the parallelized version, is then provided to the programmer in the form of an IDE (Integrated Development Environment) recommendation. The proposed skeletonization algorithm replaces pointers by integer indexes and C-struct references by references to multi-dimensional arrays. This is because automatic parallelizers cannot handle pointer expressions. For example, {\tt while(p != NULL)\{ p->val++; p=p->next; \}} will be skeletonized to the parallelizable {\tt for(Ip=0;Ip < N; Ip++) \{ Aval[Ip]++; \}} where {\tt Aval[]} holds the embedding of the original list. It follows that the main goal of the skeletonization process is to embed pointer-based data structures into arrays. Though the skeletonized code is not semantically equivalent to the original code, it points out a possible parallelization pattern for this code segment and can be used as an effective parallelization hint to the programmer. We applied the method to several representative benchmarks from SPEC CPU 2000 and reached up to 80\% performance gain after several sequential code segments had been manually parallelized based on the parallelization patterns of the generated skeletons. In a different set of experiments we tried to estimate the potential of skeletonization for a larger set of programs in SPEC 2000 and obtained an estimate of 27\% additional loops that can be parallelized/vectorized due to skeletonization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Wang:2014:CBL, author = "Wenwen Wang and Chenggang Wu and Pen-Chung Yew and Xiang Yuan and Zhenjiang Wang and Jianjun Li and Xiaobing Feng", title = "Concurrency bug localization using shared memory access pairs", journal = j-SIGPLAN, volume = "49", number = "8", pages = "375--376", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555276", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-determinism in concurrent programs makes their debugging much more challenging than that in sequential programs. To mitigate such difficulties, we propose a new technique to automatically locate buggy shared memory accesses that triggered concurrency bugs. Compared to existing fault localization techniques that are based on empirical statistical approaches, this technique has two advantages. First, as long as enough successful runs of a concurrent program are collected, the proposed technique can locate buggy memory accesses to the shared data even with only a single failed run captured, as opposed to the multiple failed runs needed by other statistical approaches.
Second, the proposed technique is more precise because it considers memory accesses in those failed runs that terminate prematurely.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Leung:2014:TMS, author = "Vitus J. Leung and David P. Bunde and Jonathan Ebbers and Stefan P. Feer and Nickolas W. Price and Zachary D. Rhodes and Matthew Swank", title = "Task mapping stencil computations for non-contiguous allocations", journal = j-SIGPLAN, volume = "49", number = "8", pages = "377--378", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555277", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We examine task mapping algorithms for systems that allocate jobs non-contiguously. Several studies have shown that task placement affects job running time. We focus on jobs with a stencil communication pattern and use experiments on a Cray XE to evaluate novel task mapping algorithms as well as some adapted to this setting. This is done with the miniGhost miniApp which mimics the performance of CTH, a shock physics application. Our strategies improve average and single-run times by as much as 28\% and 36\% over a baseline strategy, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Wimmer:2014:DST, author = "Martin Wimmer and Francesco Versaci and Jesper Larsson Tr{\"a}ff and Daniel Cederman and Philippas Tsigas", title = "Data structures for task-based priority scheduling", journal = j-SIGPLAN, volume = "49", number = "8", pages = "379--380", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555278", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present three lock-free data structures for priority task scheduling: a priority work-stealing one, a centralized one with $ \rho $-relaxed semantics, and a hybrid one combining both concepts. With the single-source shortest path (SSSP) problem as an example, we show how the different approaches affect the prioritization and provide upper bounds on the number of examined nodes. We argue that priority task scheduling allows for an intuitive and easy way to parallelize the SSSP problem, a notoriously hard task. Experimental evidence supports the good scalability of the resulting algorithm. The larger aim of this work is to understand the trade-offs between scalability and priority guarantees in task scheduling systems. We show that $ \rho $-relaxation is a valuable technique for improving the former, while still allowing semantic constraints to be satisfied: the lock-free, hybrid $k$-priority data structure can scale as well as work-stealing, while still providing strong priority scheduling guarantees, which depend on the parameter $k$.
Our theoretical results open up possibilities for even more scalable data structures by adopting a weaker form of $ \rho $-relaxation, which still enables the semantic constraints to be respected.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Gomez:2014:DSD, author = "Leonardo Bautista Gomez and Franck Cappello", title = "Detecting silent data corruption through data dynamic monitoring for scientific applications", journal = j-SIGPLAN, volume = "49", number = "8", pages = "381--382", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555279", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallel programming has become one of the best ways to express scientific models that simulate a wide range of natural phenomena. These complex parallel codes are deployed and executed on large-scale parallel computers, making them important tools for scientific discovery. As supercomputers get faster and larger, the increasing number of components is leading to higher failure rates. In particular, the miniaturization of electronic components is expected to lead to a dramatic rise in soft errors and data corruption. Moreover, soft errors can corrupt data silently and generate large inaccuracies or wrong results at the end of the computation. In this paper we propose a novel technique to detect silent data corruption based on data monitoring. Using this technique, an application can learn the normal dynamics of its datasets, allowing it to quickly spot anomalies. We evaluate our technique with synthetic benchmarks and we show that our technique can detect up to 50\% of injected errors while incurring only negligible overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Sandes:2014:FGP, author = "Edans F. de O. Sandes and Guillermo Miranda and Alba C. M. A. Melo and Xavier Martorell and Eduard Ayguade", title = "Fine-grain parallel megabase sequence comparison with multiple heterogeneous {GPUs}", journal = j-SIGPLAN, volume = "49", number = "8", pages = "383--384", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555280", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes and evaluates a parallel strategy to execute the exact Smith-Waterman (SW) algorithm for megabase DNA sequences in heterogeneous multi-GPU platforms. In our strategy, the computation of a single huge SW matrix is spread over multiple GPUs, which communicate border elements to the neighbour, using a circular buffer mechanism that hides the communication overhead.
We compared 4 pairs of human-chimpanzee homologous chromosomes using 2 different GPU environments, obtaining a performance of up to 140.36 GCUPS (billions of cell updates per second) with 3 heterogeneous GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Golan-Gueta:2014:ASL, author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and Eran Yahav", title = "Automatic semantic locking", journal = j-SIGPLAN, volume = "49", number = "8", pages = "385--386", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555281", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we consider concurrent programs in which the shared state consists of instances of linearizable ADTs (abstract data types). We develop a novel automated approach to concurrency control that addresses a common need: the need to atomically execute a code fragment, which may contain multiple ADT operations on multiple ADT instances. In our approach, each ADT implements ADT-specific semantic locking operations that serve to exploit the semantics of ADT operations. We develop a synthesis algorithm that automatically inserts calls to these locking operations in a set of given code fragments (in a client program) to ensure that these code fragments execute atomically without deadlocks, and without rollbacks. We have implemented the synthesis algorithm and several general-purpose ADTs with semantic locking. We have applied the synthesis algorithm to several Java programs that use these ADTs. Our results show that our approach enables efficient and scalable synchronization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Hassan:2014:OTB, author = "Ahmed Hassan and Roberto Palmieri and Binoy Ravindran", title = "Optimistic transactional boosting", journal = j-SIGPLAN, volume = "49", number = "8", pages = "387--388", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555283", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Herlihy and Koskinen's transactional boosting methodology addressed the challenge of converting concurrent data structures into transactional ones. We present an optimistic methodology for boosting concurrent collections. Optimistic boosting allows greater data structure-specific optimizations, easier integration with STM frameworks, and lower restrictions on the boosted operations than the original boosting methodology.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Agrawal:2014:PGS, author = "Kunal Agrawal and Jeremy T. 
Fineman and Brendan Sheridan and Jim Sukha and Robert Utterback", title = "Provably good scheduling for parallel programs that use data structures through implicit batching", journal = j-SIGPLAN, volume = "49", number = "8", pages = "389--390", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555284", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This poster proposes an efficient runtime scheduler that provides provable performance guarantees to parallel programs that use data structures through the use of implicit batching.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Ma:2014:TAC, author = "Lin Ma and Kunal Agrawal and Roger D. Chamberlain", title = "Theoretical analysis of classic algorithms on highly-threaded many-core {GPUs}", journal = j-SIGPLAN, volume = "49", number = "8", pages = "391--392", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555285", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Threaded many-core memory (TMM) model provides a framework to analyze the performance of algorithms on GPUs. Here, we investigate the effectiveness of the TMM model by analyzing algorithms for 3 classic problems --- suffix tree/array for string matching, fast Fourier transform, and merge sort --- under this model. Our findings indicate that the TMM model can explain and predict previously unexplained trends and artifacts in experimental data.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Tomkins:2014:SIP, author = "Daniel Tomkins and Timmie Smith and Nancy M. Amato and Lawrence Rauchwerger", title = "{SCCMulti}: an improved parallel strongly connected components algorithm", journal = j-SIGPLAN, volume = "49", number = "8", pages = "393--394", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555286", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tarjan's famous linear time, sequential algorithm for finding the strongly connected components (SCCs) of a graph relies on depth first search, which is inherently sequential. Deterministic parallel algorithms solve this problem in logarithmic time using matrix multiplication techniques, but matrix multiplication requires a large amount of total work. Randomized algorithms based on reachability --- the ability to get from one vertex to another along a directed path --- greatly improve the work bound in the average case. However, these algorithms do not always perform well; for instance, Divide-and-Conquer Strong Components (DCSC), a scalable, divide-and-conquer algorithm, has good expected theoretical limits, but can perform very poorly on graphs for which the maximum reachability of any vertex is small. 
A related algorithm, MultiPivot, gives very high probability guarantees on the total amount of work for all graphs, but this improvement introduces an overhead that increases the average running time. This work introduces SCCMulti, a multi-pivot improvement of DCSC that offers the same consistency as MultiPivot without the time overhead. We provide experimental results demonstrating SCCMulti's scalability; these results also show that SCCMulti is more consistent than DCSC and is always faster than MultiPivot.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Luo:2014:ISM, author = "Miao Luo and Xiaoyi Lu and Khaled Hamidouche and Krishna Kandalla and Dhabaleswar K. Panda", title = "Initial study of multi-endpoint runtime for {MPI + OpenMP} hybrid programming model on multi-core systems", journal = j-SIGPLAN, volume = "49", number = "8", pages = "395--396", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555287", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State-of-the-art MPI libraries rely on locks to guarantee thread-safety. This discourages application developers from using multiple threads to perform MPI operations. In this paper, we propose a high performance, lock-free multi-endpoint MPI runtime, which can achieve up to 40\% improvement for point-to-point operation and one representative collective operation with minimum or no modifications to the existing applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Isaacs:2014:ELS, author = "Katherine E. Isaacs and Todd Gamblin and Abhinav Bhatele and Peer-Timo Bremer and Martin Schulz and Bernd Hamann", title = "Extracting logical structure and identifying stragglers in parallel execution traces", journal = j-SIGPLAN, volume = "49", number = "8", pages = "397--398", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555288", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a new approach to automatically extract an idealized logical structure from a parallel execution trace. We use this structure to define intuitive metrics such as the lateness of a process involved in a parallel execution. By analyzing and illustrating traces in terms of logical steps, we leverage a developer's understanding of the happened-before relations in a parallel program. 
This technique can uncover dependency chains, elucidate communication patterns, and highlight sources and propagation of delays, all of which may be obscured in a traditional trace visualization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '14 conference proceedings.", } @Article{Fisher:2014:UFM, author = "Kathleen Fisher", title = "Using formal methods to enable more secure vehicles: {DARPA}'s {HACMS} program", journal = j-SIGPLAN, volume = "49", number = "9", pages = "1--1", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628165", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Networked embedded systems are ubiquitous in modern society. Examples include SCADA systems that manage physical infrastructure, medical devices such as pacemakers and insulin pumps, and vehicles such as airplanes and automobiles. Such devices are connected to networks for a variety of compelling reasons, including the ability to access diagnostic information conveniently, perform software updates, provide innovative features, and lower costs. Researchers and hackers have shown that these kinds of networked embedded systems are vulnerable to remote attacks and that such attacks can cause physical damage and can be hidden from monitors [1, 4]. DARPA launched the HACMS program to create technology to make such systems dramatically harder to attack successfully. Specifically, HACMS is pursuing a clean-slate, formal methods-based approach to the creation of high-assurance vehicles, where high assurance is defined to mean functionally correct and satisfying appropriate safety and security properties. Specific technologies include program synthesis, domain-specific languages, and theorem provers used as program development environments. Targeted software includes operating system components such as hypervisors, microkernels, file systems, and device drivers as well as control systems such as autopilots and adaptive cruise controls. Program researchers are leveraging existing high-assurance software including NICTA's seL4 microkernel and INRIA's CompCert compiler. Although the HACMS project is less than halfway done, the program has already achieved some remarkable success. At program kick-off, a Red Team easily hijacked the baseline open-source quadcopter that HACMS researchers are using as a research platform. At the end of eighteen months, the Red Team was not able to hijack the newly-minted ``SMACCMCopter'' running high-assurance HACMS code, despite being given six weeks and full access to the source code of the copter. An expert in penetration testing called the SMACCMCopter ``the most secure UAV on the planet.'' In this talk, I will describe the HACMS program: its motivation, the underlying technologies, current results, and future directions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Hickey:2014:BES, author = "Patrick C. 
Hickey and Lee Pike and Trevor Elliott and James Bielman and John Launchbury", title = "Building embedded systems with embedded {DSLs}", journal = j-SIGPLAN, volume = "49", number = "9", pages = "3--9", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628146", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We report on our experiences in synthesizing a fully-featured autopilot from embedded domain-specific languages (EDSLs) hosted in Haskell. The autopilot is approximately 50k lines of C code generated from 10k lines of EDSL code and includes control laws, mode logic, encrypted communications system, and device drivers. The autopilot was built in less than two engineer years. This is the story of how EDSLs provided the productivity and safety gains to do large-scale low-level embedded programming and lessons we learned in doing so.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Schlesinger:2014:CNP, author = "Cole Schlesinger and Michael Greenberg and David Walker", title = "Concurrent {NetCore}: from policies to pipelines", journal = j-SIGPLAN, volume = "49", number = "9", pages = "11--24", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628157", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In a Software-Defined Network (SDN), a central, computationally powerful controller manages a set of distributed, computationally simple switches. The controller computes a policy describing how each switch should route packets and populates packet-processing tables on each switch with rules to enact the routing policy. As network conditions change, the controller continues to add and remove rules from switches to adjust the policy as needed. Recently, the SDN landscape has begun to change as several proposals for new, reconfigurable switching architectures, such as RMT [5] and FlexPipe [14] have emerged. These platforms provide switch programmers with many, flexible tables for storing packet-processing rules, and they offer programmers control over the packet fields that each table can analyze and act on. These reconfigurable switch architectures support a richer SDN model in which a switch configuration phase precedes the rule population phase [4]. In the configuration phase, the controller sends the switch a graph describing the layout and capabilities of the packet processing tables it will require during the population phase. Armed with this foreknowledge, the switch can allocate its hardware (or software) resources more efficiently. We present a new, typed language, called Concurrent NetCore, for specifying routing policies and graphs of packet-processing tables. Concurrent NetCore includes features for specifying sequential, conditional and concurrent control-flow between packet-processing tables. We develop a fine-grained operational model for the language and prove this model coincides with a higher-level denotational model when programs are well-typed. We also prove several additional properties of well-typed programs, including strong normalization and determinism. 
To illustrate the utility of the language, we develop linguistic models of both the RMT and FlexPipe architectures and we give a multi-pass compilation algorithm that translates graphs and routing policies to the RMT model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Schoepe:2014:STI, author = "Daniel Schoepe and Daniel Hedin and Andrei Sabelfeld", title = "{SeLINQ}: tracking information across application-database boundaries", journal = j-SIGPLAN, volume = "49", number = "9", pages = "25--38", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628151", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "The root cause for confidentiality and integrity attacks against computing systems is insecure information flow. The complexity of modern systems poses a major challenge to secure end-to-end information flow, ensuring that the insecurity of a single component does not render the entire system insecure. While information flow in a variety of languages and settings has been thoroughly studied in isolation, the problem of tracking information across component boundaries has been largely out of reach of the work so far. This is unsatisfactory because tracking information across component boundaries is necessary for end-to-end security. This paper proposes a framework for uniform tracking of information flow through both the application and the underlying database. Key enabler of the uniform treatment is recent work by Cheney et al., which studies database manipulation via an embedded language-integrated query language (with Microsoft's LINQ on the backend). Because both the host language and the embedded query languages are functional F\#-like languages, we are able to leverage information-flow enforcement for functional languages to obtain information-flow control for databases ``for free'', synergize it with information-flow control for applications and thus guarantee security across application-database boundaries. We develop the formal results in the form of a security type system that includes a treatment of algebraic data types and pattern matching, and establish its soundness. On the practical side, we implement the framework and demonstrate its usefulness in a case study with a realistic movie rental database.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Chen:2014:TBP, author = "Sheng Chen and Martin Erwig", title = "Type-based parametric analysis of program families", journal = j-SIGPLAN, volume = "49", number = "9", pages = "39--51", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628155", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Previous research on static analysis for program families has focused on lifting analyses for single, plain programs to program families by employing idiosyncratic representations. 
The lifting effort typically involves a significant amount of work for proving the correctness of the lifted algorithm and demonstrating its scalability. In this paper, we propose a parameterized static analysis framework for program families that can automatically lift a class of type-based static analyses for plain programs to program families. The framework consists of a parametric logical specification and a parametric variational constraint solver. We prove that a lifted algorithm is correct provided that the underlying analysis algorithm is correct. An evaluation of our framework has revealed an error in a previous manually lifted analysis. Moreover, performance tests indicate that the overhead incurred by the general framework is bounded by a factor of 2.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Stansifer:2014:RSM, author = "Paul Stansifer and Mitchell Wand", title = "{Romeo}: a system for more flexible binding-safe programming", journal = j-SIGPLAN, volume = "49", number = "9", pages = "53--65", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628162", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current languages for safely manipulating values with names only support term languages with simple binding syntax. As a result, no tools exist to safely manipulate code written in those languages for which name problems are the most challenging. We address this problem with Romeo, a language that respects $ \alpha $-equivalence on its values, and which has access to a rich specification language for binding, inspired by attribute grammars. Our work has the complex-binding support of David Herman's $ \lambda_m$, but is a full-fledged binding-safe language like Pure FreshML.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Grabmayer:2014:MSL, author = "Clemens Grabmayer and Jan Rochel", title = "Maximal sharing in the {Lambda} calculus with letrec", journal = j-SIGPLAN, volume = "49", number = "9", pages = "67--80", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628148", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increasing sharing in programs is desirable to compactify the code, and to avoid duplication of reduction work at run-time, thereby speeding up execution. We show how a maximal degree of sharing can be obtained for programs expressed as terms in the lambda calculus with letrec. We introduce a notion of `maximal compactness' for $ \lambda_{\rm letrec}$-terms among all terms with the same infinite unfolding. Instead of being defined purely syntactically, this notion is based on a graph semantics. $ \lambda_{\rm letrec}$-terms are interpreted as first-order term graphs so that unfolding equivalence between terms is preserved and reflected through bisimilarity of the term graph interpretations. Compactness of the term graphs can then be compared via functional bisimulation. 
We describe practical and efficient methods for the following two problems: transforming a $ \lambda_{\rm letrec}$-term into a maximally compact form; and deciding whether two $ \lambda_{\rm letrec}$-terms are unfolding-equivalent. The transformation of a $ \lambda_{\rm letrec}$-term $L$ into maximally compact form $ L_0$ proceeds in three steps: (i) translate $L$ into its term graph $ G = [[L]]$; (ii) compute the maximally shared form of $G$ as its bisimulation collapse $ G_0$; (iii) read back a $ \lambda_{\rm letrec}$-term $ L_0$ from the term graph $ G_0$ with the property $ [[L_0]] = G_0$. Then $ L_0$ represents a maximally shared term graph, and it has the same unfolding as $L$. The procedure for deciding whether two given $ \lambda_{\rm letrec}$-terms $ L_1$ and $ L_2$ are unfolding-equivalent computes their term graph interpretations $ [[L_1]]$ and $ [[L_2]]$, and checks whether these are bisimilar. For illustration, we also provide a readily usable implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Bergstrom:2014:PEH, author = "Lars Bergstrom and Matthew Fluet and Matthew Le and John Reppy and Nora Sandler", title = "Practical and effective higher-order optimizations", journal = j-SIGPLAN, volume = "49", number = "9", pages = "81--93", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628153", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inlining is an optimization that replaces a call to a function with that function's body. This optimization not only reduces the overhead of a function call, but can expose additional optimization opportunities to the compiler, such as removing redundant operations or unused conditional branches. Another optimization, copy propagation, replaces a redundant copy of a still-live variable with the original. Copy propagation can reduce the total number of live variables, reducing register pressure and memory usage, and possibly eliminating redundant memory-to-memory copies. In practice, both of these optimizations are implemented in nearly every modern compiler. These two optimizations are practical to implement and effective in first-order languages, but in languages with lexically-scoped first-class functions (a.k.a. closures), these optimizations are not available to code programmed in a higher-order style. With higher-order functions, the analysis challenge has been that the environment at the call site must be the same as at the closure capture location, up to the free variables, or the meaning of the program may change. Olin Shivers' 1991 dissertation called this family of optimizations super $ \beta $ and he proposed one analysis technique, called reflow, to support these optimizations. Unfortunately, reflow has proven too expensive to implement in practice. Because these higher-order optimizations are not available in functional-language compilers, programmers studiously avoid uses of higher-order values that cannot be optimized (particularly in compiler benchmarks). This paper provides the first practical and effective technique for super $ \beta $ (higher-order) inlining and copy propagation, which we call unchanged variable analysis. 
We show that this technique is practical by implementing it in the context of a real compiler for an ML-family language and showing that the required analyses have costs below 3\% of the total compilation time. This technique's effectiveness is shown through a set of benchmarks and example programs, where this analysis exposes additional potential optimization sites.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Hackett:2014:WWM, author = "Jennifer Hackett and Graham Hutton", title = "Worker\slash wrapper\slash makes it\slash faster", journal = j-SIGPLAN, volume = "49", number = "9", pages = "95--107", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628142", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Much research in program optimization has focused on formal approaches to correctness: proving that the meaning of programs is preserved by the optimisation. Paradoxically, there has been comparatively little work on formal approaches to efficiency: proving that the performance of optimized programs is actually improved. This paper addresses this problem for a general-purpose optimization technique, the worker/wrapper transformation. In particular, we use the call-by-need variant of improvement theory to establish conditions under which the worker/wrapper transformation is formally guaranteed to preserve or improve the time performance of programs in lazy languages such as Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Downen:2014:CSC, author = "Paul Downen and Zena M. Ariola", title = "Compositional semantics for composable continuations: from abortive to delimited control", journal = j-SIGPLAN, volume = "49", number = "9", pages = "109--122", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628147", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parigot's $ \lambda \mu $-calculus, a system for computational reasoning about classical proofs, serves as a foundation for control operations embodied by operators like Scheme's callcc. We demonstrate that the call-by-value theory of the $ \lambda \mu $-calculus contains a latent theory of delimited control, and that a known variant of $ \lambda \mu $ which unshackles the syntax yields a calculus of composable continuations from the existing constructs and rules for classical control. To relate to the various formulations of control effects, and to continuation-passing style, we use a form of compositional program transformations which preserves the underlying structure of equational theories, contexts, and substitution. 
Finally, we generalize the call-by-name and call-by-value theories of the $ \lambda \mu $-calculus by giving a single parametric theory that encompasses both, allowing us to generate a call-by-need instance that defines a calculus of classical and delimited control with lazy evaluation and sharing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Petricek:2014:CCC, author = "Tomas Petricek and Dominic Orchard and Alan Mycroft", title = "Coeffects: a calculus of context-dependent computation", journal = j-SIGPLAN, volume = "49", number = "9", pages = "123--135", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The notion of context in functional languages no longer refers just to variables in scope. Context can capture additional properties of variables (usage patterns in linear logics; caching requirements in dataflow languages) as well as additional resources or properties of the execution environment (rebindable resources; platform version in a cross-platform application). The recently introduced notion of coeffects captures the latter, whole-context properties, but it failed to capture fine-grained per-variable properties. We remedy this by developing a generalized coeffect system with annotations indexed by a coeffect shape. By instantiating a concrete shape, our system captures previously studied flat (whole-context) coeffects, but also structural (per-variable) coeffects, making coeffect analyses more useful. We show that the structural system enjoys desirable syntactic properties and we give a categorical semantics using extended notions of indexed comonad. The examples presented in this paper are based on analysis of established language features (liveness, linear logics, dataflow, dynamic scoping) and we argue that such context-aware properties will also be useful for future development of languages for increasingly heterogeneous and distributed platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Findler:2014:BSC, author = "Robert Bruce Findler", title = "Behavioral software contracts", journal = j-SIGPLAN, volume = "49", number = "9", pages = "137--138", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2632855", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers embrace contracts. They can use the language they know and love to formulate logical assertions about the behavior of their programs. They can use the existing IDE infrastructure to log contracts, to test, to debug, and to profile their programs. The keynote presents the challenges and rewards of supporting contracts in a modern, full-spectrum programming language. It covers technical challenges of contracts while demonstrating the non-technical motivation for contract system design choices and showing how contracts and contract research can serve practicing programmers. 
The remainder of this article is a literature survey of contract research, with an emphasis on recent work about higher-order contracts and blame.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Nguyen:2014:SCV, author = "Ph{\'u}c C. Nguyen and Sam Tobin-Hochstadt and David {Van Horn}", title = "Soft contract verification", journal = j-SIGPLAN, volume = "49", number = "9", pages = "139--152", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628156", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Behavioral software contracts are a widely used mechanism for governing the flow of values between components. However, run-time monitoring and enforcement of contracts imposes significant overhead and delays discovery of faulty components to run-time. To overcome these issues, we present soft contract verification, which aims to statically prove either complete or partial contract correctness of components, written in an untyped, higher-order language with first-class contracts. Our approach uses higher-order symbolic execution, leveraging contracts as a source of symbolic values including unknown behavioral values, and employs an updatable heap of contract invariants to reason about flow-sensitive facts. We prove the symbolic execution soundly approximates the dynamic semantics and that verified programs can't be blamed. The approach is able to analyze first-class contracts, recursive data structures, unknown functions, and control-flow-sensitive refinements of values, which are all idiomatic in dynamic languages. It makes effective use of an off-the-shelf solver to decide problems without heavy encodings. The approach is competitive with a wide range of existing tools --- including type systems, flow analyzers, and model checkers --- on their own benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Ramsey:2014:THD, author = "Norman Ramsey", title = "On teaching *how to design programs*: observations from a newcomer", journal = j-SIGPLAN, volume = "49", number = "9", pages = "153--166", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628137", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a personal, qualitative case study of a first course using How to Design Programs and its functional teaching languages. The paper reconceptualizes the book's six-step design process as an eight-step design process ending in a new ``review and refactor'' step. It recommends specific approaches to students' difficulties with function descriptions, function templates, data examples, and other parts of the design process. It connects the process to interactive ``world programs.'' It recounts significant, informative missteps in course design and delivery. 
Finally, it identifies some unsolved teaching problems and some potential solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Ohori:2014:SIP, author = "Atsushi Ohori and Katsuhiro Ueno and Kazunori Hoshi and Shinji Nozaki and Takashi Sato and Tasuku Makabe and Yuki Ito", title = "{SML\#} in industry: a practical {ERP} system development", journal = j-SIGPLAN, volume = "49", number = "9", pages = "167--173", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628164", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper reports on our industry-academia project of using a functional language in business software production. The general motivation behind the project is our ultimate goal of adopting an ML-style higher-order typed functional language in a wide range of ordinary software development in industry. To probe the feasibility and identify various practical problems and needs, we have conducted a 15 month pilot project for developing an enterprise resource planning (ERP) system in SML\#. The project has successfully completed as we have planned, demonstrating the feasibility of SML\#. In particular, seamless integration of SQL and direct C language interface are shown to be useful in reliable and efficient development of a data intensive business application. During the program development, we have found several useful functional programming patterns and a number of possible extensions of an ML-style language with records. This paper reports on the project details and the lessons learned from the project.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Mulligan:2014:LRE, author = "Dominic P. Mulligan and Scott Owens and Kathryn E. Gray and Tom Ridge and Peter Sewell", title = "{Lem}: reusable engineering of real-world semantics", journal = j-SIGPLAN, volume = "49", number = "9", pages = "175--188", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628143", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent years have seen remarkable successes in rigorous engineering: using mathematically rigorous semantic models (not just idealised calculi) of real-world processors, programming languages, protocols, and security mechanisms, for testing, proof, analysis, and design. Building these models is challenging, requiring experimentation, dialogue with vendors or standards bodies, and validation; their scale adds engineering issues akin to those of programming to the task of writing clear and usable mathematics. But language and tool support for specification is lacking. Proof assistants can be used but bring their own difficulties, and a model produced in one, perhaps requiring many person-years effort and maintained over an extended period, cannot be used by those familiar with another. We introduce Lem, a language for engineering reusable large-scale semantic models. 
The Lem design takes inspiration both from functional programming languages and from proof assistants, and Lem definitions are translatable into OCaml for testing, Coq, HOL4, and Isabelle/HOL for proof, and LaTeX and HTML for presentation. This requires a delicate balance of expressiveness, careful library design, and implementation of transformations --- akin to compilation, but subject to the constraint of producing usable and human-readable code for each target. Lem's effectiveness is demonstrated by its use in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Breitner:2014:SZC, author = "Joachim Breitner and Richard A. Eisenberg and Simon Peyton Jones and Stephanie Weirich", title = "Safe zero-cost coercions for {Haskell}", journal = j-SIGPLAN, volume = "49", number = "9", pages = "189--202", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628141", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Generative type abstractions --- present in Haskell, OCaml, and other languages --- are useful concepts to help prevent programmer errors. They serve to create new types that are distinct at compile time but share a run-time representation with some base type. We present a new mechanism that allows for zero-cost conversions between generative type abstractions and their representations, even when such types are deeply nested. We prove type safety in the presence of these conversions and have implemented our work in GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Pottier:2014:HME, author = "Fran{\c{c}}ois Pottier", title = "{Hindley--Milner} elaboration in applicative style: functional pearl", journal = j-SIGPLAN, volume = "49", number = "9", pages = "203--212", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628145", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type inference --- the problem of determining whether a program is well-typed --- is well-understood. In contrast, elaboration --- the task of constructing an explicitly-typed representation of the program --- seems to have received relatively little attention, even though, in a non-local type inference system, it is non-trivial. We show that the constraint-based presentation of Hindley--Milner type inference can be extended to deal with elaboration, while preserving its elegance. 
This involves introducing a new notion of ``constraint with a value'', which forms an applicative functor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Winograd-Cort:2014:SNI, author = "Daniel Winograd-Cort and Paul Hudak", title = "Settable and non-interfering signal functions for {FRP}: how a first-order switch is more than enough", journal = j-SIGPLAN, volume = "49", number = "9", pages = "213--225", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628140", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional Reactive Programming (FRP) provides a method for programming continuous, reactive systems by utilizing signal functions that, abstractly, transform continuous input signals into continuous output signals. These signals may also be streams of events, and indeed, by allowing signal functions themselves to be the values carried by these events (in essence, signals of signal functions), one can conveniently make discrete changes in program behavior by ``switching'' into and out of these signal functions. This higher-order notion of switching is common among many FRP systems, in particular those based on arrows, such as Yampa. Although convenient, the power of switching is often an overkill and can pose problems for certain types of program optimization (such as causal commutative arrows [14]), as it causes the structure of the program to change dynamically at run-time. Without a notion of just-in-time compilation or related idea, which itself is beset with problems, such optimizations are not possible at compile time. This paper introduces two new ideas that obviate, in a predominance of cases, the need for switching. The first is a non-interference law for arrows with choice that allows an arrowized FRP program to dynamically alter its own structure (within statically limited bounds) as well as abandon unused streams. The other idea is a notion of a settable signal function that allows a signal function to capture its present state and later be restarted from some previous state. With these two features, canonical uses of higher-order switchers can be replaced with a suitable first-order design, thus enabling a broader range of static optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Chen:2014:FPD, author = "Yan Chen and Umut A. Acar and Kanat Tangwongsan", title = "Functional programming for dynamic and large data with self-adjusting computation", journal = j-SIGPLAN, volume = "49", number = "9", pages = "227--240", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628150", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Combining type theory, language design, and empirical work, we present techniques for computing with large and dynamically changing datasets. 
Based on lambda calculus, our techniques are suitable for expressing a diverse set of algorithms on large datasets and, via self-adjusting computation, enable computations to respond automatically to changes in their data. To improve the scalability of self-adjusting computation, we present a type system for precise dependency tracking that minimizes the time and space for storing dependency metadata. The type system eliminates an important assumption of prior work that can lead to recording spurious dependencies. We present a type-directed translation algorithm that generates correct self-adjusting programs without relying on this assumption. We then show a probabilistic-chunking technique to further decrease space usage by controlling the fundamental space-time tradeoff in self-adjusting computation. We implement and evaluate these techniques, showing promising results on challenging benchmarks involving large graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Weirich:2014:DT, author = "Stephanie Weirich", title = "Depending on types", journal = j-SIGPLAN, volume = "49", number = "9", pages = "241--241", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2631168", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Is Haskell a dependently typed programming language? Should it be? GHC's many type-system features, such as Generalized Algebraic Datatypes (GADTs), datatype promotion, multiparameter type classes, and type families, give programmers the ability to encode domain-specific invariants in their types. Clever Haskell programmers have used these features to enhance the reasoning capabilities of static type checking. But really, how far have we come? Could we do more? In this talk, I will discuss dependently typed programming in Haskell, through examples, analysis and comparisons with modern full-spectrum dependently typed languages, such as Coq, Agda and Idris. What sorts of dependently typed programming can be done in Haskell now? What could GHC learn from these languages? Conversely, what lessons can GHC offer in return?", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Angiuli:2014:HPT, author = "Carlo Angiuli and Edward Morehouse and Daniel R. Licata and Robert Harper", title = "Homotopical patch theory", journal = j-SIGPLAN, volume = "49", number = "9", pages = "243--256", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Homotopy type theory is an extension of Martin-L{\"o}f type theory, based on a correspondence with homotopy theory and higher category theory. In homotopy type theory, the propositional equality type becomes proof-relevant, and corresponds to paths in a space. This allows for a new class of datatypes, called higher inductive types, which are specified by constructors not only for points but also for paths. 
In this paper, we consider a programming application of higher inductive types. Version control systems such as Darcs are based on the notion of patches --- syntactic representations of edits to a repository. We show how patch theory can be developed in homotopy type theory. Our formulation separates formal theories of patches from their interpretation as edits to repositories. A patch theory is presented as a higher inductive type. Models of a patch theory are given by maps out of that type, which, being functors, automatically preserve the structure of patches. Several standard tools of homotopy theory come into play, demonstrating the use of these methods in a practical programming context.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Cockx:2014:PMK, author = "Jesper Cockx and Dominique Devriese and Frank Piessens", title = "Pattern matching without {K}", journal = j-SIGPLAN, volume = "49", number = "9", pages = "257--268", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628139", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Dependent pattern matching is an intuitive way to write programs and proofs in dependently typed languages. It is reminiscent of both pattern matching in functional languages and case analysis in on-paper mathematics. However, in general it is incompatible with new type theories such as homotopy type theory (HoTT). As a consequence, proofs in such theories are typically harder to write and to understand. The source of this incompatibility is the reliance of dependent pattern matching on the so-called K axiom --- also known as the uniqueness of identity proofs --- which is inadmissible in HoTT. The Agda language supports an experimental criterion to detect definitions by pattern matching that make use of the K axiom, but so far it lacked a formal correctness proof. In this paper, we propose a new criterion for dependent pattern matching without K, and prove it correct by a translation to eliminators in the style of Goguen et al. (2006). Our criterion both allows more good definitions than existing proposals, and solves a previously undetected problem in the criterion offered by Agda. It has been implemented in Agda and is the first to be supported by a formal proof. Thus it brings the benefits of dependent pattern matching to contexts where we cannot assume K, such as HoTT. It also points the way to new forms of dependent pattern matching, for example on higher inductive types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Vazou:2014:RTH, author = "Niki Vazou and Eric L. 
Seidel and Ranjit Jhala and Dimitrios Vytiniotis and Simon Peyton-Jones", title = "Refinement types for {Haskell}", journal = j-SIGPLAN, volume = "49", number = "9", pages = "269--282", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "SMT-based checking of refinement types for call-by-value languages is a well-studied subject. Unfortunately, the classical translation of refinement types to verification conditions is unsound under lazy evaluation. When checking an expression, such systems implicitly assume that all the free variables in the expression are bound to values. This property is trivially guaranteed by eager, but does not hold under lazy, evaluation. Thus, to be sound and precise, a refinement type system for Haskell and the corresponding verification conditions must take into account which subset of binders actually reduces to values. We present a stratified type system that labels binders as potentially diverging or not, and that (circularly) uses refinement types to verify the labeling. We have implemented our system in LiquidHaskell and present an experimental evaluation of our approach on more than 10,000 lines of widely used Haskell libraries. We show that LiquidHaskell is able to prove 96\% of all recursive functions terminating, while requiring a modest 1.7 lines of termination-annotations per 100 lines of code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Schwerter:2014:TGE, author = "Felipe Ba{\~n}ados Schwerter and Ronald Garcia and {\'E}ric Tanter", title = "A theory of gradual effect systems", journal = j-SIGPLAN, volume = "49", number = "9", pages = "283--295", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628149", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effect systems have the potential to help software developers, but their practical adoption has been very limited. We conjecture that this limited adoption is due in part to the difficulty of transitioning from a system where effects are implicit and unrestricted to a system with a static effect discipline, which must settle for conservative checking in order to be decidable. To address this hindrance, we develop a theory of gradual effect checking, which makes it possible to incrementally annotate and statically check effects, while still rejecting statically inconsistent programs. We extend the generic type-and-effect framework of Marino and Millstein with a notion of unknown effects, which turns out to be significantly more subtle than unknown types in traditional gradual typing. We appeal to abstract interpretation to develop and validate the concepts of gradual effect checking.
We also demonstrate how an effect system formulated in Marino and Millstein's framework can be automatically extended to support gradual checking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{McBride:2014:HKY, author = "Conor Thomas McBride", title = "How to keep your neighbours in order", journal = j-SIGPLAN, volume = "49", number = "9", pages = "297--309", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628163", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "I present a datatype-generic treatment of recursive container types whose elements are guaranteed to be stored in increasing order, with the ordering invariant rolled out systematically. Intervals, lists and binary search trees are instances of the generic treatment. On the journey to this treatment, I report a variety of failed experiments and the transferable learning experiences they triggered. I demonstrate that a total element ordering is enough to deliver insertion and flattening algorithms, and show that (with care about the formulation of the types) the implementations remain as usual. Agda's instance arguments and pattern synonyms maximize the proof search done by the typechecker and minimize the appearance of proofs in program text, often eradicating them entirely. Generalizing to indexed recursive container types, invariants such as size and balance can be expressed in addition to ordering. By way of example, I implement insertion and deletion for 2-3 trees, ensuring both order and balance by the discipline of type checking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Kaki:2014:RFH, author = "Gowtham Kaki and Suresh Jagannathan", title = "A relational framework for higher-order shape analysis", journal = j-SIGPLAN, volume = "49", number = "9", pages = "311--324", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628159", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose the integration of a relational specification framework within a dependent type system capable of verifying complex invariants over the shapes of algebraic datatypes. Our approach is based on the observation that structural properties of such datatypes can often be naturally expressed as inductively-defined relations over the recursive structure evident in their definitions. By interpreting constructor applications (abstractly) in a relational domain, we can define expressive relational abstractions for a variety of complex data structures, whose structural and shape invariants can be automatically verified. Our specification language also allows for definitions of parametric relations for polymorphic data types that enable highly composable specifications and naturally generalizes to higher-order polymorphic functions. We describe an algorithm that translates relational specifications into a decidable fragment of first-order logic that can be efficiently discharged by an SMT solver. 
We have implemented these ideas in a type checker called CATALYST that is incorporated within the MLton SML compiler. Experimental results and case studies indicate that our verification strategy is both practical and effective.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Marlow:2014:TNF, author = "Simon Marlow and Louis Brandy and Jonathan Coens and Jon Purdy", title = "There is no fork: an abstraction for efficient, concurrent, and concise data access", journal = j-SIGPLAN, volume = "49", number = "9", pages = "325--337", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628144", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a new programming idiom for concurrency, based on Applicative Functors, where concurrency is implicit in the Applicative $ < * > $ operator. The result is that concurrent programs can be written in a natural applicative style, and they retain a high degree of clarity and modularity while executing with maximal concurrency. This idiom is particularly useful for programming against external data sources, where the application code is written without the use of explicit concurrency constructs, while the implementation is able to batch together multiple requests for data from the same source, and fetch data from multiple sources concurrently. Our abstraction uses a cache to ensure that multiple requests for the same data return the same result, which frees the programmer from having to arrange to fetch data only once, which in turn leads to greater modularity. While it is generally applicable, our technique was designed with a particular application in mind: an internal service at Facebook that identifies particular types of content and takes actions based on it. Our application has a large body of business logic that fetches data from several different external sources. The framework described in this paper enables the business logic to execute efficiently by automatically fetching data concurrently; we present some preliminary results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Gibbons:2014:FDS, author = "Jeremy Gibbons and Nicolas Wu", title = "Folding domain-specific languages: deep and shallow embeddings (functional Pearl)", journal = j-SIGPLAN, volume = "49", number = "9", pages = "339--347", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628138", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A domain-specific language can be implemented by embedding within a general-purpose host language. This embedding may be deep or shallow, depending on whether terms in the language construct syntactic or semantic representations. 
The deep and shallow styles are closely related, and intimately connected to folds; in this paper, we explore that connection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Fredriksson:2014:KNS, author = "Olle Fredriksson and Dan R. Ghica", title = "{Krivine} nets: a semantic foundation for distributed execution", journal = j-SIGPLAN, volume = "49", number = "9", pages = "349--361", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628152", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We define a new approach to compilation to distributed architectures based on networks of abstract machines. Using it we can implement a generalised and fully transparent form of Remote Procedure Call that supports calling higher-order functions across node boundaries, without sending actual code. Our starting point is the classic Krivine machine, which implements reduction for untyped call-by-name PCF. We successively add the features that we need for distributed execution and show the correctness of each addition. Then we construct a two-level operational semantics, where the high level is a network of communicating machines, and the low level is given by local machine transitions. Using these networks, we arrive at our final system, the Krivine Net. We show that Krivine Nets give a correct distributed implementation of the Krivine machine, which preserves both termination and non-termination properties. All the technical results have been formalised and proved correct in Agda. We also implement a prototype compiler which we compare with previous distributing compilers based on Girard's Geometry of Interaction and on Game Semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Accattoli:2014:DAM, author = "Beniamino Accattoli and Pablo Barenbaum and Damiano Mazza", title = "Distilling abstract machines", journal = j-SIGPLAN, volume = "49", number = "9", pages = "363--376", month = sep, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692915.2628154", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is well-known that many environment-based abstract machines can be seen as strategies in lambda calculi with explicit substitutions (ES). Recently, graphical syntaxes and linear logic led to the linear substitution calculus (LSC), a new approach to ES that is halfway between small-step calculi and traditional calculi with ES. This paper studies the relationship between the LSC and environment-based abstract machines. While traditional calculi with ES simulate abstract machines, the LSC rather distills them: some transitions are simulated while others vanish, as they map to a notion of structural congruence. The distillation process unveils that abstract machines in fact implement weak linear head reduction, a notion of evaluation having a central role in the theory of linear logic. 
We show that such a pattern applies uniformly in call-by-name, call-by-value, and call-by-need, catching many machines in the literature. We start by distilling the KAM, the CEK, and a sketch of the ZINC, and then provide simplified versions of the SECD, the lazy KAM, and Sestoft's machine. Along the way we also introduce some new machines with global environments. Moreover, we show that distillation preserves the time complexity of the executions, i.e. the LSC is a complexity-preserving abstraction of abstract machines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '14 conference proceedings.", } @Article{Feldthaus:2014:CCT, author = "Asger Feldthaus and Anders M{\o}ller", title = "Checking correctness of {TypeScript} interfaces for {JavaScript} libraries", journal = j-SIGPLAN, volume = "49", number = "10", pages = "1--16", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660215", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The TypeScript programming language adds optional types to JavaScript, with support for interaction with existing JavaScript libraries via interface declarations. Such declarations have been written for hundreds of libraries, but they can be difficult to write and often contain errors, which may affect the type checking and misguide code completion for the application code in IDEs. We present a pragmatic approach to check correctness of TypeScript declaration files with respect to JavaScript library implementations. The key idea in our algorithm is that many declaration errors can be detected by an analysis of the library initialization state combined with a light-weight static analysis of the library function code. Our experimental results demonstrate the effectiveness of the approach: it has found 142 errors in the declaration files of 10 libraries, with an analysis time of a few minutes per library and with a low number of false positives. Our analysis of how programmers use library interface declarations furthermore reveals some practical limitations of the TypeScript type system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Andreasen:2014:DSA, author = "Esben Andreasen and Anders M{\o}ller", title = "Determinacy in static analysis for {jQuery}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "17--31", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660214", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static analysis for JavaScript can potentially help programmers find errors early during development. Although much progress has been made on analysis techniques, a major obstacle is the prevalence of libraries, in particular jQuery, which apply programming patterns that have detrimental consequences on the analysis precision and performance.
Previous work on dynamic determinacy analysis has demonstrated how information about program expressions that always resolve to a fixed value in some call context may lead to significant scalability improvements of static analysis for such code. We present a static dataflow analysis for JavaScript that infers and exploits determinacy information on-the-fly, to enable analysis of some of the most complex parts of jQuery. The analysis combines selective context and path sensitivity, constant propagation, and branch pruning, based on a systematic investigation of the main causes of analysis imprecision when using a more basic analysis. The techniques are implemented in the TAJS analysis tool and evaluated on a collection of small programs that use jQuery. Our results show that the proposed analysis techniques boost both precision and performance, specifically for inferring type information and call graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Pradel:2014:EAR, author = "Michael Pradel and Parker Schuh and George Necula and Koushik Sen", title = "{EventBreak}: analyzing the responsiveness of user interfaces through performance-guided test generation", journal = j-SIGPLAN, volume = "49", number = "10", pages = "33--47", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660233", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Event-driven user interface applications typically have a single thread of execution that processes event handlers in response to input events triggered by the user, the network, or other applications. Programmers must ensure that event handlers terminate after a short amount of time because otherwise, the application may become unresponsive. This paper presents EventBreak, a performance-guided test generation technique to identify and analyze event handlers whose execution time may gradually increase while using the application. The key idea is to systematically search for pairs of events where triggering one event increases the execution time of the other event. For example, this situation may happen because one event accumulates data that is processed by the other event. We implement the approach for JavaScript-based web applications and apply it to three real-world applications. EventBreak discovers events with an execution time that gradually increases in an unbounded way, which makes the application unresponsive, and events that, if triggered repeatedly, reveal a severe scalability problem, which makes the application unusable. The approach reveals two known bugs and four previously unknown responsiveness problems. 
Furthermore, we show that EventBreak helps in testing that event handlers avoid such problems by bounding a handler's execution time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Hsiao:2014:UWC, author = "Chun-Hung Hsiao and Michael Cafarella and Satish Narayanasamy", title = "Using web corpus statistics for program analysis", journal = j-SIGPLAN, volume = "49", number = "10", pages = "49--65", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660226", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several program analysis tools --- such as plagiarism detection and bug finding --- rely on knowing a piece of code's relative semantic importance. For example, a plagiarism detector should not bother reporting two programs that have an identical simple loop counter test, but should report programs that share more distinctive code. Traditional program analysis techniques (e.g., finding data and control dependencies) are useful, but do not say how surprising or common a line of code is. Natural language processing researchers have encountered a similar problem and addressed it using an n-gram model of text frequency, derived from statistics computed over text corpora. We propose and compute an n-gram model for programming languages, computed over a corpus of 2.8 million JavaScript programs we downloaded from the Web. In contrast to previous techniques, we describe a code n-gram as a subgraph of the program dependence graph that contains all nodes and edges reachable in n steps from the statement. We can count n-grams in a program and count the frequency of n-grams in the corpus, enabling us to compute tf-idf-style measures that capture the differing importance of different lines of code. We demonstrate the power of this approach by implementing a plagiarism detector with accuracy that beats previous techniques, and a bug-finding tool that discovered over a dozen previously unknown bugs in a collection of real deployed programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Barr:2014:TAT, author = "Earl T. Barr and Mark Marron", title = "{Tardis}: affordable time-travel debugging in managed runtimes", journal = j-SIGPLAN, volume = "49", number = "10", pages = "67--82", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660209", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developers who set a breakpoint a few statements too late or who are trying to diagnose a subtle bug from a single core dump often wish for a time-traveling debugger. The ability to rewind time to see the exact sequence of statements and program values leading to an error has great intuitive appeal but, due to large time and space overheads, time traveling debuggers have seen limited adoption.
A managed runtime, such as the Java JVM or a JavaScript engine, has already paid much of the cost of providing core features --- type safety, memory management, and virtual IO --- that can be reused to implement a low overhead time-traveling debugger. We leverage this insight to design and build affordable time-traveling debuggers for managed languages. Tardis realizes our design: it provides affordable time-travel with an average overhead of only 7\% during normal execution, a rate of 0.6MB/s of history logging, and a worst-case 0.68s time-travel latency on our benchmark applications. Tardis can also debug optimized code using time-travel to reconstruct state. This capability, coupled with its low overhead, makes Tardis suitable for use as the default debugger for managed languages, promising to bring time-traveling debugging into the mainstream and transform the practice of debugging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Bell:2014:PID, author = "Jonathan Bell and Gail Kaiser", title = "{Phosphor}: illuminating dynamic data flow in commodity {JVMs}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "83--101", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660212", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Dynamic taint analysis is a well-known information flow analysis problem with many possible applications. Taint tracking allows for analysis of application data flow by assigning labels to data, and then propagating those labels through data flow. Taint tracking systems traditionally compromise among performance, precision, soundness, and portability. Performance can be critical, as these systems are often intended to be deployed to production environments, and hence must have low overhead. To be deployed in security-conscious settings, taint tracking must also be sound and precise. Dynamic taint tracking must be portable in order to be easily deployed and adopted for real world purposes, without requiring recompilation of the operating system or language interpreter, and without requiring access to application source code. We present Phosphor, a dynamic taint tracking system for the Java Virtual Machine (JVM) that simultaneously achieves our goals of performance, soundness, precision, and portability. Moreover, to our knowledge, it is the first portable general purpose taint tracking system for the JVM. We evaluated Phosphor's performance on two commonly used JVM languages (Java and Scala), on two successive revisions of two commonly used JVMs (Oracle's HotSpot and OpenJDK's IcedTea) and on Android's Dalvik Virtual Machine, finding its performance to be impressive: as low as 3\% (53\% on average; 220\% at worst) using the DaCapo macro benchmark suite.
This paper describes our approach toward achieving portable taint tracking in the JVM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Pina:2014:RDJ, author = "Lu{\'\i}s Pina and Lu{\'\i}s Veiga and Michael Hicks", title = "{Rubah}: {DSU} for {Java} on a stock {JVM}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "103--119", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660220", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Rubah, the first dynamic software updating system for Java that: is portable, implemented via libraries and bytecode rewriting on top of a standard JVM; is efficient, imposing essentially no overhead on normal, steady-state execution; is flexible, allowing nearly arbitrary changes to classes between updates; and is non-disruptive, employing either a novel eager algorithm that transforms the program state with multiple threads, or a novel lazy algorithm that transforms objects as they are demanded, post-update. Requiring little programmer effort, Rubah has been used to dynamically update five long-running applications: the H2 database, the Voldemort key-value store, the Jake2 implementation of the Quake 2 shooter game, the CrossFTP server, and the JavaEmailServer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Shahriyar:2014:FCG, author = "Rifat Shahriyar and Stephen M. Blackburn and Kathryn S. McKinley", title = "Fast conservative garbage collection", journal = j-SIGPLAN, volume = "49", number = "10", pages = "121--139", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660198", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Garbage collectors are exact or conservative. An exact collector identifies all references precisely and may move referents and update references, whereas a conservative collector treats one or more of stack, register, and heap references as ambiguous. Ambiguous references constrain collectors in two ways. (1) Since they may be pointers, the collectors must retain referents. (2) Since they may be values, the collectors cannot modify them, pinning their referents. We explore conservative collectors for managed languages, with ambiguous stacks and registers. We show that for Java benchmarks they retain and pin remarkably few heap objects: $ < 0.01 \% $ are falsely retained and 0.03\% are pinned. The larger effect is collector design. Prior conservative collectors (1) use mark-sweep and unnecessarily forgo moving all objects, or (2) use mostly copying and pin entire pages. Compared to generational collection, overheads are substantial: 12\% and 45\% respectively. We introduce high performance conservative Immix and reference counting (RC). Immix is a mark-region collector with fine line-grain pinning and opportunistic copying of unambiguous referents.
Deferred RC simply needs an object map to deliver the first conservative RC. We implement six exact collectors and their conservative counterparts. Conservative Immix and RC come within 2 to 3\% of their exact counterparts. In particular, conservative RC Immix is slightly faster than a well-tuned exact generational collector. These findings show that for managed languages, conservative collection is compatible with high performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Holk:2014:RBM, author = "Eric Holk and Ryan Newton and Jeremy Siek and Andrew Lumsdaine", title = "Region-based memory management for {GPU} programming languages: enabling rich data structures on a spartan host", journal = j-SIGPLAN, volume = "49", number = "10", pages = "141--155", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660244", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics processing units (GPUs) can effectively accelerate many applications, but their applicability has been largely limited to problems whose solutions can be expressed neatly in terms of linear algebra. Indeed, most GPU programming languages limit the user to simple data structures --- typically only multidimensional rectangular arrays of scalar values. Many algorithms are more naturally expressed using higher level language features, such as algebraic data types (ADTs) and first class procedures, yet building these structures in a manner suitable for a GPU remains a challenge. We present a region-based memory management approach that enables rich data structures in Harlan, a language for data parallel computing. Regions enable rich data structures by providing a uniform representation for pointers on both the CPU and GPU and by providing a means of transferring entire data structures between CPU and GPU memory. We demonstrate Harlan's increased expressiveness on several example programs and show that Harlan performs well on more traditional data-parallel problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Uhler:2014:SSB, author = "Richard Uhler and Nirav Dave", title = "{Smten} with satisfiability-based search", journal = j-SIGPLAN, volume = "49", number = "10", pages = "157--176", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660208", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Satisfiability (SAT) and Satisfiability Modulo Theories (SMT) have been used in solving a wide variety of important and challenging problems, including automatic test generation, model checking, and program synthesis. For these applications to scale to larger problem instances, developers cannot rely solely on the sophistication of SAT and SMT solvers to efficiently solve their queries; they must also optimize their own orchestration and construction of queries. We present Smten, a high-level language for orchestrating and constructing satisfiability-based search queries. 
We show that applications developed using Smten require significantly fewer lines of code and less developer effort to achieve results comparable to standard SMT-based tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Bosboom:2014:SCC, author = "Jeffrey Bosboom and Sumanaruban Rajadurai and Weng-Fai Wong and Saman Amarasinghe", title = "{StreamJIT}: a commensal compiler for high-performance stream programming", journal = j-SIGPLAN, volume = "49", number = "10", pages = "177--195", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660236", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There are many domain libraries, but despite the performance benefits of compilation, domain-specific languages are comparatively rare due to the high cost of implementing an optimizing compiler. We propose commensal compilation, a new strategy for compiling embedded domain-specific languages by reusing the massive investment in modern language virtual machine platforms. Commensal compilers use the host language's front-end, use host platform APIs that enable back-end optimizations by the host platform JIT, and use an autotuner for optimization selection. The cost of implementing a commensal compiler is only the cost of implementing the domain-specific optimizations. We demonstrate the concept by implementing a commensal compiler for the stream programming language StreamJIT atop the Java platform. Our compiler achieves performance 2.8 times better than the StreamIt native code (via GCC) compiler with considerably less implementation effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Tosch:2014:SPA, author = "Emma Tosch and Emery D. Berger", title = "{SurveyMan}: programming and automatically debugging surveys", journal = j-SIGPLAN, volume = "49", number = "10", pages = "197--211", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660206", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Surveys can be viewed as programs, complete with logic, control flow, and bugs. Word choice or the order in which questions are asked can unintentionally bias responses. Vague, confusing, or intrusive questions can cause respondents to abandon a survey. Surveys can also have runtime errors: inattentive respondents can taint results. This effect is especially problematic when deploying surveys in uncontrolled settings, such as on the web or via crowdsourcing platforms. Because the results of surveys drive business decisions and inform scientific conclusions, it is crucial to make sure they are correct. We present SurveyMan, a system for designing, deploying, and automatically debugging surveys. Survey authors write their surveys in a lightweight domain-specific language aimed at end users. 
SurveyMan statically analyzes the survey to provide feedback to survey authors before deployment. It then compiles the survey into JavaScript and deploys it either to the web or a crowdsourcing platform. SurveyMan's dynamic analyses automatically find survey bugs, and control for the quality of responses. We evaluate SurveyMan's algorithms analytically and empirically, demonstrating its effectiveness with case studies of social science surveys conducted via Amazon's Mechanical Turk.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Bartenstein:2014:RTS, author = "Thomas W. Bartenstein and Yu David Liu", title = "Rate types for stream programs", journal = j-SIGPLAN, volume = "49", number = "10", pages = "213--232", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660225", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce RATE TYPES, a novel type system to reason about and optimize data-intensive programs. Built around stream languages, RATE TYPES performs static quantitative reasoning about stream rates --- the frequency of data items in a stream being consumed, processed, and produced. Despite the fact that streams are fundamentally dynamic, we find two essential concepts of stream rate control --- throughput ratio and natural rate --- are intimately related to the program structure itself and can be effectively reasoned about by a type system. RATE TYPES is proven to correspond with a time-aware and parallelism-aware operational semantics. The strong correspondence result tolerates arbitrary schedules, and does not require any synchronization between stream filters. We further implement RATE TYPES, demonstrating its effectiveness in predicting stream data rates in real-world stream programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Amin:2014:FPD, author = "Nada Amin and Tiark Rompf and Martin Odersky", title = "Foundations of path-dependent types", journal = j-SIGPLAN, volume = "49", number = "10", pages = "233--249", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660216", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A scalable programming language is one in which the same concepts can describe small as well as large parts. Towards this goal, Scala unifies concepts from object and module systems. An essential ingredient of this unification is the concept of objects with type members, which can be referenced through path-dependent types. Unfortunately, path-dependent types are not well-understood, and have been a roadblock in grounding the Scala type system on firm theory. We study several calculi for path-dependent types. We present DOT which captures the essence --- DOT stands for Dependent Object Types. We explore the design space bottom-up, teasing apart inherent from accidental complexities, while fully mechanizing our models at each step.
Even in this simple setting, many interesting patterns arise from the interaction of structural and nominal features. Whereas our simple calculus enjoys many desirable and intuitive properties, we demonstrate that the theory gets much more complicated once we add another Scala feature, type refinement, or extend the subtyping relation to a lattice. We discuss possible remedies and trade-offs in modeling type systems for Scala-like languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Allende:2014:CGT, author = "Esteban Allende and Johan Fabry and Ronald Garcia and {\'E}ric Tanter", title = "Confined gradual typing", journal = j-SIGPLAN, volume = "49", number = "10", pages = "251--270", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660222", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Gradual typing combines static and dynamic typing flexibly and safely in a single programming language. To do so, gradually typed languages implicitly insert casts where needed, to ensure at runtime that typing assumptions are not violated by untyped code. However, the implicit nature of cast insertion, especially on higher-order values, can jeopardize reliability and efficiency: higher-order casts can fail at any time, and are costly to execute. We propose Confined Gradual Typing, which extends gradual typing with two new type qualifiers that let programmers control the flow of values between the typed and the untyped worlds, and thereby trade some flexibility for more reliability and performance. We formally develop two variants of Confined Gradual Typing that capture different flexibility/guarantee tradeoffs. We report on the implementation of Confined Gradual Typing in Gradualtalk, a gradually-typed Smalltalk, which confirms the performance advantage of avoiding unwanted higher-order casts and the low overhead of the approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Altidor:2014:RJG, author = "John Altidor and Yannis Smaragdakis", title = "Refactoring {Java} generics by inferring wildcards, in practice", journal = j-SIGPLAN, volume = "49", number = "10", pages = "271--290", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660203", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Wildcard annotations can improve the generality of Java generic libraries, but require heavy manual effort. We present an algorithm for refactoring and inferring more general type instantiations of Java generics using wildcards. Compared to past approaches, our work is practical and immediately applicable: we assume no changes to the Java type system, while taking into account all its intricacies. Our system allows users to select declarations (variables, method parameters, return types, etc.) to generalize and considers declarations not declared in available source code. 
It then performs an inter-procedural flow analysis and a method body analysis, in order to generalize type signatures. We evaluate our technique on six Java generic libraries. We find that 34\% of available declarations of variant type signatures can be generalized --- i.e., relaxed with more general wildcard types. On average, 146 other declarations need to be updated when a declaration is generalized, showing that this refactoring would be too tedious and error-prone to perform manually.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{David:2014:CMC, author = "Florian David and Gael Thomas and Julia Lawall and Gilles Muller", title = "Continuously measuring critical section pressure with the free-lunch profiler", journal = j-SIGPLAN, volume = "49", number = "10", pages = "291--307", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today, Java is regularly used to implement large multi-threaded server-class applications that use locks to protect access to shared data. However, understanding the impact of locks on the performance of a system is complex, and thus the use of locks can impede the progress of threads on configurations that were not anticipated by the developer, during specific phases of the execution. In this paper, we propose Free Lunch, a new lock profiler for Java application servers, specifically designed to identify, in-vivo, phases where the progress of the threads is impeded by a lock. Free Lunch is designed around a new metric, critical section pressure (CSP), which directly correlates the progress of the threads to each of the locks. Using Free Lunch, we have identified phases of high CSP, which were hidden with other lock profilers, in the distributed Cassandra NoSQL database and in several applications from the DaCapo 9.12, the SPECjvm2008 and the SPECjbb2005 benchmark suites. Our evaluation of Free Lunch shows that its overhead is never greater than 6\%, making it suitable for in-vivo use.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Misailovic:2014:CRA, author = "Sasa Misailovic and Michael Carbin and Sara Achour and Zichao Qi and Martin C. Rinard", title = "{Chisel}: reliability- and accuracy-aware optimization of approximate computational kernels", journal = j-SIGPLAN, volume = "49", number = "10", pages = "309--328", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660231", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The accuracy of an approximate computation is the distance between the result that the computation produces and the corresponding fully accurate result. The reliability of the computation is the probability that it will produce an acceptably accurate result. 
Emerging approximate hardware platforms provide approximate operations that, in return for reduced energy consumption and/or increased performance, exhibit reduced reliability and/or accuracy. We present Chisel, a system for reliability- and accuracy-aware optimization of approximate computational kernels that run on approximate hardware platforms. Given a combined reliability and/or accuracy specification, Chisel automatically selects approximate kernel operations to synthesize an approximate computation that minimizes energy consumption while satisfying its reliability and accuracy specification. We evaluate Chisel on five applications from the image processing, scientific computing, and financial analysis domains. The experimental results show that our implemented optimization algorithm enables Chisel to optimize our set of benchmark kernels to obtain energy savings from 8.7\% to 19.8\% compared to the fully reliable kernel implementations while preserving important reliability guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Kambadur:2014:ESE, author = "Melanie Kambadur and Martha A. Kim", title = "An experimental survey of energy management across the stack", journal = j-SIGPLAN, volume = "49", number = "10", pages = "329--344", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern demand for energy-efficient computation has spurred research at all levels of the stack, from devices to microarchitecture, operating systems, compilers, and languages. Unfortunately, this breadth has resulted in a disjointed space, with technologies at different levels of the system stack rarely compared, let alone coordinated. This work begins to remedy the problem, conducting an experimental survey of the present state of energy management across the stack. Focusing on settings that are exposed to software, we measure the total energy, average power, and execution time of 41 benchmark applications in 220 configurations, across a total of 200,000 program executions. 
Some of the more important findings of the survey include that effective parallelization and compiler optimizations have the potential to save far more energy than Linux's frequency tuning algorithms; that certain non-complementary energy strategies can undercut each other's savings by half when combined; and that while the power impacts of most strategies remain constant across applications, the runtime impacts vary, resulting in inconsistent energy impacts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Pinto:2014:UEB, author = "Gustavo Pinto and Fernando Castor and Yu David Liu", title = "Understanding energy behaviors of thread management constructs", journal = j-SIGPLAN, volume = "49", number = "10", pages = "345--360", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660235", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java programmers are faced with numerous choices in managing concurrent execution on multicore platforms. These choices often have different trade-offs (e.g., performance, scalability, and correctness guarantees). This paper analyzes an additional dimension, energy consumption. It presents an empirical study aiming to illuminate the relationship between the choices and settings of thread management constructs and energy consumption. We consider three important thread management constructs in concurrent programming: explicit thread creation, fixed-size thread pooling, and work stealing. We further shed light on the energy/performance trade-off of three ``tuning knobs'' of these constructs: the number of threads, the task division strategy, and the characteristics of processed data. Through an extensive experimental space exploration over real-world Java programs, we produce a list of findings about the energy behaviors of concurrent programs, which are not always obvious. The study serves as a first step toward improving energy efficiency of concurrent programs on parallel architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Drechsler:2014:DRU, author = "Joscha Drechsler and Guido Salvaneschi and Ragnar Mogk and Mira Mezini", title = "Distributed {REScala}: an update algorithm for distributed reactive programming", journal = j-SIGPLAN, volume = "49", number = "10", pages = "361--376", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660240", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reactive programming improves the design of reactive applications by relocating the logic for managing dependencies between dependent values away from the application logic to the language implementation. Many distributed applications are reactive. Yet, existing change propagation algorithms are not suitable in a distributed setting. 
We propose Distributed REScala, a reactive language with a change propagation algorithm that works without centralized knowledge about the topology of the dependency structure among reactive values and avoids unnecessary propagation of changes, while retaining safety guarantees (glitch freedom). Distributed REScala enables distributed reactive programming, bringing the benefits of reactive programming to distributed applications. We demonstrate the enabled design improvements by a case study. We also empirically evaluate the performance of our algorithm in comparison to other algorithms in a simulated distributed setting.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Rendel:2014:OAA, author = "Tillmann Rendel and Jonathan Immanuel Brachth{\"a}user and Klaus Ostermann", title = "From object algebras to attribute grammars", journal = j-SIGPLAN, volume = "49", number = "10", pages = "377--395", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660237", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Oliveira and Cook (2012) and Oliveira et al. (2013) have recently introduced object algebras as a program structuring technique to improve the modularity and extensibility of programs. We analyze the relationship between object algebras and attribute grammars (AGs), a formalism to augment context-free grammars with attributes. We present an extension of the object algebra technique with which the full class of L-attributed grammars --- an important class of AGs that corresponds to one-pass compilers --- can be encoded in Scala. The encoding is modular (attributes can be defined and type-checked separately), scalable (the size of the encoding is linear in the size of the AG specification) and compositional (each AG artifact is represented as a semantic object of the host language). To evaluate these claims, we have formalized the encoding and re-implemented a one-pass compiler for a subset of C with our technique. We also discuss how advanced features of modern AG systems, such as higher-order and parameterized attributes, reference attributes, and forwarding can be supported.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Ureche:2014:LDL, author = "Vlad Ureche and Eugene Burmako and Martin Odersky", title = "Late data layout: unifying data representation transformations", journal = j-SIGPLAN, volume = "49", number = "10", pages = "397--416", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660197", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Values need to be represented differently when interacting with certain language features. For example, an integer has to take an object-based representation when interacting with erased generics, although, for performance reasons, the stack-based value representation is better.
To abstract over these implementation details, some programming languages choose to expose a unified high-level concept (the integer) and let the compiler choose its exact representation and insert coercions where necessary. This pattern appears in multiple language features such as value classes, specialization and multi-stage programming: they all expose a unified concept which they later refine into multiple representations. Yet, the underlying compiler implementations typically entangle the core mechanism with assumptions about the alternative representations and their interaction with other language features. In this paper we present the Late Data Layout mechanism, a simple but versatile type-driven generalization that subsumes and improves the state-of-the-art representation transformations. In doing so, we make two key observations: (1) annotated types conveniently capture the semantics of using multiple representations and (2) local type inference can be used to consistently and optimally introduce coercions. We validated our approach by implementing three language features as Scala compiler extensions: value classes, specialization (using the miniboxing representation) and a simplified multi-stage programming mechanism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Mitschke:2014:ILI, author = "Ralf Mitschke and Sebastian Erdweg and Mirko K{\"o}hler and Mira Mezini and Guido Salvaneschi", title = "{i3QL}: language-integrated live data views", journal = j-SIGPLAN, volume = "49", number = "10", pages = "417--432", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660242", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An incremental computation updates its result based on a change to its input, which is often an order of magnitude faster than a recomputation from scratch. In particular, incrementalization can make expensive computations feasible for settings that require short feedback cycles, such as interactive systems, IDEs, or (soft) real-time systems. This paper presents i3QL, a general-purpose programming language for specifying incremental computations. i3QL provides a declarative SQL-like syntax and is based on incremental versions of operators from relational algebra, enriched with support for general recursion. We integrated i3QL into Scala as a library, which enables programmers to use regular Scala code for non-incremental subcomputations of an i3QL query and to easily integrate incremental computations into larger software projects. To improve performance, i3QL optimizes user-defined queries by applying algebraic laws and partial evaluation. We describe the design and implementation of i3QL and its optimizations, demonstrate its applicability, and evaluate its performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Chakrabarti:2014:ALL, author = "Dhruva R. Chakrabarti and Hans-J. 
Boehm and Kumud Bhandari", title = "{Atlas}: leveraging locks for non-volatile memory consistency", journal = j-SIGPLAN, volume = "49", number = "10", pages = "433--452", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660224", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-volatile main memory, such as memristors or phase change memory, can revolutionize the way programs persist data. In-memory objects can themselves be persistent without the need for a separate persistent data storage format. However, the challenge is to ensure that such data remains consistent if a failure occurs during execution. In this paper, we present our system, called Atlas, which adds durability semantics to lock-based code, typically allowing us to automatically maintain a globally consistent state even in the presence of failures. We identify failure-atomic sections of code based on existing critical sections and describe a log-based implementation that can be used to recover a consistent state after a failure. We discuss several subtle semantic issues and implementation tradeoffs. We confirm the ability to rapidly flush CPU caches as a core implementation bottleneck and suggest partial solutions. Experimental results confirm the practicality of our approach and provide insight into the overheads of such a system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Steele:2014:FSP, author = "Guy L. {Steele, Jr.} and Doug Lea and Christine H. Flood", title = "Fast splittable pseudorandom number generators", journal = j-SIGPLAN, volume = "49", number = "10", pages = "453--472", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660195", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/m/marsaglia-george.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/jstatsoft.bib; https://www.math.utah.edu/pub/tex/bib/mathcw.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/tomacs.bib", abstract = "We describe a new algorithm SplitMix for an object-oriented and splittable pseudorandom number generator (PRNG) that is quite fast: 9 64-bit arithmetic/logical operations per 64 bits generated. A conventional linear PRNG object provides a generate method that returns one pseudorandom value and updates the state of the PRNG, but a splittable PRNG object also has a second operation, split, that replaces the original PRNG object with two (seemingly) independent PRNG objects, by creating and returning a new such object and updating the state of the original object. Splittable PRNG objects make it easy to organize the use of pseudorandom numbers in multithreaded programs structured using fork-join parallelism. No locking or synchronization is required (other than the usual memory fence immediately after object creation). Because the generate method has no loops or conditionals, it is suitable for SIMD or GPU implementation. 
We derive SplitMix from the DotMix algorithm of Leiserson, Schardl, and Sukha by making a series of program transformations and engineering improvements. The end result is an object-oriented version of the purely functional API used in the Haskell library for over a decade, but SplitMix is faster and produces pseudorandom sequences of higher quality; it is also far superior in quality and speed to java.util.Random, and has been included in Java JDK8 as the class java.util.SplittableRandom. We have tested the pseudorandom sequences produced by SplitMix using two standard statistical test suites (DieHarder and TestU01) and they appear to be adequate for ``everyday'' use, such as in Monte Carlo algorithms and randomized data structures where speed is important.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark-1 = "OOPSLA '14 conference proceedings.", remark-2 = "On page 466, the authors describe an interesting technique for improving a user-supplied seed that might produce insufficient randomness in the next several members of the random-number sequence: ``Long runs of 0-bits or of 1-bits in the $\gamma$ [candidate seed] value do not cause bits of the seed to flip; an approximate proxy for how many bits of the seed will flip might be the number of bit pairs of the form 01 or 10 in the candidate $\gamma$ value {\tt z}. Therefore we require that the number of such pairs, as computed by {\tt Long.bitCount(z ^ (z >>> 1))}, exceed 24; if it does not, then the candidate z is replaced by the XOR of {\tt z} and {\tt 0xaaaaaaaaaaaaaaaaL}, a constant chosen so that (a) the low bit of {\tt z} remains 1, and (b) every bit pair of the form 00 or 11 becomes either 01 or 10, and likewise every bit pair of the form 01 or 10 becomes either 00 or 11, so the new value necessarily has more than 24 bit pairs whose bits differ. Testing shows that this trick appears to be effective.''", remark-3 = "From page 468: ``we did three runs of TestU01 BigCrush on {\tt java.util.Random}; 19 tests produced clear failure on all three runs. These included 9 Birthday Spacings tests, 8 ClosePairs tests, a WeightDistrib test, and a CouponCollector test. This confirms L'Ecuyer's observation that {\tt java.util.Random} tends to fail Birthday Spacings tests [17].'' The reference is to \cite{LEcuyer:2001:SUR}.", remark-4 = "From page 470: ``[L'Ecuyer] comments, `In the Java class {\tt java.util.Random}, RNG streams can be declared and constructed dynamically, without limit on their number. 
However, no precaution seems to have been taken regarding the independence of these streams.'''", remark-5 = "From page 471: ``They [the generators in this paper] should not be used for cryptographic or security applications, because they are too predictable (the mixing functions are easily inverted, and two successive outputs suffice to reconstruct the internal state), \ldots{} One version seems especially suitable for use as a replacement for {\tt java.util.Random}, because it produces sequences of higher quality, is faster in sequential use, is easily parallelized for use in JDK8 stream expressions, and is amenable to efficient implementation on SIMD and GPU architectures.''", } @Article{Samak:2014:MTS, author = "Malavika Samak and Murali Krishna Ramanathan", title = "Multithreaded test synthesis for deadlock detection", journal = j-SIGPLAN, volume = "49", number = "10", pages = "473--489", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660238", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Designing and implementing thread-safe multithreaded libraries can be a daunting task as developers of these libraries need to ensure that their implementations are free from concurrency bugs, including deadlocks. The usual practice involves employing software testing and/or dynamic analysis to detect deadlocks. Their effectiveness is dependent on well-designed multithreaded test cases. Unsurprisingly, developing multithreaded tests is significantly harder than developing sequential tests for obvious reasons. In this paper, we address the problem of automatically synthesizing multithreaded tests that can induce deadlocks. The key insight to our approach is that a subset of the properties observed when a deadlock manifests in a concurrent execution can also be observed in a single threaded execution. We design a novel, automatic, scalable and directed approach that identifies these properties and synthesizes a deadlock revealing multithreaded test. The input to our approach is the library implementation under consideration and the output is a set of deadlock revealing multithreaded tests. We have implemented our approach as part of a tool, named OMEN$^1$. OMEN is able to synthesize multithreaded tests on many multithreaded Java libraries. Applying a dynamic deadlock detector on the execution of the synthesized tests results in the detection of a number of deadlocks, including 35 real deadlocks in classes documented as thread-safe. 
Moreover, our experimental results show that dynamic analysis on multithreaded tests that are either synthesized randomly or developed by third-party programmers are ineffective in detecting the deadlocks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Bergan:2014:SEM, author = "Tom Bergan and Dan Grossman and Luis Ceze", title = "Symbolic execution of multithreaded programs from arbitrary program contexts", journal = j-SIGPLAN, volume = "49", number = "10", pages = "491--506", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660200", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe an algorithm to perform symbolic execution of a multithreaded program starting from an arbitrary program context. We argue that this can enable more efficient symbolic exploration of deep code paths in multithreaded programs by allowing the symbolic engine to jump directly to program contexts of interest. The key challenge is modeling the initial context with reasonable precision --- an overly approximate model leads to exploration of many infeasible paths during symbolic execution, while a very precise model would be so expensive to compute that computing it would defeat the purpose of jumping directly to the initial context in the first place. We propose a context-specific dataflow analysis that approximates the initial context cheaply, but precisely enough to avoid some common causes of infeasible-path explosion. This model is necessarily approximate --- it may leave portions of the memory state unconstrained, leaving our symbolic execution unable to answer simple questions such as ``which thread holds lock A?''. For such cases, we describe a novel algorithm for evaluating symbolic synchronization during symbolic execution. Our symbolic execution semantics are sound and complete up to the limits of the underlying SMT solver. We describe initial experiments on an implementation in Cloud 9.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Barowy:2014:CDD, author = "Daniel W. Barowy and Dimitar Gochev and Emery D. Berger", title = "{CheckCell}: data debugging for spreadsheets", journal = j-SIGPLAN, volume = "49", number = "10", pages = "507--523", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660207", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Testing and static analysis can help root out bugs in programs, but not in data. This paper introduces data debugging, an approach that combines program analysis and statistical analysis to automatically find potential data errors. Since it is impossible to know a priori whether data are erroneous, data debugging instead locates data that has a disproportionate impact on the computation. Such data is either very important, or wrong. 
Data debugging is especially useful in the context of data-intensive programming environments that intertwine data with programs in the form of queries or formulas. We present the first data debugging tool, CheckCell, an add-in for Microsoft Excel. CheckCell identifies cells that have an unusually high impact on the spreadsheet's computations. We show that CheckCell is both analytically and empirically fast and effective. We show that it successfully finds injected typographical errors produced by a generative model trained with data entry from 169,112 Mechanical Turk tasks. CheckCell is more precise and efficient than standard outlier detection techniques. CheckCell also automatically identifies a key flaw in the infamous Reinhart and Rogoff spreadsheet.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Pavlinovic:2014:FMT, author = "Zvonimir Pavlinovic and Tim King and Thomas Wies", title = "Finding minimum type error sources", journal = j-SIGPLAN, volume = "49", number = "10", pages = "525--542", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660230", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Automatic type inference is a popular feature of functional programming languages. If a program cannot be typed, the compiler typically reports a single program location in its error message. This location is the point where the type inference failed, but not necessarily the actual source of the error. Other potential error sources are not even considered. Hence, the compiler often misses the true error source, which increases debugging time for the programmer. In this paper, we present a general framework for automatic localization of type errors. Our algorithm finds all minimum error sources, where the exact definition of minimum is given in terms of a compiler-specific ranking criterion. Compilers can use minimum error sources to produce more meaningful error reports, and for automatic error correction. Our approach works by reducing the search for minimum error sources to an optimization problem that we formulate in terms of weighted maximum satisfiability modulo theories (MaxSMT). The reduction to weighted MaxSMT allows us to build on SMT solvers to support rich type systems and at the same time abstract from the concrete criterion that is used for ranking the error sources. We have implemented an instance of our framework targeted at Hindley-Milner type systems and evaluated it on existing OCaml benchmarks for type error localization. 
Our evaluation shows that our approach has the potential to significantly improve the quality of type error reports produced by state of the art compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Liu:2014:FFL, author = "Peng Liu and Omer Tripp and Xiangyu Zhang", title = "{Flint}: fixing linearizability violations", journal = j-SIGPLAN, volume = "49", number = "10", pages = "543--560", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660217", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing concurrent software while achieving both correctness and efficiency is a grand challenge. To facilitate this task, concurrent data structures have been introduced into the standard library of popular languages like Java and C\#. Unfortunately, while the operations exposed by concurrent data structures are atomic (or linearizable), compositions of these operations are not necessarily atomic. Recent studies have found many erroneous implementations of composed concurrent operations. We address the problem of fixing nonlinearizable composed operations such that they behave atomically. We introduce Flint, an automated fixing algorithm for composed Map operations. Flint accepts as input a composed operation suffering from atomicity violations. Its output, if fixing succeeds, is a composed operation that behaves equivalently to the original operation in sequential runs and is guaranteed to be atomic. To our knowledge, Flint is the first general algorithm for fixing incorrect concurrent compositions. We have evaluated Flint on 48 incorrect compositions from 27 popular applications, including Tomcat and MyFaces. The results are highly encouraging: Flint is able to correct 96\% of the methods, and the fixed version is often the same as the fix by an expert programmer and as efficient as the original code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Song:2014:SDR, author = "Linhai Song and Shan Lu", title = "Statistical debugging for real-world performance problems", journal = j-SIGPLAN, volume = "49", number = "10", pages = "561--578", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660234", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Design and implementation defects that lead to inefficient computation widely exist in software. These defects are difficult to avoid and discover. They lead to severe performance degradation and energy waste during production runs, and are becoming increasingly critical with the meager increase of single-core hardware performance and the increasing concerns about energy constraints. Effective tools that diagnose performance problems and point out the inefficiency root cause are sorely needed. The state of the art of performance diagnosis is preliminary. 
Profiling can identify the functions that consume the most computation resources, but can neither identify the ones that waste the most resources nor explain why. Performance-bug detectors can identify specific type of inefficient computation, but are not suited for diagnosing general performance problems. Effective failure diagnosis techniques, such as statistical debugging, have been proposed for functional bugs. However, whether they work for performance problems is still an open question. In this paper, we first conduct an empirical study to understand how performance problems are observed and reported by real-world users. Our study shows that statistical debugging is a natural fit for diagnosing performance problems, which are often observed through comparison-based approaches and reported together with both good and bad inputs. We then thoroughly investigate different design points in statistical debugging, including three different predicates and two different types of statistical models, to understand which design point works the best for performance diagnosis. Finally, we study how some unique nature of performance bugs allows sampling techniques to lower the overhead of run-time performance diagnosis without extending the diagnosis latency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Parr:2014:ALP, author = "Terence Parr and Sam Harwell and Kathleen Fisher", title = "Adaptive {LL(*)} parsing: the power of dynamic analysis", journal = j-SIGPLAN, volume = "49", number = "10", pages = "579--598", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660202", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the advances made by modern parsing strategies such as PEG, LL(*), GLR, and GLL, parsing is not a solved problem. Existing approaches suffer from a number of weaknesses, including difficulties supporting side-effecting embedded actions, slow and/or unpredictable performance, and counter-intuitive matching strategies. This paper introduces the ALL(*) parsing strategy that combines the simplicity, efficiency, and predictability of conventional top-down LL(k) parsers with the power of a GLR-like mechanism to make parsing decisions. The critical innovation is to move grammar analysis to parse-time, which lets ALL(*) handle any non-left-recursive context-free grammar. ALL(*) is O(n$^4$) in theory but consistently performs linearly on grammars used in practice, outperforming general strategies such as GLL and GLR by orders of magnitude. ANTLR 4 generates ALL(*) parsers and supports direct left-recursion through grammar rewriting.
Widespread ANTLR 4 use (5000 downloads/month in 2013) provides evidence that ALL(*) is effective for a wide variety of applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Gligoric:2014:AMB, author = "Milos Gligoric and Wolfram Schulte and Chandra Prasad and Danny van Velzen and Iman Narasamdya and Benjamin Livshits", title = "Automated migration of build scripts using dynamic analysis and search-based refactoring", journal = j-SIGPLAN, volume = "49", number = "10", pages = "599--616", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660239", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The efficiency of a build system is an important factor for developer productivity. As a result, developer teams have been increasingly adopting new build systems that allow higher build parallelization. However, migrating the existing legacy build scripts to new build systems is a tedious and error-prone process. Unfortunately, there is insufficient support for automated migration of build scripts, making the migration more problematic. We propose the first dynamic approach for automated migration of build scripts to new build systems. Our approach works in two phases. First, from a set of execution traces, we synthesize build scripts that accurately capture the intent of the original build. The synthesized build scripts are typically long and hard to maintain. Second, we apply refactorings that raise the abstraction level of the synthesized scripts (e.g., introduce functions for similar fragments). As different refactoring sequences may lead to different build scripts, we use a search-based approach that explores various sequences to identify the best (e.g., shortest) build script. We optimize search-based refactoring with partial-order reduction to faster explore refactoring sequences. We implemented the proposed two phase migration approach in a tool called METAMORPHOSIS that has been recently used at Microsoft.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Kumar:2014:MCM, author = "Vineet Kumar and Laurie Hendren", title = "{MIX10}: compiling {MATLAB} to {X10} for high performance", journal = j-SIGPLAN, volume = "49", number = "10", pages = "617--636", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660218", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "MATLAB is a popular dynamic array-based language commonly used by students, scientists and engineers who appreciate the interactive development style, the rich set of array operators, the extensive builtin library, and the fact that they do not have to declare static types. Even though these users like to program in MATLAB, their computations are often very compute-intensive and are better suited for emerging high performance computing systems.
This paper reports on MIX10, a source-to-source compiler that automatically translates MATLAB programs to X10, a language designed for ``Performance and Productivity at Scale''; thus, helping scientific programmers make better use of high performance computing systems. There is a large semantic gap between the array-based dynamically-typed nature of MATLAB and the object-oriented, statically-typed, and high-level array abstractions of X10. This paper addresses the major challenges that must be overcome to produce sequential X10 code that is competitive with state-of-the-art static compilers for MATLAB which target more conventional imperative languages such as C and Fortran. Given that efficient basis, the paper then provides a translation for the MATLAB parfor construct that leverages the powerful concurrency constructs in X10. The MIX10 compiler has been implemented using the McLab compiler tools, is open source, and is available both for compiler researchers and end-user MATLAB programmers. We have used the implementation to perform many empirical measurements on a set of 17 MATLAB benchmarks. We show that our best MIX10-generated code is significantly faster than the de facto Mathworks' MATLAB system, and that our results are competitive with state-of-the-art static compilers that target C and Fortran. We also show the importance of finding the correct approach to representing the arrays in the generated X10 code, and the necessity of an `IntegerOkay' analysis that determines which double variables can be safely represented as integers. Finally, we show that our X10-based handling of the MATLAB parfor greatly outperforms the de facto MATLAB implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Jonnalagedda:2014:SPC, author = "Manohar Jonnalagedda and Thierry Coppey and Sandro Stucki and Tiark Rompf and Martin Odersky", title = "Staged parser combinators for efficient data processing", journal = j-SIGPLAN, volume = "49", number = "10", pages = "637--653", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660241", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parsers are ubiquitous in computing, and many applications depend on their performance for decoding data efficiently. Parser combinators are an intuitive tool for writing parsers: tight integration with the host language enables grammar specifications to be interleaved with processing of parse results. Unfortunately, parser combinators are typically slow due to the high overhead of the host language abstraction mechanisms that enable composition. We present a technique for eliminating such overhead. We use staging, a form of runtime code generation, to dissociate input parsing from parser composition, and eliminate intermediate data structures and computations associated with parser composition at staging time. A key challenge is to maintain support for input dependent grammars, which have no clear stage distinction. Our approach applies to top-down recursive-descent parsers as well as bottom-up non-deterministic parsers with key applications in dynamic programming on sequences, where we auto-generate code for parallel hardware.
We achieve performance comparable to specialized, hand-written parsers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Rosner:2014:BET, author = "Nicol{\'a}s Rosner and Valeria Bengolea and Pablo Ponzio and Shadi Abdul Khalek and Nazareno Aguirre and Marcelo F. Frias and Sarfraz Khurshid", title = "Bounded exhaustive test input generation from hybrid invariants", journal = j-SIGPLAN, volume = "49", number = "10", pages = "655--674", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660232", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel technique for producing bounded exhaustive test suites from hybrid invariants, i.e., invariants that are expressed imperatively, declaratively, or as a combination of declarative and imperative predicates. Hybrid specifications are processed using known mechanisms for the imperative and declarative parts, but combined in a way that enables us to exploit information from the declarative side, such as tight bounds computed from the declarative specification, to improve the search both on the imperative and declarative sides. Moreover, our technique automatically evaluates different possible ways of processing the imperative side, and the alternative settings (imperative or declarative) for parts of the invariant available both declaratively and imperatively, to decide the most convenient invariant configuration with respect to efficiency in test generation. This is achieved by transcoping, i.e., by assessing the efficiency of the different alternatives on small scopes (where generation times are negligible), and then extrapolating the results to larger scopes. We also show experiments involving collection classes that support the effectiveness of our technique, by demonstrating that (i) bounded exhaustive suites can be computed from hybrid invariants significantly more efficiently than doing so using state-of-the-art purely imperative and purely declarative approaches, and (ii) our technique is able to automatically determine efficient hybrid invariants, in the sense that they lead to an efficient computation of bounded exhaustive suites, using transcoping.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Wang:2014:CVM, author = "Peng Wang and Santiago Cuellar and Adam Chlipala", title = "Compiler verification meets cross-language linking via data abstraction", journal = j-SIGPLAN, volume = "49", number = "10", pages = "675--690", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660201", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many real programs are written in multiple different programming languages, and supporting this pattern creates challenges for formal compiler verification. 
We describe our Coq verification of a compiler for a high-level language, such that the compiler correctness theorem allows us to derive partial-correctness Hoare-logic theorems for programs built by linking the assembly code output by our compiler and assembly code produced by other means. Our compiler supports such tricky features as storable cross-language function pointers, without giving up the usual benefits of being able to verify different compiler phases (including, in our case, two classic optimizations) independently. The key technical innovation is a mixed operational and axiomatic semantics for the source language, with a built-in notion of abstract data types, such that compiled code interfaces with other languages only through axiomatically specified methods that mutate encapsulated private data, represented in whatever formats are most natural for those languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Turon:2014:GNW, author = "Aaron Turon and Viktor Vafeiadis and Derek Dreyer", title = "{GPS}: navigating weak memory with ghosts, protocols, and separation", journal = j-SIGPLAN, volume = "49", number = "10", pages = "691--707", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660243", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Weak memory models formalize the inconsistent behaviors that one can expect to observe in multithreaded programs running on modern hardware. In so doing, however, they complicate the already-difficult task of reasoning about correctness of concurrent code. Worse, they render impotent the sophisticated formal methods that have been developed to tame concurrency, which almost universally assume a strong (i.e., sequentially consistent) memory model. This paper introduces GPS, the first program logic to provide a full-fledged suite of modern verification techniques --- including ghost state, protocols, and separation logic --- for high-level, structured reasoning about weak memory. We demonstrate the effectiveness of GPS by applying it to challenging examples drawn from the Linux kernel as well as lock-free data structures. We also define the semantics of GPS and prove in Coq that it is sound with respect to the axiomatic C11 weak memory model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Desai:2014:NPA, author = "Ankush Desai and Pranav Garg and P. Madhusudan", title = "Natural proofs for asynchronous programs using almost-synchronous reductions", journal = j-SIGPLAN, volume = "49", number = "10", pages = "709--725", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660211", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the problem of provably verifying that an asynchronous message-passing system satisfies its local assertions.
We present a novel reduction scheme for asynchronous event-driven programs that finds almost-synchronous invariants --- invariants consisting of global states where message buffers are close to empty. The reduction finds almost-synchronous invariants and simultaneously argues that they cover all local states. We show that asynchronous programs often have almost-synchronous invariants and that we can exploit this to build natural proofs that they are correct. We implement our reduction strategy, which is sound and complete, and show that it is more effective in proving programs correct as well as more efficient in finding bugs in several programs, compared to current search strategies which almost always diverge. The high point of our experiments is that our technique can prove the Windows Phone USB Driver written in P [9] correct for the responsiveness property, which was hitherto not provable using state-of-the-art model-checkers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Zhang:2014:AIO, author = "Wei Zhang and Per Larsen and Stefan Brunthaler and Michael Franz", title = "Accelerating iterators in optimizing {AST} interpreters", journal = j-SIGPLAN, volume = "49", number = "10", pages = "727--743", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660223", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Generators offer an elegant way to express iterators. However, their performance has always been their Achilles heel and has prevented widespread adoption. We present techniques to efficiently implement and optimize generators. We have implemented our optimizations in ZipPy, a modern, light-weight AST interpreter based Python 3 implementation targeting the Java virtual machine. Our implementation builds on a framework that optimizes AST interpreters using just-in-time compilation. In such a system, it is crucial that AST optimizations do not prevent subsequent optimizations. Our system was carefully designed to avoid this problem. We report an average speedup of 3.58x for generator-bound programs.
As a result, using generators no longer has downsides and programmers are free to enjoy their upsides.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Zhao:2014:CSP, author = "Zhijia Zhao and Bo Wu and Mingzhou Zhou and Yufei Ding and Jianhua Sun and Xipeng Shen and Youfeng Wu", title = "Call sequence prediction through probabilistic calling automata", journal = j-SIGPLAN, volume = "49", number = "10", pages = "745--762", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660221", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Predicting a sequence of upcoming function calls is important for optimizing programs written in modern managed languages (e.g., Java, Javascript, C\#.) Existing function call predictions are mainly built on statistical patterns, suitable for predicting a single call but not a sequence of calls. This paper presents a new way to enable call sequence prediction, which exploits program structures through Probabilistic Calling Automata (PCA), a new program representation that captures both the inherent ensuing relations among function calls, and the probabilistic nature of execution paths. It shows that PCA-based prediction outperforms existing predictions, yielding substantial speedup when being applied to guide Just-In-Time compilation. By enabling accurate, efficient call sequence prediction for the first time, PCA-based predictors open up many new opportunities for dynamic program optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Zhou:2014:SEM, author = "Mingzhou Zhou and Xipeng Shen and Yaoqing Gao and Graham Yiu", title = "Space-efficient multi-versioning for input-adaptive feedback-driven program optimizations", journal = j-SIGPLAN, volume = "49", number = "10", pages = "763--776", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660229", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Function versioning is an approach to addressing input-sensitivity of program optimizations. A major side effect of it is notable code size increase, which has been hindering its broad applications to large code bases and space-stringent environments. In this paper, we initiate a systematic exploration into the problem, providing answers to some fundamental questions: Given a space constraint, to which function we should apply versioning? How many versions of a function should we include in the final executable? Is the optimal selection feasible to do in polynomial time? This study proves selecting the best set of versions under a space constraint is NP-complete and proposes a heuristic algorithm named CHoGS which yields near optimal results in quadratic time. We implement the algorithm and conduct experiments through the IBM XL compilers. 
We observe significant performance enhancement with only slight code size increase; the results from CHoGS show factors of higher space efficiency than those from traditional hotness-based methods.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Adams:2014:HVM, author = "Keith Adams and Jason Evans and Bertrand Maher and Guilherme Ottoni and Andrew Paroski and Brett Simmers and Edwin Smith and Owen Yamauchi", title = "The {HipHop Virtual Machine}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "777--790", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660199", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "The HipHop Virtual Machine (HHVM) is a JIT compiler and runtime for PHP. While PHP values are dynamically typed, real programs often have latent types that are useful for optimization once discovered. Some types can be proven through static analysis, but limitations in the ahead-of-time approach leave some types to be discovered at run time. And even though many values have latent types, PHP programs can also contain polymorphic variables and expressions, which must be handled without catastrophic slowdown. HHVM discovers latent types by structuring its JIT around the concept of a tracelet. A tracelet is approximately a basic block specialized for a particular set of run-time types for its input values. Tracelets allow HHVM to exactly and efficiently learn the types observed by the program, while using a simple compiler. This paper shows that this approach enables HHVM to achieve high levels of performance, without sacrificing compatibility or interactivity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Nazare:2014:VMA, author = "Henrique Nazar{\'e} and Izabela Maffra and Willer Santos and Leonardo Barbosa and Laure Gonnord and Fernando Magno Quint{\~a}o Pereira", title = "Validation of memory accesses through symbolic analyses", journal = j-SIGPLAN, volume = "49", number = "10", pages = "791--809", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660205", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C programming language does not prevent out-of-bounds memory accesses. There exist several techniques to secure C programs; however, these methods tend to slow down these programs substantially, because they populate the binary code with runtime checks. To deal with this problem, we have designed and tested two static analyses --- symbolic region and range analysis --- which we combine to remove the majority of these guards. In addition to the analyses themselves, we bring two other contributions. First, we describe live range splitting strategies that improve the efficiency and the precision of our analyses. Secondly, we show how to deal with integer overflows, a phenomenon that can compromise the correctness of static algorithms that validate memory accesses. 
We validate our claims by incorporating our findings into AddressSanitizer. We generate SPEC CINT 2006 code that is 17\% faster and 9\% more energy efficient than the code produced originally by this tool. Furthermore, our approach is 50\% more effective than Pentagons, a state-of-the-art analysis to sanitize memory accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Partush:2014:ASD, author = "Nimrod Partush and Eran Yahav", title = "Abstract semantic differencing via speculative correlation", journal = j-SIGPLAN, volume = "49", number = "10", pages = "811--828", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660245", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the problem of computing semantic differences between a program and a patched version of the program. Our goal is to obtain a precise characterization of the difference between program versions, or establish their equivalence. We focus on infinite-state numerical programs, and use abstract interpretation to compute an over-approximation of program differences. Computing differences and establishing equivalence under abstraction requires abstracting relationships between variables in the two programs. Towards that end, we use a correlating abstract domain to compute a sound approximation of these relationships which captures semantic difference. This approximation can be computed over any interleaving of the two programs. However, the choice of interleaving can significantly affect precision. We present a speculative search algorithm that aims to find an interleaving of the two programs with minimal abstract semantic difference. This method is unique as it allows the analysis to dynamically alternate between several interleavings. We have implemented our approach and applied it to real-world examples including patches from Git, GNU Coreutils, as well as a few handpicked patches from the Linux kernel and the Mozilla Firefox web browser. Our evaluation shows that we compute precise approximations of semantic differences, and report few false differences.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Zhang:2014:ESA, author = "Qirun Zhang and Xiao Xiao and Charles Zhang and Hao Yuan and Zhendong Su", title = "Efficient subcubic alias analysis for {C}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "829--845", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inclusion-based alias analysis for C can be formulated as a context-free language (CFL) reachability problem. It is well known that the traditional cubic CFL-reachability algorithm does not scale well in practice. We present a highly scalable and efficient CFL-reachability-based alias analysis for C. 
The key novelty of our algorithm is to propagate reachability information along only original graph edges and bypass a large portion of summary edges, while the traditional CFL-reachability algorithm propagates along all summary edges. We also utilize the Four Russians' Trick --- a key enabling technique in the subcubic CFL-reachability algorithm --- in our alias analysis. We have implemented our subcubic alias analysis and conducted extensive experiments on widely-used C programs from the pointer analysis literature. The results demonstrate that our alias analysis scales extremely well in practice. In particular, it can analyze the recent Linux kernel (which consists of 10M SLOC) in about 30 seconds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Brutschy:2014:SAI, author = "Lucas Brutschy and Pietro Ferrara and Peter M{\"u}ller", title = "Static analysis for independent app developers", journal = j-SIGPLAN, volume = "49", number = "10", pages = "847--860", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660219", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mobile app markets have lowered the barrier to market entry for software producers. As a consequence, an increasing number of independent app developers offer their products, and recent platforms such as the MIT App Inventor and Microsoft's TouchDevelop enable even lay programmers to develop apps and distribute them in app markets. A major challenge in this distribution model is to ensure the quality of apps. Besides the usual sources of software errors, mobile apps are susceptible to errors caused by the non-determinism of an event-based execution model, a volatile environment, diverse hardware, and others. Many of these errors are difficult to detect during testing, especially for independent app developers, who are not supported by test teams and elaborate test infrastructures. To address this problem, we propose a static program analysis that captures the specifics of mobile apps and is efficient enough to provide feedback during the development process. Experiments involving 51,456 published TouchDevelop scripts show that our analysis analyzes 98\% of the scripts in under a minute, and five seconds on average. 
Manual inspection of the analysis results for a selection of all scripts shows that most of the alarms are real errors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Vora:2014:AEA, author = "Keval Vora and Sai Charan Koduru and Rajiv Gupta", title = "{ASPIRE}: exploiting asynchronous parallelism in iterative algorithms using a relaxed consistency based {DSM}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "861--878", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660227", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many vertex-centric graph algorithms can be expressed using asynchronous parallelism by relaxing certain read-after-write data dependences and allowing threads to compute vertex values using stale (i.e., not the most recent) values of their neighboring vertices. We observe that on distributed shared memory systems, by converting synchronous algorithms into their asynchronous counterparts, algorithms can be made tolerant to high inter-node communication latency. However, high inter-node communication latency can lead to excessive use of stale values causing an increase in the number of iterations required by the algorithms to converge. Although by using bounded staleness we can restrict the slowdown in the rate of convergence, this also restricts the ability to tolerate communication latency. In this paper we design a relaxed memory consistency model and consistency protocol that simultaneously tolerate communication latency and minimize the use of stale values. This is achieved via a coordinated use of best effort refresh policy and bounded staleness. We demonstrate that for a range of asynchronous graph algorithms and PDE solvers, on an average, our approach outperforms algorithms based upon: prior relaxed memory models that allow stale values by at least 2.27x; and Bulk Synchronous Parallel (BSP) model by 4.2x. We also show that our approach frequently outperforms GraphLab, a popular distributed graph processing framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Holt:2014:AAL, author = "Brandon Holt and Preston Briggs and Luis Ceze and Mark Oskin", title = "{Alembic}: automatic locality extraction via migration", journal = j-SIGPLAN, volume = "49", number = "10", pages = "879--894", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Partitioned Global Address Space (PGAS) environments simplify writing parallel code for clusters because they make data movement implicit --- dereferencing global pointers automatically moves data around. However, it does not free the programmer from needing to reason about locality --- poor placement of data can lead to excessive and even unnecessary communication. For this reason, modern PGAS languages such as X10, Chapel, and UPC allow programmers to express data-layout constraints and explicitly move computation. 
This places an extra burden on the programmer, and is less effective for applications with limited or data-dependent locality (e.g., graph analytics). This paper proposes Alembic, a new static analysis that frees programmers from having to manually move computation to exploit locality in PGAS programs. It works by determining regions of code that access the same cluster node, then transforming the code to migrate parts of the execution to increase the proportion of accesses to local data. We implement the analysis and transformation for C++ in LLVM and show that in irregular application kernels, Alembic can achieve 82\% of the performance of hand-tuned communication (for comparison, na{\"\i}ve compiler-generated communication achieves only 13\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Xiao:2014:CPL, author = "Tian Xiao and Zhenyu Guo and Hucheng Zhou and Jiaxing Zhang and Xu Zhao and Chencheng Ye and Xi Wang and Wei Lin and Wenguang Chen and Lidong Zhou", title = "{Cybertron}: pushing the limit on {I/O} reduction in data-parallel programs", journal = j-SIGPLAN, volume = "49", number = "10", pages = "895--908", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660204", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "I/O reduction has been a major focus in optimizing data-parallel programs for big-data processing. While the current state-of-the-art techniques use static program analysis to reduce I/O, Cybertron proposes a new direction that incorporates runtime mechanisms to push the limit further on I/O reduction. In particular, Cybertron tracks how data is used in the computation accurately at runtime to filter unused data at finer granularity dynamically, beyond what current static-analysis based mechanisms are capable of, and to facilitate a new mechanism called constraint based encoding for more efficient encoding. Cybertron has been implemented and applied to production data-parallel programs; our extensive evaluations on real programs and real data have shown its effectiveness on I/O reduction over the existing mechanisms at reasonable CPU cost, and its improvement on end-to-end performance in various network environments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Radoi:2014:TIC, author = "Cosmin Radoi and Stephen J. Fink and Rodric Rabbah and Manu Sridharan", title = "Translating imperative code to {MapReduce}", journal = j-SIGPLAN, volume = "49", number = "10", pages = "909--927", month = oct, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2714064.2660228", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach for automatic translation of sequential, imperative code into a parallel MapReduce framework. 
Automating such a translation is challenging: imperative updates must be translated into a functional MapReduce form in a manner that both preserves semantics and enables parallelism. Our approach works by first translating the input code into a functional representation, with loops succinctly represented by fold operations. Then, guided by rewrite rules, our system searches a space of equivalent programs for an effective MapReduce implementation. The rules include a novel technique for handling irregular loop-carried dependencies using group-by operations to enable greater parallelism. We have implemented our technique in a tool called Mold. It translates sequential Java code into code targeting the Apache Spark runtime. We evaluated Mold on several real-world kernels and found that in most cases Mold generated the desired MapReduce program, even for codes with complex indirect updates.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '14 conference proceedings.", } @Article{Guyer:2014:UJT, author = "Samuel Z. Guyer", title = "Use of the {JVM} at {Twitter}: a bird's eye view", journal = j-SIGPLAN, volume = "49", number = "11", pages = "1--1", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2619208", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Specialties: 15+ years of virtual machine implementation experience with special focus on memory management / garbage collection. Close to 20 years of C/C++ experience. 15+ years of Java experience. Expert in concurrent/parallel programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Terei:2014:MHP, author = "David Terei and Alex Aiken and Jan Vitek", title = "{$ M^3 $}: high-performance memory management from off-the-shelf components", journal = j-SIGPLAN, volume = "49", number = "11", pages = "3--13", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Real-world garbage collectors in managed languages are complex. We investigate whether this complexity is really necessary and show that by having a different (but wider) interface between the collector and the developer, we can achieve high performance with off-the-shelf components for real applications. We propose to assemble a memory manager out of multiple, simple collection strategies and to expose the choice of where to use those strategies in the program to the developer. We describe and evaluate an instantiation of our design for C. Our prototype allows developers to choose on a per-type basis whether data should be reference counted or reclaimed by a tracing collector. 
While neither strategy is optimised, our empirical data shows that we can achieve performance that is competitive with hand-tuned C code for real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Clifford:2014:AFB, author = "Daniel Clifford and Hannes Payer and Michael Starzinger and Ben L. Titzer", title = "Allocation folding based on dominance", journal = j-SIGPLAN, volume = "49", number = "11", pages = "15--24", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602994", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Memory management system performance is of increasing importance in today's managed languages. Two lingering sources of overhead are the direct costs of memory allocations and write barriers. This paper introduces allocation folding, an optimization technique where the virtual machine automatically folds multiple memory allocation operations in optimized code together into a single, larger allocation group. An allocation group comprises multiple objects and requires just a single bounds check in a bump-pointer style allocation, rather than a check for each individual object. More importantly, all objects allocated in a single allocation group are guaranteed to be contiguous after allocation and thus exist in the same generation, which makes it possible to statically remove write barriers for reference stores involving objects in the same allocation group. Unlike object inlining, object fusing, and object colocation, allocation folding requires no special connectivity or ownership relation between the objects in an allocation group. We present our analysis algorithm to determine when it is safe to fold allocations together and discuss our implementation in V8, an open-source, production JavaScript virtual machine. We present performance results for the Octane and Kraken benchmark suites and show that allocation folding is a strong performance improvement, even in the presence of some heap fragmentation. Additionally, we use four hand-selected benchmarks JPEGEncoder, NBody, Soft3D, and Textwriter where allocation folding has a large impact.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Ratnakar:2014:PPC, author = "Bollu Ratnakar and Rupesh Nasre", title = "Push-pull constraint graph for efficient points-to analysis", journal = j-SIGPLAN, volume = "49", number = "11", pages = "25--33", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602989", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present techniques for efficient computation of points-to information for C programs. Pointer analysis is an important phase in the compilation process.
The computed points-to information and the alias information is useful for client analyses from varied domains such as bug finding, data-flow analysis, identifying security vulnerabilities, and parallelization, to name a few. Former research on pointer analysis has indicated that the main bottleneck towards scalability is manifested by the presence of complex constraints (load p = *q and store *p = q constraints) in the program. Complex constraints add edges to the constraint graph in an unpredictable manner and are responsible for initiating propagation of large amounts of points-to information across edges. We identify that the root cause to this issue is in the homogeneous structure in the constraint graph, due to which existing analyses treat loads and stores in a uniform manner. To address these issues, we present two techniques. First, we represent a constraint graph in a non-homogeneous manner, treat loads and stores in different ways, and employ a push-pull model for non-uniform propagation. Second, we propose lazy propagation which propagates information in the constraint graph only when necessary. We illustrate the effectiveness of our techniques using six large open-source programs and show that they improve the analysis time over a state-of-the-art BDD-based analysis by 33\% and over Deep Propagation by 21\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Joisha:2014:STF, author = "Pramod G. Joisha", title = "Sticky tries: fast insertions, fast lookups, no deletions for large key universes", journal = j-SIGPLAN, volume = "49", number = "11", pages = "35--46", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the sticky trie, a new variant of the standard trie data structure that achieves high-performing atomic insertions and lookups for large key universes by precluding deletions. It has applications in several areas, including address tracking, logging, and garbage collection. By leveraging features of a modern operating system, we show how a runtime can exploit the absence of deletions to realize an efficient sticky-trie implementation. We report on an evaluation of two representative uses --- compelling Bloom-filter alternative and fast substitute for a garbage collector's sequential store buffer (SSB). We demonstrate that a sticky trie, when compared with what is perhaps among the simplest Bloom filters, can be over 43\% faster, scale substantially better with increasing threads, and yet be free of false positives. By introducing the concept of an ideal SSB, we also demonstrate that a sticky trie could be competitive in performance with a class of SSBs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Brandt:2014:CPG, author = "Steven R. 
Brandt and Hari Krishnan and Gokarna Sharma and Costas Busch", title = "Concurrent, parallel garbage collection in linear time", journal = j-SIGPLAN, volume = "49", number = "11", pages = "47--58", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602990", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new concurrent garbage collection algorithm based on two types of reference, strong and weak, to link the graph of objects. Strong references connect the roots to all the nodes in the graph but do not contain cycles. Weak references may, however, contain cycles. Advantages of this system include: (1) reduced processing, non-trivial garbage collection work is only required when the last strong reference is lost; (2) fewer memory traces to delete objects, a garbage cycle only needs to be traversed twice to be deleted; (3) fewer memory traces to retain objects, since the collector can often prove objects are reachable without fully tracing support cycles to which the objects belong; (4) concurrency, it can run in parallel with a live system without ``stopping the world''; (5) parallel, because collection operations in different parts of the memory can proceed at the same time. Previous variants of this technique required exponential cleanup time, but our algorithm is linear in total time, i.e. any changes in the graph take only O(N) time steps, where N is the number of edges in the affected subgraph (e.g. the subgraph whose strong support is affected by the operations).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Ugawa:2014:ROP, author = "Tomoharu Ugawa and Richard E. Jones and Carl G. Ritson", title = "Reference object processing in on-the-fly garbage collection", journal = j-SIGPLAN, volume = "49", number = "11", pages = "59--69", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602991", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most proposals for on-the-fly garbage collection ignore the question of Java's weak and other reference types. However, we show that reference types are heavily used in DaCapo benchmarks. Of the few collectors that do address this issue, most block mutators, either globally or individually, while processing reference types. We introduce a new framework for processing reference types on-the-fly in Jikes RVM. Our framework supports both insertion and deletion write barriers. We have model checked our algorithm and incorporated it in our new implementation of the Sapphire on-the-fly collector. 
Using a deletion barrier, we process references while mutators are running in less than three times the time that previous approaches take while mutators are halted; our overall execution times are no worse, and often better.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Li:2014:MHD, author = "Pengcheng Li and Chen Ding and Hao Luo", title = "Modeling heap data growth using average liveness", journal = j-SIGPLAN, volume = "49", number = "11", pages = "71--82", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602997", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Most of today's programs make use of a sizable heap to store dynamic data. To characterize the heap dynamics, this paper presents a set of metrics to measure the average amount of data live and dead in a period of execution. They are collectively called average liveness. The paper defines these metrics of average liveness, gives linear-time algorithms for measurement, and discusses their use in finding the best heap size. The algorithms are implemented in a Java tracing system called Elephant Tracks and evaluated using the DaCapo benchmarks running on the Oracle HotSpot and IBM J9 Java virtual machines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Zakkak:2014:JJM, author = "Foivos S. Zakkak and Polyvios Pratikakis", title = "{JDMM}: a {Java} memory model for non-cache-coherent memory architectures", journal = j-SIGPLAN, volume = "49", number = "11", pages = "83--92", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "As the number of cores continuously grows, processor designers are considering non coherent memories as more scalable and energy efficient alternatives to the current coherent ones. The Java Memory Model (JMM) requires that all cores can access the Java heap. It guarantees sequential consistency for data-race-free programs and no out-of-thin-air values for non data-race-free programs. To implement the Java Memory Model over non-cache-coherent and distributed architectures Java Virtual Machines (JVMs) are most likely to employ software caching. In this work, (i) we provide a formalization of the Java Memory Model for non-cache-coherent and distributed memory architectures, (ii) prove the adherence of our model with the Java Memory Model and (iii) evaluate, regarding its compliance to the Java Memory Model, a state-of-the-art Java Virtual Machine implementation on a non-cache-coherent architecture.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Egielski:2014:MAM, author = "Ian J.
Egielski and Jesse Huang and Eddy Z. Zhang", title = "Massive atomics for massive parallelism on {GPUs}", journal = j-SIGPLAN, volume = "49", number = "11", pages = "93--103", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602993", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One important type of parallelism exploited in many applications is reduction type parallelism. In these applications, the order of the read-modify-write updates to one shared data object can be arbitrary as long as there is an imposed order for the read-modify-write updates. The typical way to parallelize these types of applications is to first let every individual thread perform local computation and save the results in thread-private data objects, and then merge the results from all worker threads in the reduction stage. All applications that fit into the map reduce framework belong to this category. Additionally, the machine learning, data mining, numerical analysis and scientific simulation applications may also benefit from reduction type parallelism. However, the parallelization scheme via the usage of thread-private data objects may not be viable in massively parallel GPU applications. Because the number of concurrent threads is extremely large (at least tens of thousands), thread-private data object creation may lead to memory space explosion problems. In this paper, we propose a novel approach to deal with shared data object management for reduction type parallelism on GPUs. Our approach exploits fine-grained parallelism while at the same time maintaining good programmability. It is based on the usage of intrinsic hardware atomic instructions. Atomic operation may appear to be expensive since it causes thread serialization when multiple threads atomically update the same memory object at the same time. However, we discovered that, with appropriate atomic collision reduction techniques, the atomic implementation can outperform the non-atomics implementation, even for benchmarks known to have high performance non-atomics GPU implementations. In the meantime, the usage of atomics can greatly reduce coding complexity as neither thread-private object management nor explicit thread-communication (for the shared data objects protected by atomic operations) is necessary.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Ritson:2014:EGC, author = "Carl G. Ritson and Tomoharu Ugawa and Richard E. Jones", title = "Exploring garbage collection with {Haswell} hardware transactional memory", journal = j-SIGPLAN, volume = "49", number = "11", pages = "105--115", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602992", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Intel's latest processor microarchitecture, Haswell, adds support for a restricted form of transactional memory to the x86 programming model. We explore how this can be applied to three garbage collection scenarios in Jikes RVM: parallel copying, concurrent copying and bitmap marking.
We demonstrate gains in concurrent copying speed over traditional synchronisation mechanisms of 48-101\%. We also show how similar but portable performance gains can be achieved through software transactional memory techniques. We identify the architectural overhead of capturing sufficient work for transactional execution as a major stumbling block to the effective use of transactions in the other scenarios.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Bacon:2014:PRT, author = "David F. Bacon and Perry Cheng and Sunil Shukla", title = "Parallel real-time garbage collection of multiple heaps in reconfigurable hardware", journal = j-SIGPLAN, volume = "49", number = "11", pages = "117--127", month = nov, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775049.2602996", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite rapid increases in memory capacity, reconfigurable hardware is still programmed in a very low-level manner, generally without any dynamic allocation at all. This limits productivity especially as the larger chips encourage more and more complex designs to be attempted. Prior work has shown that it is possible to implement a real-time collector in hardware and achieve stall-free operation --- but at the price of severe restrictions on object layouts. We present the first hardware garbage collector capable of collecting multiple inter-connected heaps, thereby allowing a rich set of object types. We show that for a modest additional cost in logic and memory, we can support multiple heaps at a clock frequency competitive with monolithic, fixed-layout heaps. We evaluate the hardware design by synthesizing it for a Xilinx FPGA and using co-simulation to measure the run-time behavior over a set of four benchmarks. Even at high allocation and mutation rates the collector is able to sustain stall-free (100\% minimum mutator utilization) operation with up to 4 inter-connected heaps, while only requiring between 1.1 and 1.7 times the maximum live memory of the application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '14 conference proceedings.", } @Article{Wu:2014:EHS, author = "Nicolas Wu and Tom Schrijvers and Ralf Hinze", title = "Effect handlers in scope", journal = j-SIGPLAN, volume = "49", number = "12", pages = "1--12", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633358", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Algebraic effect handlers are a powerful means for describing effectful computations. They provide a lightweight and orthogonal technique to define and compose the syntax and semantics of different effects. The semantics is captured by handlers, which are functions that transform syntax trees. Unfortunately, the approach does not support syntax for scoping constructs, which arise in a number of scenarios. 
While handlers can be used to provide a limited form of scope, we demonstrate that this approach constrains the possible interactions of effects and rules out some desired semantics. This paper presents two different ways to capture scoped constructs in syntax, and shows how to achieve different semantics by reordering handlers. The first approach expresses scopes using the existing algebraic handlers framework, but has some limitations. The problem is fully solved in the second approach where we introduce higher-order syntax.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Orchard:2014:EES, author = "Dominic Orchard and Tomas Petricek", title = "Embedding effect systems in {Haskell}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "13--24", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633368", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Monads are now an everyday tool in functional programming for abstracting and delimiting effects. The link between monads and effect systems is well-known, but in their typical use, monads provide a much more coarse-grained view of effects. Effect systems capture fine-grained information about the effects, but monads provide only a binary view: effectful or pure. Recent theoretical work has unified fine-grained effect systems with monads using a monad-like structure indexed by a monoid of effect annotations (called parametric effect monads). This aligns the power of monads with the power of effect systems. This paper leverages recent advances in Haskell's type system (as provided by GHC) to embed this approach in Haskell, providing user-programmable effect systems. We explore a number of practical examples that make Haskell even better and safer for effectful programming. Along the way, we relate the examples to other concepts, such as Haskell's implicit parameters and coeffects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Blanchette:2014:ERN, author = "Jasmin Christian Blanchette and Lars Hupel and Tobias Nipkow and Lars Noschinski and Dmitriy Traytel", title = "Experience report: the next 1100 {Haskell} programmers", journal = j-SIGPLAN, volume = "49", number = "12", pages = "25--30", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633359", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We report on our experience teaching a Haskell-based functional programming course to over 1100 students for two winter terms. The syllabus was organized around selected material from various sources. Throughout the terms, we emphasized correctness through QuickCheck tests and proofs by induction. The submission architecture was coupled with automatic testing, giving students the possibility to correct mistakes before the deadline. 
To motivate the students, we complemented the weekly assignments with an informal competition and gave away trophies in an award ceremony.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Muranushi:2014:ERT, author = "Takayuki Muranushi and Richard A. Eisenberg", title = "Experience report: type-checking polymorphic units for astrophysics research in {Haskell}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "31--38", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633362", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many of the bugs in scientific programs have their roots in mistreatment of physical dimensions, via erroneous expressions in the quantity calculus. Now that the type system in the Glasgow Haskell Compiler is rich enough to support type-level integers and other promoted datatypes, we can type-check the quantity calculus in Haskell. In addition to basic dimension-aware arithmetic and unit conversions, our units library features an extensible system of dimensions and units, a notion of dimensions apart from that of units, and unit polymorphism designed to describe the laws of physics. We demonstrate the utility of units by writing an astrophysics research paper. This work is free of unit concerns because every quantity expression in the paper is rigorously type-checked.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Vazou:2014:LER, author = "Niki Vazou and Eric L. Seidel and Ranjit Jhala", title = "{LiquidHaskell}: experience with refinement types in the real world", journal = j-SIGPLAN, volume = "49", number = "12", pages = "39--51", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633366", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Haskell has many delightful features. Perhaps the one most beloved by its users is its type system that allows developers to specify and verify a variety of program properties at compile time. However, many properties, typically those that depend on relationships between program values are impossible, or at the very least, cumbersome to encode within the existing type system. Many such properties can be verified using a combination of Refinement Types and external SMT solvers. We describe the refinement type checker liquidHaskell, which we have used to specify and verify a variety of properties of over 10,000 lines of Haskell code from various popular libraries, including containers, hscolour, bytestring, text, vector-algorithms and xmonad. First, we present a high-level overview of liquidHaskell, through a tour of its features. Second, we present a qualitative discussion of the kinds of properties that can be checked --- ranging from generic application independent criteria like totality and termination, to application specific concerns like memory safety and data structure correctness invariants.
Finally, we present a quantitative evaluation of the approach, with a view towards measuring the efficiency and programmer effort required for verification, and discuss the limitations of the approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Pike:2014:SAE, author = "Lee Pike", title = "{SmartCheck}: automatic and efficient counterexample reduction and generalization", journal = j-SIGPLAN, volume = "49", number = "12", pages = "53--64", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633365", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "QuickCheck is a powerful library for automatic test-case generation. Because QuickCheck performs random testing, some of the counterexamples discovered are very large. QuickCheck provides an interface for the user to write shrink functions to attempt to reduce the size of counter examples. Hand-written implementations of shrink can be complex, inefficient, and consist of significant boilerplate code. Furthermore, shrinking is only one aspect in debugging: counterexample generalization is the process of extrapolating from individual counterexamples to a class of counterexamples, often requiring a flash of insight from the programmer. To improve counterexample reduction and generalization, we introduce SmartCheck. SmartCheck is a debugging tool that reduces algebraic data using generic search heuristics to efficiently find smaller counterexamples. In addition to shrinking, SmartCheck also automatically generalizes counterexamples to formulas representing classes of counterexamples. SmartCheck has been implemented for Haskell and is freely available.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Maier:2014:HDS, author = "Patrick Maier and Robert Stewart and Phil Trinder", title = "The {HdpH DSLs} for scalable reliable computation", journal = j-SIGPLAN, volume = "49", number = "12", pages = "65--76", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633363", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The statelessness of functional computations facilitates both parallelism and fault recovery. Faults and non-uniform communication topologies are key challenges for emergent large scale parallel architectures. We report on HdpH and HdpH-RS, a pair of Haskell DSLs designed to address these challenges for irregular task-parallel computations on large distributed-memory architectures. Both DSLs share an API combining explicit task placement with sophisticated work stealing. HdpH focuses on scalability by making placement and stealing topology aware whereas HdpH-RS delivers reliability by means of fault tolerant work stealing. We present operational semantics for both DSLs and investigate conditions for semantic equivalence of HdpH and HdpH-RS programs, that is, conditions under which topology awareness can be transparently traded for fault tolerance. 
We detail how the DSL implementations realise topology awareness and fault tolerance. We report an initial evaluation of scalability and fault tolerance on a 256-core cluster and on up to 32K cores of an HPC platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Okabe:2014:SDW, author = "Kiwamu Okabe and Takayuki Muranushi", title = "Systems demonstration: writing {NetBSD} sound drivers in {Haskell}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "77--78", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most strongly typed, functional programming languages are not equipped with a reentrant garbage collector. Therefore such languages are not used for operating systems programming, where the virtues of types are most desired. We propose the use of Context-Local Heaps (CLHs) to achieve reentrancy, which also increases the speed of garbage collection. We have implemented CLHs in Ajhc, a Haskell compiler derived from jhc, rewritten some NetBSD sound drivers using Ajhc, and benchmarked them. The reentrant, faster garbage collection that CLHs provide opens the path to type-assisted operating systems programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Ekblad:2014:SCC, author = "Anton Ekblad and Koen Claessen", title = "A seamless, client-centric programming model for type safe web applications", journal = j-SIGPLAN, volume = "49", number = "12", pages = "79--89", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new programming model for web applications which is (1) seamless; one program and one language is used to produce code for both client and server, (2) client-centric; the programmer takes the viewpoint of the client that runs code on the server rather than the other way around, (3) functional and type-safe, and (4) portable; everything is implemented as a Haskell library that implicitly takes care of all networking code. Our aim is to improve the painful and error-prone experience of today's standard development methods, in which clients and servers are coded in different languages and communicate with each other using ad-hoc protocols. We present the design of our library called Haste.App, an example web application that uses it, and discuss the implementation and the compiler technology on which it depends.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Levy:2014:DPM, author = "Amit A.
Levy and David Terei and Deian Stefan and David Mazi{\'e}res", title = "Demo proposal: making web applications --- {XSafe}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "91--91", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Simple is a web framework for Haskell. Simple came out of our work on Hails, a platform for secure web applications. For Hails, we needed a flexible web framework that uses no unsafe language features and can be used to build apps outside the IO monad. Unlike many mainstream web frameworks, Simple does not enforce a particular structure or paradigm. Instead, it simply provides a set of composable building blocks to help developers structure and organize their web applications. We've used Simple to build both traditional web applications as well as applications with explicit, strong safety and security guarantees. In the demonstration, we'll focus on the former --- introducing the framework and motivating its utility for traditional web apps --- and show how we can leverage the LIO information flow control library to add mandatory security policies to apps.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Stefan:2014:BSS, author = "Deian Stefan and Amit Levy and Alejandro Russo and David Mazi{\'e}res", title = "Building secure systems with {LIO} (demo)", journal = j-SIGPLAN, volume = "49", number = "12", pages = "93--94", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633371", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "LIO is a decentralized information flow control (DIFC) system, implemented in Haskell. In this demo proposal, we give an overview of the LIO library and show how LIO can be used to build secure systems. In particular, we show how to specify high-level security policies in the context of web applications, and describe how LIO automatically enforces these policies even in the presence of untrusted code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Eisenberg:2014:PFT, author = "Richard A. Eisenberg and Jan Stolarek", title = "Promoting functions to type families in {Haskell}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "95--106", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633361", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Haskell, as implemented in the Glasgow Haskell Compiler (GHC), is enriched with many extensions that support type-level programming, such as promoted datatypes, kind polymorphism, and type families. Yet, the expressiveness of the type-level language remains limited.
It is missing many features present at the term level, including case expressions, anonymous functions, partially-applied functions, and let expressions. In this paper, we present an algorithm --- with a proof of correctness --- to encode these term-level constructs at the type level. Our approach is automated and capable of promoting a wide array of functions to type families. We also highlight and discuss those term-level features that are not promotable. In so doing, we offer a critique on GHC's existing type system, showing what it is already capable of and where it may want improvement. We believe that delineating the mismatch between GHC's term level and its type level is a key step toward supporting dependently typed programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Morris:2014:SSH, author = "J. Garrett Morris", title = "A simple semantics for {Haskell} overloading", journal = j-SIGPLAN, volume = "49", number = "12", pages = "107--118", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633364", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As originally proposed, type classes provide overloading and ad-hoc definition, but can still be understood (and implemented) in terms of strictly parametric calculi. This is not true of subsequent extensions of type classes. Functional dependencies and equality constraints allow the satisfiability of predicates to refine typing; this means that the interpretations of equivalent qualified types may not be interconvertible. Overlapping instances and instance chains allow predicates to be satisfied without determining the implementations of their associated class methods, introducing truly non-parametric behavior. We propose a new approach to the semantics of type classes, interpreting polymorphic expressions by the behavior of each of their ground instances, but without requiring that those behaviors be parametrically determined. We argue that this approach both matches the intuitive meanings of qualified types and accurately models the behavior of programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Chakravarty:2014:FIC, author = "Manuel M. T. Chakravarty", title = "Foreign inline code: systems demonstration", journal = j-SIGPLAN, volume = "49", number = "12", pages = "119--120", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Adams:2014:ISP, author = "Michael D. Adams and {\"O}mer S. 
Agacan", title = "Indentation-sensitive parsing for {Parsec}", journal = j-SIGPLAN, volume = "49", number = "12", pages = "121--132", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several popular languages including Haskell and Python use the indentation and layout of code as an essential part of their syntax. In the past, implementations of these languages used ad hoc techniques to implement layout. Recent work has shown that a simple extension to context-free grammars can replace these ad hoc techniques and provide both formal foundations and efficient parsing algorithms for indentation sensitivity. However, that previous work is limited to bottom-up, LR($k$) parsing, and many combinator-based parsing frameworks including Parsec use top-down algorithms that are outside its scope. This paper remedies this by showing how to add indentation sensitivity to parsing frameworks like Parsec. It explores both the formal semantics of and efficient algorithms for indentation sensitivity. It derives a Parsec-based library for indentation-sensitive parsing and presents benchmarks on a real-world language that show its efficiency and practicality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{vanderPloeg:2014:RRR, author = "Atze van der Ploeg and Oleg Kiselyov", title = "Reflection without remorse: revealing a hidden sequence to speed up monadic reflection", journal = j-SIGPLAN, volume = "49", number = "12", pages = "133--144", month = dec, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775050.2633360", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A series of list appends or monadic binds for many monads performs algorithmically worse when left-associated. Continuation-passing style (CPS) is well-known to cure this severe dependence of performance on the association pattern. The advantage of CPS dwindles or disappears if we have to examine or modify the intermediate result of a series of appends or binds, before continuing the series. Such examination is frequently needed, for example, to control search in non-determinism monads. We present an alternative approach that is just as general as CPS but more robust: it makes series of binds and other such operations efficient regardless of the association pattern-- and also provides efficient access to intermediate results. The key is to represent such a conceptual sequence as an efficient sequence data structure. Efficient sequence data structures from the literature are homogeneous and cannot be applied as they are in a type-safe way to series of monadic binds. We generalize them to type aligned sequences and show how to construct their (assuredly order-preserving) implementations. 
We demonstrate that our solution solves previously undocumented, severe performance problems in iteratees, LogicT transformers, free monads and extensible effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '14 conference proceedings.", } @Article{Rajamani:2015:ART, author = "Sriram Rajamani", title = "Automating Repetitive Tasks for the Masses", journal = j-SIGPLAN, volume = "50", number = "1", pages = "1--2", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2682621", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The programming languages (PL) research community has traditionally catered to the needs of professional programmers in the continuously evolving technical industry. However, there is a new opportunity that knocks our doors. The recent IT revolution has resulted in the masses having access to personal computing devices. More than 99\% of these computer users are non-programmers and are today limited to being passive consumers of the software that is made available to them. Can we empower these users to more effectively leverage computers for their daily tasks? The formalisms, techniques, and tools developed in the PL and the formal methods research communities can play a pivotal role!", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Mellies:2015:FTR, author = "Paul-Andr{\'e} Melli{\`e}s and Noam Zeilberger", title = "Functors are Type Refinement Systems", journal = j-SIGPLAN, volume = "50", number = "1", pages = "3--16", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676970", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The standard reading of type theory through the lens of category theory is based on the idea of viewing a type system as a category of well-typed terms. We propose a basic revision of this reading: rather than interpreting type systems as categories, we describe them as functors from a category of typing derivations to a category of underlying terms. Then, turning this around, we explain how in fact any functor gives rise to a generalized type system, with an abstract notion of typing judgment, typing derivations and typing rules. This leads to a purely categorical reformulation of various natural classes of type systems as natural classes of functors. The main purpose of this paper is to describe the general framework (which can also be seen as providing a categorical analysis of refinement types ), and to present a few applications. As a larger case study, we revisit Reynolds' paper on ``The Meaning of Types'' (2000), showing how the paper's main results may be reconstructed along these lines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Krishnaswami:2015:ILD, author = "Neelakantan R. 
Krishnaswami and Pierre Pradic and Nick Benton", title = "Integrating Linear and Dependent Types", journal = j-SIGPLAN, volume = "50", number = "1", pages = "17--30", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676969", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we show how to integrate linear types with type dependency, by extending the linear/non-linear calculus of Benton to support type dependency. Next, we give an application of this calculus by giving a proof-theoretic account of imperative programming, which requires extending the calculus with computationally irrelevant quantification, proof irrelevance, and a monad of computations. We show the soundness of our theory by giving a realizability model in the style of Nuprl, which permits us to validate not only the beta-laws for each type, but also the eta-laws. These extensions permit us to decompose Hoare triples into a collection of simpler type-theoretic connectives, yielding a rich equational theory for dependently-typed higher-order imperative programs. Furthermore, both the type theory and its model are relatively simple, even when all of the extensions are considered.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Sojakova:2015:HIT, author = "Kristina Sojakova", title = "Higher Inductive Types as Homotopy-Initial Algebras", journal = j-SIGPLAN, volume = "50", number = "1", pages = "31--42", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676983", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Homotopy Type Theory is a new field of mathematics based on the recently-discovered correspondence between Martin-L{\"o}f's constructive type theory and abstract homotopy theory. We have a powerful interplay between these disciplines --- we can use geometric intuition to formulate new concepts in type theory and, conversely, use type-theoretic machinery to verify and often simplify existing mathematical proofs. Higher inductive types form a crucial part of this new system since they allow us to represent mathematical objects, such as spheres, tori, pushouts, and quotients, in the type theory. We investigate a class of higher inductive types called W-suspensions which generalize Martin-L{\"o}f's well-founded trees. We show that a propositional variant of W-suspensions, whose computational behavior is determined up to a higher path, is characterized by the universal property of being a homotopy-initial algebra. 
As a corollary we get that W-suspensions in the strict form are homotopy-initial.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Ngo:2015:RES, author = "Minh Ngo and Fabio Massacci and Dimiter Milushev and Frank Piessens", title = "Runtime Enforcement of Security Policies on Black Box Reactive Programs", journal = j-SIGPLAN, volume = "50", number = "1", pages = "43--54", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676978", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Security enforcement mechanisms like execution monitors are used to make sure that some untrusted program complies with a policy. Different enforcement mechanisms have different strengths and weaknesses and hence it is important to understand the qualities of various enforcement mechanisms. This paper studies runtime enforcement mechanisms for reactive programs. We study the impact of two important constraints that many practical enforcement mechanisms satisfy: (1) the enforcement mechanism must handle each input/output event in finite time and on occurrence of the event (as opposed to for instance Ligatti's edit automata that have the power to buffer events for an arbitrary amount of time), and (2) the enforcement mechanism treats the untrusted program as a black box: it can monitor and/or edit the input/output events that the program exhibits on execution and it can explore alternative executions of the program by running additional copies of the program and providing these different inputs. It can not inspect the source or machine code of the untrusted program. Such enforcement mechanisms are important in practice: they include for instance many execution monitors, virtual machine monitors, and secure multi-execution or shadow executions. We establish upper and lower bounds for the class of policies that are enforceable by such black box mechanisms, and we propose a generic enforcement mechanism that works for a wide range of policies. We also show how our generic enforcement mechanism can be instantiated to enforce specific classes of policies, at the same time showing that many existing enforcement mechanisms are optimized instances of our construction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Barthe:2015:HOA, author = "Gilles Barthe and Marco Gaboardi and Emilio Jes{\'u}s Gallego Arias and Justin Hsu and Aaron Roth and Pierre-Yves Strub", title = "Higher-Order Approximate Relational Refinement Types for Mechanism Design and Differential Privacy", journal = j-SIGPLAN, volume = "50", number = "1", pages = "55--68", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677000", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mechanism design is the study of algorithm design where the inputs to the algorithm are controlled by strategic agents, who must be incentivized to faithfully report them. 
Unlike typical programmatic properties, it is not sufficient for algorithms to merely satisfy the property; incentive properties are only useful if the strategic agents also believe this fact. Verification is an attractive way to convince agents that the incentive properties actually hold, but mechanism design poses several unique challenges: interesting properties can be sophisticated relational properties of probabilistic computations involving expected values, and mechanisms may rely on other probabilistic properties, like differential privacy, to achieve their goals. We introduce a relational refinement type system, called HOARe2, for verifying mechanism design and differential privacy. We show that HOARe2 is sound w.r.t. a denotational semantics, and correctly models (epsilon,delta)-differential privacy; moreover, we show that it subsumes DFuzz, an existing linear dependent type system for differential privacy. Finally, we develop an SMT-based implementation of HOARe2 and use it to verify challenging examples of mechanism design, including auctions and aggregative games, and new proposed examples from differential privacy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Ebadi:2015:DPN, author = "Hamid Ebadi and David Sands and Gerardo Schneider", title = "Differential Privacy: Now it's Getting Personal", journal = j-SIGPLAN, volume = "50", number = "1", pages = "69--81", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677005", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Differential privacy provides a way to get useful information about sensitive data without revealing much about any one individual. It enjoys many nice compositionality properties not shared by other approaches to privacy, including, in particular, robustness against side-knowledge. Designing differentially private mechanisms from scratch can be a challenging task. One way to make it easier to construct new differentially private mechanisms is to design a system which allows more complex mechanisms (programs) to be built from differentially private building blocks in a principled way, so that the resulting programs are guaranteed to be differentially private by construction. This paper is about a new accounting principle for building differentially private programs. It is based on a simple generalisation of classic differential privacy which we call Personalised Differential Privacy (PDP). In PDP each individual has its own personal privacy level. We describe ProPer, an interactive system for implementing PDP which maintains a privacy budget for each individual. When a primitive query is made on data derived from individuals, the provenance of the involved records determines how the privacy budget of an individual is affected: the number of records derived from Alice determines the multiplier for the privacy decrease in Alice's budget. This offers some advantages over previous systems, in particular its fine-grained character allows better utilisation of the privacy budget than mechanisms based purely on the concept of global sensitivity, and it applies naturally to the case of a live database where new individuals are added over time.
We provide a formal model of the ProPer approach, prove that it provides personalised differential privacy, and describe a prototype implementation based on McSherry's PINQ system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Tang:2015:SBC, author = "Hao Tang and Xiaoyin Wang and Lingming Zhang and Bing Xie and Lu Zhang and Hong Mei", title = "Summary-Based Context-Sensitive Data-Dependence Analysis in Presence of Callbacks", journal = j-SIGPLAN, volume = "50", number = "1", pages = "83--95", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676997", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Building a summary for library code is a common approach to speeding up the analysis of client code. In presence of callbacks, some reachability relationships between library nodes cannot be obtained during library-code summarization. Thus, the library code may have to be analyzed again during the analysis of the client code with the library summary. In this paper, we propose to summarize library code with tree-adjoining-language (TAL) reachability. Compared with the summary built with context-free-language (CFL) reachability, the summary built with TAL reachability further contains conditional reachability relationships. The conditional reachability relationships can lead to much lighter analysis of the library code during the client code analysis with the TAL-reachability-based library summary. We also performed an experimental comparison of context-sensitive data-dependence analysis with the TAL-reachability-based library summary and context-sensitive data-dependence analysis with the CFL-reachability-based library summary using 15 benchmark subjects. Our experimental results demonstrate that the former has an 8X speed-up over the latter on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Chatterjee:2015:FAA, author = "Krishnendu Chatterjee and Rasmus Ibsen-Jensen and Andreas Pavlogiannis and Prateesh Goyal", title = "Faster Algorithms for Algebraic Path Properties in Recursive State Machines with Constant Treewidth", journal = j-SIGPLAN, volume = "50", number = "1", pages = "97--109", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676979", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interprocedural analysis is at the heart of numerous applications in programming languages, such as alias analysis, constant propagation, etc. Recursive state machines (RSMs) are standard models for interprocedural analysis. We consider a general framework with RSMs where the transitions are labeled from a semiring, and path properties are algebraic with semiring operations. RSMs with algebraic path properties can model interprocedural dataflow analysis problems, the shortest path problem, the most probable path problem, etc. 
The traditional algorithms for interprocedural analysis focus on path properties where the starting point is fixed as the entry point of a specific method. In this work, we consider possible multiple queries as required in many applications such as in alias analysis. The study of multiple queries allows us to bring in a very important algorithmic distinction between the resource usage of the one-time preprocessing and that of each individual query. The second aspect that we consider is that the control flow graphs for most programs have constant treewidth. Our main contributions are simple and implementable algorithms that support multiple queries for algebraic path properties for RSMs that have constant treewidth. Our theoretical results show that our algorithms have small additional one-time preprocessing, but can answer subsequent queries significantly faster as compared to the current best-known solutions for several important problems, such as interprocedural reachability and shortest path. We provide a prototype implementation for interprocedural reachability and intraprocedural shortest path that gives a significant speed-up on several benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Raychev:2015:PPP, author = "Veselin Raychev and Martin Vechev and Andreas Krause", title = "Predicting Program Properties from {``Big Code''}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "111--124", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677009", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new approach for predicting program properties from massive codebases (aka ``Big Code''). Our approach first learns a probabilistic model from existing data and then uses this model to predict properties of new, unseen programs. The key idea of our work is to transform the input program into a representation which allows us to phrase the problem of inferring program properties as structured prediction in machine learning. This formulation enables us to leverage powerful probabilistic graphical models such as conditional random fields (CRFs) in order to perform joint prediction of program properties. As an example of our approach, we built a scalable prediction engine called JSNice for solving two kinds of problems in the context of JavaScript: predicting (syntactic) names of identifiers and predicting (semantic) type annotations of variables. Experimentally, JSNice predicts correct names for 63\% of name identifiers and its type annotation predictions are correct in 81\% of the cases. In the first week since its release, JSNice was used by more than 30,000 developers and in only a few months has become a popular tool in the JavaScript developer community.
By formulating the problem of inferring program properties as structured prediction and showing how to perform both learning and inference in this context, our work opens up new possibilities for attacking a wide range of difficult problems in the context of ``Big Code'' including invariant generation, decompilation, synthesis and others.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Alur:2015:DDL, author = "Rajeev Alur and Loris D'Antoni and Mukund Raghothaman", title = "{DReX}: a Declarative Language for Efficiently Evaluating Regular String Transformations", journal = j-SIGPLAN, volume = "50", number = "1", pages = "125--137", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676981", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "We present DReX, a declarative language that can express all regular string-to-string transformations, and can still be efficiently evaluated. The class of regular string transformations has a robust theoretical foundation including multiple characterizations, closure properties, and decidable analysis questions, and admits a number of string operations such as insertion, deletion, substring swap, and reversal. Recent research has led to a characterization of regular string transformations using a primitive set of function combinators analogous to the definition of regular languages using regular expressions. While these combinators form the basis for the language DReX proposed in this paper, our main technical focus is on the complexity of evaluating the output of a DReX program on a given input string. It turns out that the natural evaluation algorithm involves dynamic programming, leading to complexity that is cubic in the length of the input string. Our main contribution is identifying a consistency restriction on the use of combinators in DReX programs, and a single-pass evaluation algorithm for consistent programs with time complexity that is linear in the length of the input string and polynomial in the size of the program. We show that the consistency restriction does not limit the expressiveness, and whether a DReX program is consistent can be checked efficiently. 
We report on a prototype implementation, and evaluate it using a representative set of text processing tasks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Veanes:2015:DPS, author = "Margus Veanes and Todd Mytkowicz and David Molnar and Benjamin Livshits", title = "Data-Parallel String-Manipulating Programs", journal = j-SIGPLAN, volume = "50", number = "1", pages = "139--152", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677014", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "String-manipulating programs are an important class of programs with applications in malware detection, graphics, input sanitization for Web security, and large-scale HTML processing. This paper extends prior work on BEK, an expressive domain-specific language for writing string-manipulating programs, with algorithmic insights that make BEK both analyzable and data-parallel. By analyzable we mean that unlike most general purpose programming languages, many algebraic properties of a BEK program are decidable (i.e., one can check whether two programs commute or compute the inverse of a program). By data-parallel we mean that a BEK program can compute on arbitrary subsections of its input in parallel, thus exploiting parallel hardware. This latter requirement is particularly important for programs which operate on large data: without data parallelism, a programmer cannot hide the latency of reading data from various storage media (i.e., reading a terabyte of data from a modern hard drive takes about 3 hours). With a data-parallel approach, the system can split data across multiple disks and thus hide the latency of reading the data. A BEK program is expressive: a programmer can use conditionals, switch statements, and registers --- or local variables --- in order to implement common string-manipulating programs. Unfortunately, this expressivity induces data dependencies, which are an obstacle to parallelism. The key contribution of this paper is an algorithm which automatically removes these data dependencies by mapping a BEK program into an intermediate format consisting of symbolic transducers, which extend classical transducers with symbolic predicates and symbolic assignments. We present a novel algorithm that we call exploration which performs symbolic loop unrolling of these transducers to obtain simplified versions of the original program. We show how these simplified versions can then be lifted to a stateless form, and from there compiled to data-parallel hardware. To evaluate the efficacy of our approach, we demonstrate up to 8x speedups for a number of real-world BEK programs (e.g., HTML encoder and decoder) on data-parallel hardware. To the best of our knowledge, these are the first data-parallel implementations of these programs.
To validate that our approach is correct, we use an automatic testing technique to compare our generated code to the original implementations and find no semantic deviations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Chlipala:2015:UWS, author = "Adam Chlipala", title = "{Ur\slash Web}: a Simple Model for Programming the {Web}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "153--165", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677004", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The World Wide Web has evolved gradually from a document delivery platform to an architecture for distributed programming. This largely unplanned evolution is apparent in the set of interconnected languages and protocols that any Web application must manage. This paper presents Ur/Web, a domain-specific, statically typed functional programming language with a much simpler model for programming modern Web applications. Ur/Web's model is unified, where programs in a single programming language are compiled to other ``Web standards'' languages as needed; supports novel kinds of encapsulation of Web-specific state; and exposes simple concurrency, where programmers can reason about distributed, multithreaded applications via a mix of transactions and cooperative preemption. We give a tutorial introduction to the main features of Ur/Web and discuss the language implementation and the production Web applications that use it.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Rastogi:2015:SEG, author = "Aseem Rastogi and Nikhil Swamy and C{\'e}dric Fournet and Gavin Bierman and Panagiotis Vekris", title = "Safe \& Efficient Gradual Typing for {TypeScript}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "167--180", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676971", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Current proposals for adding gradual typing to JavaScript, such as Closure, TypeScript and Dart, forgo soundness to deal with issues of scale, code reuse, and popular programming patterns. We show how to address these issues in practice while retaining soundness. We design and implement a new gradual type system, prototyped for expediency as a 'Safe' compilation mode for TypeScript. Our compiler achieves soundness by enforcing stricter static checks and embedding residual runtime checks in compiled code. It emits plain JavaScript that runs on stock virtual machines. Our main theorem is a simulation that ensures that the checks introduced by Safe TypeScript (1) catch any dynamic type error, and (2) do not alter the semantics of type-safe TypeScript code. Safe TypeScript is carefully designed to minimize the performance overhead of runtime checks. 
At its core, we rely on two new ideas: differential subtyping, a new form of coercive subtyping that computes the minimum amount of runtime type information that must be added to each object; and an erasure modality, which we use to safely and selectively erase type information. This allows us to scale our design to full-fledged TypeScript, including arrays, maps, classes, inheritance, overloading, and generic types. We validate the usability and performance of Safe TypeScript empirically by type-checking and compiling around 120,000 lines of existing TypeScript source code. Although runtime checks can be expensive, the end-to-end overhead is small for code bases that already have type annotations. For instance, we bootstrap the Safe TypeScript compiler (90,000 lines including the base TypeScript compiler): we measure a 15\% runtime overhead for type safety, and also uncover programming errors as type safety violations. We conclude that, at least during development and testing, subjecting JavaScript/TypeScript programs to safe gradual typing adds significant value to source type annotations at a modest cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Greenberg:2015:SEM, author = "Michael Greenberg", title = "Space-Efficient Manifest Contracts", journal = j-SIGPLAN, volume = "50", number = "1", pages = "181--194", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The standard algorithm for higher-order contract checking can lead to unbounded space consumption and can destroy tail recursion, altering a program's asymptotic space complexity. While space efficiency for gradual types---contracts mediating untyped and typed code---is well studied, sound space efficiency for manifest contracts---contracts that check stronger properties than simple types, e.g., ``is a natural'' instead of ``is an integer''---remains an open problem. We show how to achieve sound space efficiency for manifest contracts with strong predicate contracts. The essential trick is breaking the contract checking down into coercions: structured, blame-annotated lists of checks. By carefully preventing duplicate coercions from appearing, we can restore space efficiency while keeping the same observable behavior.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Sekiyama:2015:MCD, author = "Taro Sekiyama and Yuki Nishida and Atsushi Igarashi", title = "Manifest Contracts for Datatypes", journal = j-SIGPLAN, volume = "50", number = "1", pages = "195--207", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676996", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study algebraic data types in a manifest contract system, a software contract system where contract information occurs as refinement types. We first compare two simple approaches: refinements on type constructors and refinements on data constructors.
For example, lists of positive integers can be described by {l:int list | for_all (lambda y. y > 0) l} in the former, whereas by a user-defined datatype pos_list with cons of type {x:int | x > 0} X pos_list->pos_list in the latter. The two approaches are complementary: the former makes it easier for a programmer to write types and the latter enables more efficient contract checking. To take the best of both worlds, we propose (1) a syntactic translation from refinements on type constructors to equivalent refinements on data constructors and (2) dynamically checked casts between different but compatible datatypes such as int list and pos_list. We define a manifest contract calculus to formalize the semantics of the casts and prove that the translation is correct.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Vafeiadis:2015:CCO, author = "Viktor Vafeiadis and Thibaut Balabonski and Soham Chakraborty and Robin Morisset and Francesco Zappa Nardelli", title = "Common Compiler Optimisations are Invalid in the {C11} Memory Model and what we can do about it", journal = j-SIGPLAN, volume = "50", number = "1", pages = "209--220", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show that the weak memory model introduced by the 2011 C and C++ standards does not permit many common source-to-source program transformations (such as expression linearisation and ``roach motel'' reorderings) that modern compilers perform and that are deemed to be correct. As such it cannot be used to define the semantics of intermediate languages of compilers, as, for instance, LLVM aimed to. We consider a number of possible local fixes, some strengthening and some weakening the model. We evaluate the proposed fixes by determining which program transformations are valid with respect to each of the patched models. We provide formal Coq proofs of their correctness or counterexamples as appropriate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Lange:2015:CMG, author = "Julien Lange and Emilio Tuosto and Nobuko Yoshida", title = "From Communicating Machines to Graphical Choreographies", journal = j-SIGPLAN, volume = "50", number = "1", pages = "221--232", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676964", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphical choreographies, or global graphs, are general multiparty session specifications featuring expressive constructs such as forking, merging, and joining for representing application-level protocols. Global graphs can be directly translated into modelling notations such as BPMN and UML. This paper presents an algorithm whereby a global graph can be constructed from asynchronous interactions represented by communicating finite-state machines (CFSMs). 
Our results include: a sound and complete characterisation of a subset of safe CFSMs from which global graphs can be constructed; an algorithm to translate CFSMs to global graphs; a time complexity analysis; and an implementation of our theory, as well as an experimental evaluation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Dodds:2015:SCT, author = "Mike Dodds and Andreas Haas and Christoph M. Kirsch", title = "A Scalable, Correct Time-Stamped Stack", journal = j-SIGPLAN, volume = "50", number = "1", pages = "233--246", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676963", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent data-structures, such as stacks, queues, and deques, often implicitly enforce a total order over elements in their underlying memory layout. However, much of this order is unnecessary: linearizability only requires that elements are ordered if the insert methods ran in sequence. We propose a new approach which uses timestamping to avoid unnecessary ordering. Pairs of elements can be left unordered if their associated insert operations ran concurrently, and order imposed as necessary at the eventual removal. We realise our approach in a new non-blocking data-structure, the TS (timestamped) stack. Using the same approach, we can define corresponding queue and deque data-structures. In experiments on x86, the TS stack outperforms and outscales all its competitors --- for example, it outperforms the elimination-backoff stack by a factor of two. In our approach, more concurrency translates into less ordering, giving less-contended removal and thus higher performance and scalability. Despite this, the TS stack is linearizable with respect to stack semantics. The weak internal ordering in the TS stack presents a challenge when establishing linearizability: standard techniques such as linearization points work well when there exists a total internal order. We present a new stack theorem, mechanised in Isabelle, which characterises the orderings sufficient to establish stack semantics. By applying our stack theorem, we show that the TS stack is indeed linearizable.
Our theorem constitutes a new, generic proof technique for concurrent stacks, and it paves the way for future weakly ordered data-structure designs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Jourdan:2015:FVC, author = "Jacques-Henri Jourdan and Vincent Laporte and Sandrine Blazy and Xavier Leroy and David Pichardie", title = "A Formally-Verified {C} Static Analyzer", journal = j-SIGPLAN, volume = "50", number = "1", pages = "247--259", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper reports on the design and soundness proof, using the Coq proof assistant, of Verasco, a static analyzer based on abstract interpretation for most of the ISO C 1999 language (excluding recursion and dynamic allocation). Verasco establishes the absence of run-time errors in the analyzed programs. It enjoys a modular architecture that supports the extensible combination of multiple abstract domains, both relational and non-relational. Verasco integrates with the CompCert formally-verified C compiler so that not only the soundness of the analysis results is guaranteed with mathematical certitude, but also the fact that these guarantees carry over to the compiled code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Giacobazzi:2015:APA, author = "Roberto Giacobazzi and Francesco Logozzo and Francesco Ranzato", title = "Analyzing Program Analyses", journal = j-SIGPLAN, volume = "50", number = "1", pages = "261--273", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676987", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We want to prove that a static analysis of a given program is complete, namely, no imprecision arises when asking some query on the program behavior in the concrete (ie, for its concrete semantics) or in the abstract (ie, for its abstract interpretation). Completeness proofs are therefore useful to assign confidence to alarms raised by static analyses. We introduce the completeness class of an abstraction as the set of all programs for which the abstraction is complete. Our first result shows that for any nontrivial abstraction, its completeness class is not recursively enumerable. We then introduce a stratified deductive system to prove the completeness of program analyses over an abstract domain A. We prove the soundness of the deductive system. We observe that the only sources of incompleteness are assignments and Boolean tests --- unlikely a common belief in static analysis, joins do not induce incompleteness. The first layer of this proof system is generic, abstraction-agnostic, and it deals with the standard constructs for program composition, that is, sequential composition, branching and guarded iteration. 
The second layer is instead abstraction-specific: the designer of an abstract domain A provides conditions for completeness in A of assignments and Boolean tests which have to be checked by a suitable static analysis or assumed in the completeness proof as hypotheses. We instantiate the second layer of this proof system first with a generic nonrelational abstraction in order to provide a sound rule for the completeness of assignments. Orthogonally, we instantiate it to the numerical abstract domains of Intervals and Octagons, providing necessary and sufficient conditions for the completeness of their Boolean tests and of assignments for Octagons.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Stewart:2015:CC, author = "Gordon Stewart and Lennart Beringer and Santiago Cuellar and Andrew W. Appel", title = "Compositional {CompCert}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "275--287", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676985", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper reports on the development of Compositional CompCert, the first verified separate compiler for C. Specifying and proving separate compilation for C is made challenging by the coincidence of: compiler optimizations, such as register spilling, that introduce compiler-managed (private) memory regions into function stack frames, and C's stack-allocated addressable local variables, which may leak portions of stack frames to other modules when their addresses are passed as arguments to external function calls. The CompCert compiler, as built/proved by Leroy et al. 2006--2014, has proofs of correctness for whole programs, but its simulation relations are too weak to specify or prove separately compiled modules. Our technical contributions that make Compositional CompCert possible include: language-independent linking, a new operational model of multilanguage linking that supports strong semantic contextual equivalences; and structured simulations, a refinement of Beringer et al.'s logical simulation relations that enables expressive module-local invariants on the state communicated between compilation units at runtime.
All the results in the paper have been formalized in Coq and are available for download together with the Compositional CompCert compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Castagna:2015:PFS, author = "Giuseppe Castagna and Kim Nguyen and Zhiwu Xu and Pietro Abate", title = "Polymorphic Functions with Set-Theoretic Types: {Part 2}: Local Type Inference and Type Reconstruction", journal = j-SIGPLAN, volume = "50", number = "1", pages = "289--302", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676991", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This article is the second part of a two articles series about the definition of higher order polymorphic functions in a type system with recursive types and set-theoretic type connectives (unions, intersections, and negations). In the first part, presented in a companion paper, we defined and studied the syntax, semantics, and evaluation of the explicitly-typed version of a calculus, in which type instantiation is driven by explicit instantiation annotations. In this second part we present a local type inference system that allows the programmer to omit explicit instantiation annotations for function applications, and a type reconstruction system that allows the programmer to omit explicit type annotations for function definitions. The work presented in the two articles provides the theoretical foundations and technical machinery needed to design and implement higher-order polymorphic functional languages with union and intersection types and/or for semi-structured data processing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Garcia:2015:PTS, author = "Ronald Garcia and Matteo Cimini", title = "Principal Type Schemes for Gradual Programs", journal = j-SIGPLAN, volume = "50", number = "1", pages = "303--315", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676992", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Gradual typing is a discipline for integrating dynamic checking into a static type system. Since its introduction in functional languages, it has been adapted to a variety of type systems, including object-oriented, security, and substructural. This work studies its application to implicitly typed languages based on type inference. Siek and Vachharajani designed a gradual type inference system and algorithm that infers gradual types but still rejects ill-typed static programs. However, the type system requires local reasoning about type substitutions, an imperative inference algorithm, and a subtle correctness statement. This paper introduces a new approach to gradual type inference, driven by the principle that gradual inference should only produce static types. We present a static implicitly typed language, its gradual counterpart, and a type inference procedure. 
The gradual system types the same programs as Siek and Vachharajani, but has a modular structure amenable to extension. The language admits let-polymorphism, and its dynamics are defined by translation to the Polymorphic Blame Calculus. The principal types produced by our initial type system mask the distinction between static parametric polymorphism and polymorphism that can be attributed to gradual typing. To expose this difference, we distinguish static type parameters from gradual type parameters and reinterpret gradual type consistency accordingly. The resulting extension enables programs to be interpreted using either the polymorphic or monomorphic Blame Calculi.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Lourenco:2015:DIF, author = "Lu{\'\i}sa Louren{\c{c}}o and Lu{\'\i}s Caires", title = "Dependent Information Flow Types", journal = j-SIGPLAN, volume = "50", number = "1", pages = "317--328", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676994", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we develop a novel notion of dependent information flow types. Dependent information flow types fit within the standard framework of dependent type theory, but, unlike usual dependent types, crucially allow the security level of a type, rather than just the structural data type itself, to depend on runtime values. Our dependent function and dependent sum information flow types provide a direct, natural and elegant way to express and enforce fine grained security policies on programs, including programs that manipulate structured data types in which the security level of a structure field may depend on values dynamically stored in other fields, still considered a challenge to security enforcement in software systems such as data-centric web-based applications. We base our development on the very general setting of a minimal lambda-calculus with references and collections. We illustrate its expressiveness, showing how secure operations on relevant scenarios can be modelled and analysed using our dependent information flow type system, which is also shown to be amenable to algorithmic type checking. Our main results include type-safety and non-interference theorems ensuring that well-typed programs do not violate prescribed security policies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Preda:2015:ASA, author = "Mila Dalla Preda and Roberto Giacobazzi and Arun Lakhotia and Isabella Mastroeni", title = "Abstract Symbolic Automata: Mixed syntactic\slash semantic similarity analysis of executables", journal = j-SIGPLAN, volume = "50", number = "1", pages = "329--341", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676986", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a model for mixed syntactic/semantic approximation of programs based on symbolic finite automata (SFA). 
The edges of SFA are labeled by predicates whose semantics specifies the denotations that are allowed by the edge. We introduce the notion of abstract symbolic finite automaton (ASFA) where approximation is made by abstract interpretation of symbolic finite automata, acting both at syntactic (predicate) and semantic (denotation) level. We investigate in the details how the syntactic and semantic abstractions of SFA relate to each other and contribute to the determination of the recognized language. Then we introduce a family of transformations for simplifying ASFA. We apply this model to prove properties of commonly used tools for similarity analysis of binary executables. Following the structure of their control flow graphs, disassembled binary executables are represented as (concrete) SFA, where states are program points and predicates represent the (possibly infinite) I/O semantics of each basic block in a constraint form. Known tools for binary code analysis are viewed as specific choices of symbolic and semantic abstractions in our framework, making symbolic finite automata and their abstract interpretations a unifying model for comparing and reasoning about soundness and completeness of analyses of low-level code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Foster:2015:CDP, author = "Nate Foster and Dexter Kozen and Matthew Milano and Alexandra Silva and Laure Thompson", title = "A Coalgebraic Decision Procedure for {NetKAT}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "343--355", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677011", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "NetKAT is a domain-specific language and logic for specifying and verifying network packet-processing functions. It consists of Kleene algebra with tests (KAT) augmented with primitives for testing and modifying packet headers and encoding network topologies. Previous work developed the design of the language and its standard semantics, proved the soundness and completeness of the logic, defined a PSPACE algorithm for deciding equivalence, and presented several practical applications. This paper develops the coalgebraic theory of NetKAT, including a specialized version of the Brzozowski derivative, and presents a new efficient algorithm for deciding the equational theory using bisimulation. The coalgebraic structure admits an efficient sparse representation that results in a significant reduction in the size of the state space. We discuss the details of our implementation and optimizations that exploit NetKAT's equational axioms and coalgebraic structure to yield significantly improved performance. 
We present results from experiments demonstrating that our tool is competitive with state-of-the-art tools on several benchmarks including all-pairs connectivity, loop-freedom, and translation validation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Pous:2015:SAL, author = "Damien Pous", title = "Symbolic Algorithms for Language Equivalence and {Kleene} Algebra with Tests", journal = j-SIGPLAN, volume = "50", number = "1", pages = "357--368", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677007", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose algorithms for checking language equivalence of finite automata over a large alphabet. We use symbolic automata, where the transition function is compactly represented using (multi-terminal) binary decision diagrams (BDD). The key idea consists in computing a bisimulation by exploring reachable pairs symbolically, so as to avoid redundancies. This idea can be combined with already existing optimisations, and we show in particular a nice integration with the disjoint sets forest data-structure from Hopcroft and Karp's standard algorithm. Then we consider Kleene algebra with tests (KAT), an algebraic theory that can be used for verification in various domains ranging from compiler optimisation to network programming analysis. This theory is decidable by reduction to language equivalence of automata on guarded strings, a particular kind of automata that have exponentially large alphabets. We propose several methods allowing to construct symbolic automata out of KAT expressions, based either on Brzozowski's derivatives or on standard automata constructions. All in all, this results in efficient algorithms for deciding equivalence of KAT expressions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Sjoberg:2015:PC, author = "Vilhelm Sj{\"o}berg and Stephanie Weirich", title = "Programming up to Congruence", journal = j-SIGPLAN, volume = "50", number = "1", pages = "369--382", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676974", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents the design of Zombie, a dependently-typed programming language that uses an adaptation of a congruence closure algorithm for proof and type inference. This algorithm allows the type checker to automatically use equality assumptions from the context when reasoning about equality. Most dependently-typed languages automatically use equalities that follow from beta-reduction during type checking; however, such reasoning is incompatible with congruence closure. In contrast, Zombie does not use automatic beta-reduction because types may contain potentially diverging terms. Therefore Zombie provides a unique opportunity to explore an alternative definition of equivalence in dependently-typed language design. 
Our work includes the specification of the language via a bidirectional type system, which works `up-to-congruence,' and an algorithm for elaborating expressions in this language to an explicitly typed core language. We prove that our elaboration algorithm is complete with respect to the source type system, and always produces well typed terms in the core language. This algorithm has been implemented in the Zombie language, which includes general recursion, irrelevant arguments, heterogeneous equality and datatypes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Tobisawa:2015:MLC, author = "Kazunori Tobisawa", title = "A Meta Lambda Calculus with Cross-Level Computation", journal = j-SIGPLAN, volume = "50", number = "1", pages = "383--393", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676976", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose meta lambda calculus Lambda-* as a basic model of textual substitution via metavariables. The most important feature of the calculus is that every beta-redex can be reduced regardless of whether the beta-redex contains meta-level variables or not. Such a meta lambda calculus has never been achieved before due to difficulty to manage binding structure consistently with alpha-renaming in the presence of meta-level variables. We overcome the difficulty by introducing a new mechanism to deal with substitution and binding structure in a systematic way without the notion of free variables and alpha-renaming. Calculus Lambda-* enables us to investigate cross-level terms that include a certain type of level mismatch. Cross-level terms have been regarded as meaningless terms and left out of consideration thus far. We find that some cross-level terms behave as quotes and `eval' command in programming languages. With these terms, we show a procedural language as an application of the calculus, which sheds new light on the notions of stores and recursion via meta-level variables.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Staton:2015:AEL, author = "Sam Staton", title = "Algebraic Effects, Linearity, and Quantum Programming Languages", journal = j-SIGPLAN, volume = "50", number = "1", pages = "395--406", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop a new framework of algebraic theories with linear parameters, and use it to analyze the equational reasoning principles of quantum computing and quantum programming languages. 
We use the framework as follows: we present a new elementary algebraic theory of quantum computation, built from unitary gates and measurement; we provide a completeness theorem for the elementary algebraic theory by relating it with a model from operator algebra; we extract an equational theory for a quantum programming language from the algebraic theory; we compare quantum computation with other local notions of computation by investigating variations on the algebraic theory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Farzan:2015:PSU, author = "Azadeh Farzan and Zachary Kincaid and Andreas Podelski", title = "Proof Spaces for Unbounded Parallelism", journal = j-SIGPLAN, volume = "50", number = "1", pages = "407--420", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677012", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we present a new approach to automatically verify multi-threaded programs which are executed by an unbounded number of threads running in parallel. The starting point for our work is the problem of how we can leverage existing automated verification technology for sequential programs (abstract interpretation, Craig interpolation, constraint solving, etc.) for multi-threaded programs. Suppose that we are given a correctness proof for a trace of a program (or for some other program fragment). We observe that the proof can always be decomposed into a finite set of Hoare triples, and we ask what can be proved from the finite set of Hoare triples using only simple combinatorial inference rules (without access to a theorem prover and without the possibility to infer genuinely new Hoare triples)? We introduce a proof system where one proves the correctness of a multi-threaded program by showing that for each trace of the program, there exists a correctness proof in the space of proofs that are derivable from a finite set of axioms using simple combinatorial inference rules. This proof system is complete with respect to the classical proof method of establishing an inductive invariant (which uses thread quantification and control predicates). Moreover, it is possible to algorithmically check whether a given set of axioms is sufficient to prove the correctness of a multi-threaded program, using ideas from well-structured transition systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Sangiorgi:2015:ECU, author = "Davide Sangiorgi", title = "Equations, Contractions, and Unique Solutions", journal = j-SIGPLAN, volume = "50", number = "1", pages = "421--432", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676965", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One of the most studied behavioural equivalences is bisimilarity.
Its success is much due to the associated bisimulation proof method, which can be further enhanced by means of ``up-to bisimulation'' techniques such as ``up-to context''. A different proof method is discussed, based on unique solution of special forms of inequations called contractions, and inspired by Milner's theorem on unique solution of equations. The method is as powerful as the bisimulation proof method and its ``up-to context'' enhancements. The definition of contraction can be transferred onto other behavioural equivalences, possibly contextual and noncoinductive. This enables a coinductive reasoning style on such equivalences, either by applying the method based on unique solution of contractions, or by injecting appropriate contraction preorders into the bisimulation game. The techniques are illustrated on CCS-like languages; an example dealing with higher-order languages is also shown.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Gupta:2015:SRC, author = "Ashutosh Gupta and Thomas A. Henzinger and Arjun Radhakrishna and Roopsha Samanta and Thorsten Tarrach", title = "Succinct Representation of Concurrent Trace Sets", journal = j-SIGPLAN, volume = "50", number = "1", pages = "433--444", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677008", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method and a tool for generating succinct representations of sets of concurrent traces. We focus on trace sets that contain all correct or all incorrect permutations of events from a given trace. We represent trace sets as HB-Formulas that are Boolean combinations of happens-before constraints between events. To generate a representation of incorrect interleavings, our method iteratively explores interleavings that violate the specification and gathers generalizations of the discovered interleavings into an HB-Formula; its complement yields a representation of correct interleavings. We claim that our trace set representations can drive diverse verification, fault localization, repair, and synthesis techniques for concurrent programs. We demonstrate this by using our tool in three case studies involving synchronization synthesis, bug summarization, and abstraction refinement based verification. In each case study, our initial experimental results have been promising. In the first case study, we present an algorithm for inferring missing synchronization from an HB-Formula representing correct interleavings of a given trace. The algorithm applies rules to rewrite specific patterns in the HB-Formula into locks, barriers, and wait-notify constructs. In the second case study, we use an HB-Formula representing incorrect interleavings for bug summarization. While the HB-Formula itself is a concise counterexample summary, we present additional inference rules to help identify specific concurrency bugs such as data races, define-use order violations, and two-stage access bugs. In the final case study, we present a novel predicate learning procedure that uses HB-Formulas representing abstract counterexamples to accelerate counterexample-guided abstraction refinement (CEGAR). 
In each iteration of the CEGAR loop, the procedure refines the abstraction to eliminate multiple spurious abstract counterexamples drawn from the HB-Formula.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Bogdanas:2015:KJC, author = "Denis Bogdanas and Grigore Rosu", title = "{K-Java}: a Complete Semantics of {Java}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "445--456", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676982", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents K-Java, a complete executable formal semantics of Java 1.4. K-Java was extensively tested with a test suite developed alongside the project, following the Test Driven Development methodology. In order to maintain clarity while handling the great size of Java, the semantics was split into two separate definitions --- a static semantics and a dynamic semantics. The output of the static semantics is a preprocessed Java program, which is passed as input to the dynamic semantics for execution. The preprocessed program is a valid Java program, which uses a subset of the features of Java. The semantics is applied to model-check multi-threaded programs. Both the test suite and the static semantics are generic and ready to be used in other Java-related projects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Adams:2015:TEH, author = "Michael D. Adams", title = "Towards the Essence of Hygiene", journal = j-SIGPLAN, volume = "50", number = "1", pages = "457--469", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677013", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hygiene is an essential aspect of Scheme's macro system that prevents unintended variable capture. However, previous work on hygiene has focused on algorithmic implementation rather than precise, mathematical definition of what constitutes hygiene. This is in stark contrast with lexical scope, alpha-equivalence and capture-avoiding substitution, which also deal with preventing unintended variable capture but have widely applicable and well-understood mathematical definitions. This paper presents such a precise, mathematical definition of hygiene. It reviews various kinds of hygiene violation and presents examples of how they occur. From these examples, we develop a practical algorithm for hygienic macro expansion. We then present algorithm-independent, mathematical criteria for whether a macro expansion algorithm is hygienic. 
This characterization corresponds closely to existing hygiene algorithms and sheds light on aspects of hygiene that are usually overlooked in informal definitions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Brown:2015:SRG, author = "Matt Brown and Jens Palsberg", title = "Self-Representation in {Girard}'s {System U}", journal = j-SIGPLAN, volume = "50", number = "1", pages = "471--484", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676988", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In 1991, Pfenning and Lee studied whether System F could support a typed self-interpreter. They concluded that typed self-representation for System F ``seems to be impossible'', but were able to represent System F in F$_{ \omega }$. Further, they found that the representation of F$_{ \omega }$ requires kind polymorphism, which is outside F$_{ \omega }$. In 2009, Rendel, Ostermann and Hofer conjectured that the representation of kind-polymorphic terms would require another, higher form of polymorphism. Is this a case of infinite regress? We show that it is not and present a typed self-representation for Girard's System U, the first for a \lambda -calculus with decidable type checking. System U extends System F$_{ \omega }$ with kind polymorphic terms and types. We show that kind polymorphic types (i.e. types that depend on kinds) are sufficient to ``tie the knot'' --- they enable representations of kind polymorphic terms without introducing another form of polymorphism. Our self-representation supports operations that iterate over a term, each of which can be applied to a representation of itself. We present three typed self-applicable operations: a self-interpreter that recovers a term from its representation, a predicate that tests the intensional structure of a term, and a typed continuation-passing-style (CPS) transformation --- the first typed self-applicable CPS transformation. Our techniques could have applications from verifiably type-preserving metaprograms, to growable typed languages, to more efficient self-interpreters.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Lee:2015:CEE, author = "Peter Lee", title = "Coding by Everyone, Every Day", journal = j-SIGPLAN, volume = "50", number = "1", pages = "485--485", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2682622", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years, advances in machine learning and related fields have led to significant advances in a range of user-interface technologies, including audio processing, speech recognition, and natural language processing. These advances in turn have enabled speech-based digital assistants and speech-to-speech translation systems to become practical to deploy on a large scale. In essence, machines are becoming capable of hearing what we are saying. But will they understand what we want them to do when we talk to them? 
What are the prospects for getting useful work done --- in essence, by synthesizing programs --- through the act of having a conversation with a computer? In this lecture, I will speculate on the central role that programming-language design and program synthesis may have in this possible --- and I will argue, likely --- future of computing, one in which every user writes programs, every day, by conversing with a computing system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Buneman:2015:DPT, author = "Peter Buneman", title = "Databases and Programming: Two Subjects Divided by a Common Language?", journal = j-SIGPLAN, volume = "50", number = "1", pages = "487--487", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2682620", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The 1990s saw a hugely productive interaction between database and programming language research. Ideas about type systems from programming languages played a central role in generalizing and adapting relational database systems to new data models. At the same time databases provided some of the best concrete examples of the application of concurrency theory and of the benefits of high-level optimization in functional programming languages. One of the driving ambitions behind this research was the idea that database access should be properly embedded in programming languages: one should not have to be bilingual in order to use a database from a programming language; and that goal has to some extent been realized. In the past fifteen years, new data models, both for data storage and for data exchange have appeared with depressing regularity and with each such model, the inevitable query language. Does programming language research have anything to contribute to these new languages? Should we take the time to worry about embedding these models in conventional languages? Over the same period, some interesting new connections between databases and programming languages have emerged, notably in the areas of scientific databases, annotation and provenance. Will this provide new opportunities for cross-fertilization?", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Fioriti:2015:PTS, author = "Luis Mar{\'\i}a Ferrer Fioriti and Holger Hermanns", title = "Probabilistic Termination: Soundness, Completeness, and Compositionality", journal = j-SIGPLAN, volume = "50", number = "1", pages = "489--501", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a framework to prove almost sure termination for probabilistic programs with real valued variables. It is based on ranking supermartingales, a notion analogous to ranking functions on non-probabilistic programs. The framework is proven sound and complete for a meaningful class of programs involving randomization and bounded nondeterminism.
We complement this foundational insight by a practical proof methodology, based on sound conditions that enable compositional reasoning and are amenable to a direct implementation using modern theorem provers. This is integrated in a small dependent type system, to overcome the problem that lexicographic ranking functions fail when combined with randomization. Among others, this compositional methodology enables the verification of probabilistic programs outside the complete class that admits ranking supermartingales.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{He:2015:LWA, author = "Fei He and Xiaowei Gao and Bow-Yaw Wang and Lijun Zhang", title = "Leveraging Weighted Automata in Compositional Reasoning about Concurrent Probabilistic Systems", journal = j-SIGPLAN, volume = "50", number = "1", pages = "503--514", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose the first sound and complete learning-based compositional verification technique for probabilistic safety properties on concurrent systems where each component is a Markov decision process. Different from previous works, weighted assumptions are introduced to attain completeness of our framework. Since weighted assumptions can be implicitly represented by multi-terminal binary decision diagrams (MTBDD's), we give an L*-based learning algorithm for MTBDD's to infer weighted assumptions. Experimental results suggest promising outlooks for our compositional technique.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Bonchi:2015:FAS, author = "Filippo Bonchi and Pawel Sobocinski and Fabio Zanasi", title = "Full Abstraction for Signal Flow Graphs", journal = j-SIGPLAN, volume = "50", number = "1", pages = "515--526", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676993", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Network theory uses the string diagrammatic language of monoidal categories to study graphical structures formally, eschewing specialised translations into intermediate formalisms. Recently, there has been a concerted research focus on developing a network theoretic approach to signal flow graphs, which are classical structures in control theory, signal processing and a cornerstone in the study of feedback. In this approach, signal flow graphs are given a relational denotational semantics in terms of formal power series. Thus far, the operational behaviour of such signal flow graphs has only been discussed at an intuitive level. In this paper we equip them with a structural operational semantics. As is typically the case, the purely operational picture is too concrete --- two graphs that are denotationally equal may exhibit different operational behaviour.
We classify the ways in which this can occur and show that any graph can be realised --- rewritten, using the graphical theory, into an executable form where the operational behaviour and the denotation coincide.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Hinze:2015:CHM, author = "Ralf Hinze and Nicolas Wu and Jeremy Gibbons", title = "Conjugate Hylomorphisms --- Or: The Mother of All Structured Recursion Schemes", journal = j-SIGPLAN, volume = "50", number = "1", pages = "527--538", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676989", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The past decades have witnessed an extensive study of structured recursion schemes. A general scheme is the hylomorphism, which captures the essence of divide-and-conquer: a problem is broken into sub-problems by a coalgebra; sub-problems are solved recursively; the sub-solutions are combined by an algebra to form a solution. In this paper we develop a simple toolbox for assembling recursive coalgebras, which by definition ensure that their hylo equations have unique solutions, whatever the algebra. Our main tool is the conjugate rule, a generic rule parametrized by an adjunction and a conjugate pair of natural transformations. We show that many basic adjunctions induce useful recursion schemes. In fact, almost every structured recursion scheme seems to arise as an instance of the conjugate rule. Further, we adapt our toolbox to the more expressive setting of parametrically recursive coalgebras, where the original input is also passed to the algebra. The formal development is complemented by a series of worked-out examples in Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Chatterjee:2015:QIA, author = "Krishnendu Chatterjee and Andreas Pavlogiannis and Yaron Velner", title = "Quantitative Interprocedural Analysis", journal = j-SIGPLAN, volume = "50", number = "1", pages = "539--551", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676968", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the quantitative analysis problem for interprocedural control-flow graphs (ICFGs). The input consists of an ICFG, a positive weight function that assigns every transition a positive integer-valued number, and a labelling of the transitions (events) as good, bad, and neutral events. The weight function assigns to each transition a numerical value that represents a measure of how good or bad an event is. The quantitative analysis problem asks whether there is a run of the ICFG where the ratio of the sum of the numerical weights of good events versus the sum of weights of bad events in the long-run is at least a given threshold (or equivalently, to compute the maximal ratio among all valid paths in the ICFG).
The quantitative analysis problem for ICFGs can be solved in polynomial time, and we present an efficient and practical algorithm for the problem. We show that several problems relevant for static program analysis, such as estimating the worst-case execution time of a program or the average energy consumption of a mobile application, can be modeled in our framework. We have implemented our algorithm as a tool in the Java Soot framework. We demonstrate the effectiveness of our approach with two case studies. First, we show that our framework provides a sound approach (no false positives) for the analysis of inefficiently-used containers. Second, we show that our approach can also be used for static profiling of programs which reasons about methods that are frequently invoked. Our experimental results show that our tool scales to relatively large benchmarks, and discovers relevant and useful information that can be used to optimize performance of the programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Bastani:2015:SIU, author = "Osbert Bastani and Saswat Anand and Alex Aiken", title = "Specification Inference Using Context-Free Language Reachability", journal = j-SIGPLAN, volume = "50", number = "1", pages = "553--566", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676977", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a framework for computing context-free language reachability properties when parts of the program are missing. Our framework infers candidate specifications for missing program pieces that are needed for verifying a property of interest, and presents these specifications to a human auditor for validation. We have implemented this framework for a taint analysis of Android apps that relies on specifications for Android library methods. In an extensive experimental study on 179 apps, our tool performs verification with only a small number of queries to a human auditor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Elango:2015:CDA, author = "Venmugil Elango and Fabrice Rastello and Louis-No{\"e}l Pouchet and J. Ramanujam and P. Sadayappan", title = "On Characterizing the Data Access Complexity of Programs", journal = j-SIGPLAN, volume = "50", number = "1", pages = "567--580", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677010", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Technology trends will cause data movement to account for the majority of energy expenditure and execution time on emerging computers. Therefore, computational complexity will no longer be a sufficient metric for comparing algorithms, and a fundamental characterization of data access complexity will be increasingly important. The problem of developing lower bounds for data access complexity has been modeled using the formalism of Hong and Kung's red/blue pebble game for computational directed acyclic graphs (CDAGs). 
However, previously developed approaches to lower bounds analysis for the red/blue pebble game are very limited in effectiveness when applied to CDAGs of real programs, with computations comprised of multiple sub-computations with differing DAG structure. We address this problem by developing an approach for effectively composing lower bounds based on graph decomposition. We also develop a static analysis algorithm to derive the asymptotic data-access lower bounds of programs, as a function of the problem size and cache size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Agten:2015:SMV, author = "Pieter Agten and Bart Jacobs and Frank Piessens", title = "Sound Modular Verification of {C} Code Executing in an Unverified Context", journal = j-SIGPLAN, volume = "50", number = "1", pages = "581--594", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676972", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past decade, great progress has been made in the static modular verification of C code by means of separation logic-based program logics. However, the runtime guarantees offered by such verification are relatively limited when the verified modules are part of a whole program that also contains unverified modules. In particular, a memory safety error in an unverified module can corrupt the runtime state, leading to assertion failures or invalid memory accesses in the verified modules. This paper develops runtime checks to be inserted at the boundary between the verified and the unverified part of a program, to guarantee that no assertion failures or invalid memory accesses can occur at runtime in any verified module. One of the key challenges is enforcing the separation logic frame rule, which we achieve by checking the integrity of the footprint of the verified part of the program on each control flow transition from the unverified to the verified part. This in turn requires the presence of some support for module-private memory at runtime. We formalize our approach and prove soundness. We implement the necessary runtime checks by means of a program transformation that translates C code with separation logic annotations into plain C, and that relies on a protected module architecture for providing module-private memory and restricted module entry points. 
Benchmarks show the performance impact of this transformation depends on the choice of boundary between the verified and unverified parts of the program, but is below 4\% for real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Gu:2015:DSC, author = "Ronghui Gu and J{\'e}r{\'e}mie Koenig and Tahina Ramananandro and Zhong Shao and Xiongnan (Newman) Wu and Shu-Chun Weng and Haozhong Zhang and Yu Guo", title = "Deep Specifications and Certified Abstraction Layers", journal = j-SIGPLAN, volume = "50", number = "1", pages = "595--608", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676975", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern computer systems consist of a multitude of abstraction layers (e.g., OS kernels, hypervisors, device drivers, network protocols), each of which defines an interface that hides the implementation details of a particular set of functionality. Client programs built on top of each layer can be understood solely based on the interface, independent of the layer implementation. Despite their obvious importance, abstraction layers have mostly been treated as a system concept; they have almost never been formally specified or verified. This makes it difficult to establish strong correctness properties, and to scale program verification across multiple layers. In this paper, we present a novel language-based account of abstraction layers and show that they correspond to a strong form of abstraction over a particularly rich class of specifications which we call deep specifications. Just as data abstraction in typed functional languages leads to the important representation independence property, abstraction over deep specification is characterized by an important implementation independence property: any two implementations of the same deep specification must have contextually equivalent behaviors. We present a new layer calculus showing how to formally specify, program, verify, and compose abstraction layers. We show how to instantiate the layer calculus in realistic programming languages such as C and assembly, and how to adapt the CompCert verified compiler to compile certified C layers such that they can be linked with assembly layers. 
Using these new languages and tools, we have successfully developed multiple certified OS kernels in the Coq proof assistant, the most realistic of which consists of 37 abstraction layers, took less than one person year to develop, and can boot a version of Linux as a guest.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Chlipala:2015:NIM, author = "Adam Chlipala", title = "From Network Interface to Multithreaded {Web} Applications: a Case Study in Modular Program Verification", journal = j-SIGPLAN, volume = "50", number = "1", pages = "609--622", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677003", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many verifications of realistic software systems are monolithic, in the sense that they define single global invariants over complete system state. More modular proof techniques promise to support reuse of component proofs and even reduce the effort required to verify one concrete system, just as modularity simplifies standard software development. This paper reports on one case study applying modular proof techniques in the Coq proof assistant. To our knowledge, it is the first modular verification certifying a system that combines infrastructure with an application of interest to end users. We assume a nonblocking API for managing TCP networking streams, and on top of that we work our way up to certifying multithreaded, database-backed Web applications. Key verified components include a cooperative threading library and an implementation of a domain-specific language for XML processing. We have deployed our case-study system on mobile robots, where it interfaces with off-the-shelf components for sensing, actuation, and control.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Crary:2015:CRM, author = "Karl Crary and Michael J. Sullivan", title = "A Calculus for Relaxed Memory", journal = j-SIGPLAN, volume = "50", number = "1", pages = "623--636", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676984", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new approach to programming multi-core, relaxed-memory architectures in imperative, portable programming languages. Our memory model is based on explicit, programmer-specified requirements for order of execution and the visibility of writes. The compiler then realizes those requirements in the most efficient manner it can. This is in contrast to existing memory models, which---if they allow programmer control over synchronization at all---are based on inferring the execution and visibility consequences of synchronization operations or annotations in the code. We formalize our memory model in a core calculus called RMC\@. Outside of the programmer's specified requirements, RMC is designed to be strictly more relaxed than existing architectures. 
It employs an aggressively nondeterministic semantics for expressions, in which actions can be executed in nearly any order, and a store semantics that generalizes Sarkar et al.'s and Alglave et al.'s models of the Power architecture. We establish several results for RMC, including sequential consistency for two programming disciplines, and an appropriate notion of type safety. All our results are formalized in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Jung:2015:IMI, author = "Ralf Jung and David Swasey and Filip Sieczkowski and Kasper Svendsen and Aaron Turon and Lars Birkedal and Derek Dreyer", title = "{Iris}: Monoids and Invariants as an Orthogonal Basis for Concurrent Reasoning", journal = j-SIGPLAN, volume = "50", number = "1", pages = "637--650", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676980", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Iris, a concurrent separation logic with a simple premise: monoids and invariants are all you need. Partial commutative monoids enable us to express---and invariants enable us to enforce---user-defined *protocols* on shared state, which are at the conceptual core of most recent program logics for concurrency. Furthermore, through a novel extension of the concept of a *view shift*, Iris supports the encoding of *logically atomic specifications*, i.e., Hoare-style specs that permit the client of an operation to treat the operation essentially as if it were atomic, even if it is not.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Bouajjani:2015:TRC, author = "Ahmed Bouajjani and Michael Emmi and Constantin Enea and Jad Hamza", title = "Tractable Refinement Checking for Concurrent Objects", journal = j-SIGPLAN, volume = "50", number = "1", pages = "651--662", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient implementations of concurrent objects such as semaphores, locks, and atomic collections are essential to modern computing. Yet programming such objects is error prone: in minimizing the synchronization overhead between concurrent object invocations, one risks the conformance to reference implementations --- or in formal terms, one risks violating observational refinement. Testing this refinement even within a single execution is intractable, limiting existing approaches to executions with very few object invocations. We develop a polynomial-time (per execution) approximation to refinement checking. The approximation is parameterized by an accuracy k \in N representing the degree to which refinement violations are visible. In principle, more violations are detectable as k increases, and in the limit, all are detectable. 
Our insight for this approximation arises from foundational properties on the partial orders characterizing the happens-before relations between object invocations: they are interval orders, with a well defined measure of complexity, i.e., their length. Approximating the happens-before relation with a possibly-weaker interval order of bounded length can be efficiently implemented by maintaining a bounded number of integer counters. In practice, we find that refinement violations can be detected with very small values of k, and that our approach scales far beyond existing refinement-checking approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Padon:2015:DSP, author = "Oded Padon and Neil Immerman and Aleksandr Karbyshev and Ori Lahav and Mooly Sagiv and Sharon Shoham", title = "Decentralizing {SDN} Policies", journal = j-SIGPLAN, volume = "50", number = "1", pages = "663--676", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676990", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined networking (SDN) is a new paradigm for operating and managing computer networks. SDN enables logically-centralized control over network devices through a ``controller'' --- software that operates independently of the network hardware. Network operators can run both in-house and third-party SDN programs on top of the controller, e.g., to specify routing and access control policies. In practice, having the controller handle events limits the network scalability. Therefore, the feasibility of SDN depends on the ability to efficiently decentralize network event-handling by installing forwarding rules on the switches. However, installing a rule too early or too late may lead to incorrect behavior, e.g., (1) packets may be forwarded to the wrong destination or incorrectly dropped; (2) packets handled by the switch may hide vital information from the controller, leading to incorrect forwarding behavior. The second issue is subtle and sometimes missed even by experienced programmers. The contributions of this paper are two fold. First, we formalize the correctness and optimality requirements for decentralizing network policies. Second, we identify a useful class of network policies which permits automatic synthesis of a controller which performs optimal forwarding rule installation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Cochran:2015:PBP, author = "Robert A. Cochran and Loris D'Antoni and Benjamin Livshits and David Molnar and Margus Veanes", title = "Program Boosting: Program Synthesis via Crowd-Sourcing", journal = j-SIGPLAN, volume = "50", number = "1", pages = "677--688", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2676973", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "In this paper, we investigate an approach to program synthesis that is based on crowd-sourcing. 
With the help of crowd-sourcing, we aim to capture the ``wisdom of the crowds'' to find good if not perfect solutions to inherently tricky programming tasks, which elude even expert developers and lack an easy-to-formalize specification. We propose an approach we call program boosting, which involves crowd-sourcing imperfect solutions to a difficult programming problem from developers and then blending these programs together in a way that improves their correctness. We implement this approach in a system called CROWDBOOST and show in our experiments that interesting and highly non-trivial tasks such as writing regular expressions for URLs or email addresses can be effectively crowd-sourced. We demonstrate that carefully blending the crowd-sourced results together consistently produces a boost, yielding results that are better than any of the starting programs. Our experiments on 465 program pairs show consistent boosts in accuracy and demonstrate that program boosting can be performed at a relatively modest monetary cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Delaware:2015:FDS, author = "Benjamin Delaware and Cl{\'e}ment Pit-Claudel and Jason Gross and Adam Chlipala", title = "{Fiat}: Deductive Synthesis of Abstract Data Types in a Proof Assistant", journal = j-SIGPLAN, volume = "50", number = "1", pages = "689--700", month = jan, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775051.2677006", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Fiat, a library for the Coq proof assistant supporting refinement of declarative specifications into efficient functional programs with a high degree of automation. Each refinement process leaves a proof trail, checkable by the normal Coq kernel, justifying its soundness. We focus on the synthesis of abstract data types that package methods with private data. We demonstrate the utility of our framework by applying it to the synthesis of query structures --- abstract data types with SQL-like query and insert operations. Fiat includes a library for writing specifications of query structures in SQL-inspired notation, expressing operations over relations (tables) in terms of mathematical sets. This library includes a suite of tactics for automating the refinement of specifications into efficient, correct-by-construction OCaml code. Using these tactics, a programmer can generate such an implementation completely automatically by only specifying the equivalent of SQL indexes, data structures capturing useful views of the abstract data. 
Throughout we speculate on the new programming modularity possibilities enabled by an automated refinement system with proved-correct rules.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '15 conference proceedings.", } @Article{Hanenberg:2015:WDW, author = "Stefan Hanenberg", title = "Why do we know so little about programming languages, and what would have happened if we had known more?", journal = j-SIGPLAN, volume = "50", number = "2", pages = "1--1", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661102", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming language research in the last decades was mainly driven by mathematical methods (such as formal semantics, correctness proofs, type soundness proofs, etc.) or run-time arguments based on benchmark tests. This happened despite the frequent discussion over programming language usability. We have now been through decade after decade of one language after another dominating the field, forcing companies to switch languages and migrate libraries. Now that Javascript seems to be the next language to dominate, people start to ask old questions anew. The first goal of this talk is to discuss why the application of empirical methods is (still) relatively rare in PL research, and to discuss what could be done in empirical methods to make them a substantial part of PL research. The second goal is to speculate about the possible effects that concrete empirical knowledge could have had on the programming language community. For example, what would have happened to programming languages if current knowledge would have been available 30 years ago? What if knowledge about programming languages from the year 2050 would be available today?", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Neto:2015:SOS, author = "Lourival Vieira Neto and Roberto Ierusalimschy and Ana L{\'u}cia de Moura and Marc Balmer", title = "Scriptable operating systems with {Lua}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "2--10", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661096", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Extensible operating system is a design based on the idea that operating systems can be adapted to meet user requirements by allowing user extensions. In a different scenario, that of application development, there is a paradigm that supports that complex systems should allow users to write scripts to tailor an application to their needs. In this paper we propose the concept of scriptable operating system, which applies scripting development paradigm to the concept of extensible operating systems. Scriptable operating systems support that operating systems can adequately provide extensibility by allowing users to script their kernel. 
We also present an implementation of a kernel-scripting environment that allows users to dynamically extend Linux and NetBSD operating systems using the scripting language Lua. To evaluate this environment, we extended both OS kernels to allow users to script CPU frequency scaling and network packet filtering using Lua.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Johnson:2015:AAC, author = "James Ian Johnson and David {Van Horn}", title = "Abstracting abstract control", journal = j-SIGPLAN, volume = "50", number = "2", pages = "11--22", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661098", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The strength of a dynamic language is also its weakness: run-time flexibility comes at the cost of compile-time predictability. Many of the hallmarks of dynamic languages such as closures, continuations, various forms of reflection, and a lack of static types make many programmers rejoice, while compiler writers, tool developers, and verification engineers lament. The dynamism of these features simply confounds statically reasoning about programs that use them. Consequently, static analyses for dynamic languages are few, far between, and seldom sound. The ``abstracting abstract machines'' (AAM) approach to constructing static analyses has recently been proposed as a method to ameliorate the difficulty of designing analyses for such language features. The approach, so called because it derives a function for the sound and computable approximation of program behavior starting from the abstract machine semantics of a language, provides a viable approach to dynamic language analysis since all that is required is a machine description of the interpreter. The AAM recipe as originally described produces finite state abstractions: the behavior of a program is approximated as a finite state machine. Such a model is inherently imprecise when it comes to reasoning about the control stack of the interpreter: a finite state machine cannot faithfully represent a stack. Recent advances have shown that higher-order programs can be approximated with pushdown systems. However, such models, founded in automata theory, either break down or require significant engineering in the face of dynamic language features that inspect or modify the control stack. In this paper, we tackle the problem of bringing pushdown flow analysis to the domain of dynamic language features. We revise the abstracting abstract machines technique to target the stronger computational model of pushdown systems. In place of automata theory, we use only abstract machines and memoization. As case studies, we show the technique applies to a language with closures, garbage collection, stack-inspection, and first-class composable continuations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Strickland:2015:CDS, author = "T. Stephen Strickland and Brianna M. Ren and Jeffrey S.
Foster", title = "Contracts for domain-specific languages in {Ruby}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "23--34", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661092", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper concerns object-oriented embedded DSLs, which are popular in the Ruby community but have received little attention in the research literature. Ruby DSLs implement language keywords as implicit method calls to self; language structure is enforced by adjusting which object is bound to self in different scopes. While Ruby DSLs are powerful and elegant, they suffer from a lack of specification. In this paper, we introduce contracts for Ruby DSLs, which allow us to attribute blame appropriately when there are inconsistencies between an implementation and client. We formalize Ruby DSL contract checking in DSL, a core calculus that uses premethods with instance evaluation to enforce contracts. We then describe RDL, an implementation of Ruby DSL contracts. Finally, we present two tools that automatically infer RDL contracts: TypeInfer infers simple, type-like contracts based on observed method calls, and DSLInfer infers DSL keyword scopes and nesting by generating and testing candidate DSL usages based on initial examples. The type contracts generated by TypeInfer work well enough, though they are limited in precision by the small number of tests, while DSLInfer finds almost all DSL structure. Our goal is to help users understand a DSL from example programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Disney:2015:SYJ, author = "Tim Disney and Nathan Faubion and David Herman and Cormac Flanagan", title = "Sweeten your {JavaScript}: hygienic macros for {ES5}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "35--44", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661097", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lisp and Scheme have demonstrated the power of macros to enable programmers to evolve and craft languages. In languages with more complex syntax, macros have had less success. In part, this has been due to the difficulty in building expressive hygienic macro systems for such languages. JavaScript in particular presents unique challenges for macro systems due to ambiguities in the lexing stage that force the JavaScript lexer and parser to be intertwined. In this paper we present a novel solution to the lexing ambiguity of JavaScript that enables us to cleanly separate the JavaScript lexer and parser by recording enough history during lexing to resolve ambiguities. We give an algorithm for this solution along with a proof that it does in fact correctly resolve ambiguities in the language. Though the algorithm and proof we present is specific to JavaScript, the general technique can be applied to other languages with ambiguous grammars. 
With lexer and parser separated, we then implement an expressive hygienic macro system for JavaScript called sweet.js.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Vitousek:2015:DEG, author = "Michael M. Vitousek and Andrew M. Kent and Jeremy G. Siek and Jim Baker", title = "Design and evaluation of gradual typing for {Python}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "45--56", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661101", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Combining static and dynamic typing within the same language offers clear benefits to programmers. It provides dynamic typing in situations that require rapid prototyping, heterogeneous data structures, and reflection, while supporting static typing when safety, modularity, and efficiency are primary concerns. Siek and Taha (2006) introduced an approach to combining static and dynamic typing in a fine-grained manner through the notion of type consistency in the static semantics and run-time casts in the dynamic semantics. However, many open questions remain regarding the semantics of gradually typed languages. In this paper we present Reticulated Python, a system for experimenting with gradual-typed dialects of Python. The dialects are syntactically identical to Python 3 but give static and dynamic semantics to the type annotations already present in Python 3. Reticulated Python consists of a typechecker and a source-to-source translator from Reticulated Python to Python 3. Using Reticulated Python, we evaluate a gradual type system and three approaches to the dynamic semantics of mutable objects: the traditional semantics based on Siek and Taha (2007) and Herman et al. (2007) and two new designs. We evaluate these designs in the context of several third-party Python programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Freudenberg:2015:SMP, author = "Bert Freudenberg and Dan H. H. Ingalls and Tim Felgentreff and Tobias Pape and Robert Hirschfeld", title = "{SqueakJS}: a modern and practical smalltalk that runs in any browser", journal = j-SIGPLAN, volume = "50", number = "2", pages = "57--66", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661100", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We report our experience in implementing SqueakJS, a bit-compatible implementation of Squeak/Smalltalk written in pure JavaScript. SqueakJS runs entirely in the Web browser with a virtual filesystem that can be directed to a server or client-side storage. Our implementation is notable for simplicity and performance gained through adaptation to the host object memory and deployment leverage gained through the Lively Web development environment. 
We present several novel techniques as well as performance measurements for the resulting virtual machine. Much of this experience is potentially relevant to preserving other dynamic language systems and making them available in a browser-based environment.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Aigner:2015:AJE, author = "Martin Aigner and Thomas H{\"u}tter and Christoph M. Kirsch and Alexander Miller and Hannes Payer and Mario Preishuber", title = "{ACDC-JS}: explorative benchmarking of {JavaScript} memory management", journal = j-SIGPLAN, volume = "50", number = "2", pages = "67--78", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661089", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We present ACDC-JS, an open-source JavaScript memory management benchmarking tool. ACDC-JS incorporates a heap model based on real web applications and may be configured to expose virtually any relevant performance characteristics of JavaScript memory management systems. ACDC-JS is based on ACDC, a benchmarking tool for C/C++ that models periodic allocation and deallocation behavior (AC) as well as persistent memory (DC). We identify important characteristics of JavaScript mutator behavior and propose a configurable heap model based on typical distributions of these characteristics as the foundation for ACDC-JS. We describe heap analyses of 13 real web applications, extending existing work on JavaScript behavior analysis. Our experimental results show that ACDC-JS enables performance benchmarking and debugging of state-of-the-art JavaScript virtual machines such as V8 and SpiderMonkey by exposing key aspects of their memory management performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Kotthaus:2015:DPS, author = "Helena Kotthaus and Ingo Korb and Michael Engel and Peter Marwedel", title = "Dynamic page sharing optimization for the {R} language", journal = j-SIGPLAN, volume = "50", number = "2", pages = "79--90", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic languages such as R are increasingly used to process large data sets. Here, the R interpreter induces a large memory overhead due to wasteful memory allocation policies. If an application's working set exceeds the available physical memory, the OS starts to swap, resulting in slowdowns of several orders of magnitude. Thus, memory optimizations for R will be beneficial to many applications. Existing R optimizations are mostly based on dynamic compilation or native libraries. Both methods are futile when the OS starts to page out memory. So far, only a few data-type- or application-specific memory optimizations for R exist.
To remedy this situation, we present a low-overhead page sharing approach for R that significantly reduces the interpreter's memory overhead. Concentrating on the most rewarding optimizations avoids the high runtime overhead of existing generic approaches for memory deduplication or compression. In addition, by applying knowledge of interpreter data structures and memory allocation patterns, our approach is not constrained to specific R applications and is transparent to the R interpreter. Our page sharing optimization enables us to reduce the memory consumption by up to 53.5\% with an average of 18.0\% for a set of real-world R benchmarks with a runtime overhead of only 5.3\% on average. In cases where page I/O can be avoided, significant speedups are achieved.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Khan:2015:UJW, author = "Faiz Khan and Vincent Foley-Bourgon and Sujay Kathrotia and Erick Lavoie and Laurie Hendren", title = "Using {JavaScript} and {WebCL} for numerical computations: a comparative study of native and web technologies", journal = j-SIGPLAN, volume = "50", number = "2", pages = "91--102", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661090", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "From its modest beginnings as a tool to validate forms, JavaScript is now an industrial-strength language used to power online applications such as spreadsheets, IDEs, image editors and even 3D games. Since all modern web browsers support JavaScript, it provides a medium that is both easy to distribute for developers and easy to access for users. This paper provides empirical data to answer the question: Is JavaScript fast enough for numerical computations? By measuring and comparing the runtime performance of benchmarks representative of a wide variety of scientific applications, we show that sequential JavaScript is within a factor of 2 of native code. Parallel code using WebCL shows speed improvements of up to 2.28 over JavaScript for the majority of the benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Rhodes:2015:DDO, author = "Dustin Rhodes and Tim Disney and Cormac Flanagan", title = "Dynamic detection of object capability violations through model checking", journal = j-SIGPLAN, volume = "50", number = "2", pages = "103--112", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661099", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we present a new tool called DOCaT (Dynamic Object Capability Tracer), a model checker for JavaScript that detects capability leaks in an object capability system. DOCaT includes an editor that highlights the sections of code that can be potentially transferred to untrusted third-party code along with a trace showing how the code could be leaked in an actual execution. 
This code highlighting provides a simple way of visualizing the references untrusted code potentially has access to and helps programmers to discover if their code is leaking more capabilities than required. DOCaT is implemented using a combination of source code rewriting (using Sweet.js, a JavaScript macro system), dynamic behavioral intercession (Proxies, introduced in ES6, the most recent version of JavaScript), and model checking. Together these methods are able to locate common ways for untrusted code to elevate its authority.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Steinert:2015:OVS, author = "Bastian Steinert and Lauritz Thamsen and Tim Felgentreff and Robert Hirschfeld", title = "Object versioning to support recovery needs: using proxies to preserve previous development states in {Lively}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "113--124", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661093", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We present object versioning as a generic approach to preserve access to previous development and application states. Version-aware references can manage the modifications made to the target object and record versions as desired. Such references can be provided without modifications to the virtual machine. We used proxies to implement the proposed concepts and demonstrate the Lively Kernel running on top of this object versioning layer. This enables Lively users to undo the effects of direct manipulation and other programming actions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Matsakis:2015:TOJ, author = "Nicholas D. Matsakis and David Herman and Dmitry Lomov", title = "Typed objects in {JavaScript}", journal = j-SIGPLAN, volume = "50", number = "2", pages = "125--134", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661095", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript's typed arrays have proven to be a crucial API for many JS applications, particularly those working with large amounts of data or emulating other languages. Unfortunately, the current typed array API offers no means of abstraction. Programmers are supplied with a simple byte buffer that can be viewed as an array of integers or floats, but nothing more. This paper presents a generalization of the typed arrays API entitled typed objects. The typed objects API is slated for inclusion in the upcoming ES7 standard. The API gives users the ability to define named types, making typed arrays much easier to work with. In particular, it is often trivial to replace uses of existing JavaScript objects with typed objects, resulting in better memory consumption and more predictable performance. The advantages of the typed object specification go beyond convenience, however.
By supporting opacity---that is, the ability to deny access to the raw bytes of a typed object---the new typed object specification makes it possible to store objects as well as scalar data and also enables more optimization by JIT compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Callau:2015:UTP, author = "Oscar Calla{\'u} and Romain Robbes and {\'E}ric Tanter and David R{\"o}thlisberger and Alexandre Bergel", title = "On the use of type predicates in object-oriented software: the case of smalltalk", journal = j-SIGPLAN, volume = "50", number = "2", pages = "135--146", month = feb, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775052.2661091", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:21 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Object-orientation relies on polymorphism to express behavioral variants. As opposed to traditional procedural design, explicit type-based conditionals should be avoided. This message is conveyed in introductory material on object orientation, as well as in object-oriented reengineering patterns. Is this principle followed in practice? In other words, are type predicates actually used in object-oriented software, and if so, to which extent? Answering these questions will assist practitioners and researchers with providing information about the state of the practice, and informing the active research program of retrofitting type systems, clarifying whether complex flow-sensitive typing approaches are necessary. Other areas, such as refactoring and teaching object orientation, can also benefit from empirical evidence on the matter. We report on a study of the use of type predicates in a large base of over 4 million lines of Smalltalk code. Our study shows that type predicates are in fact widely used to do explicit type dispatch, suggesting that flow-sensitive typing approaches are necessary for a type system retrofitted for a dynamic object-oriented language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '14 conference proceedings.", } @Article{Jarvi:2015:SPH, author = "Jaakko J{\"a}rvi and Gabriel Foust and Magne Haveraaen", title = "Specializing planners for hierarchical multi-way dataflow constraint systems", journal = j-SIGPLAN, volume = "50", number = "3", pages = "1--10", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658762", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A constraint system consists of variables and a set of constraints on those variables. To solve a constraint system is to find a valuation that satisfies all constraints; or the ``best'' subset of constraints if not all can simultaneously be satisfied. In a multi-way dataflow constraint system, solving requires selecting a set of user-defined functions which, when executed, will enforce the constraints. The task of selecting these functions is called planning. The planner has two kinds of input: the specification of the constraints and an order of priority for those constraints. 
The former typically changes seldom, while the latter frequently, making constraint planning a potential application for program specialization. This paper shows how to generate specialized planners for hierarchical multi-way dataflow constraint systems when the constraints are known in advance. The specialized planners are DFAs; they can be an order of magnitude or more faster than a general purpose planner for the same system. Our applications for constraint systems are in user interface programming, where constraint systems determine how a GUI should react to user interaction---specialized planners can help to ensure that GUIs' responses to user interaction are instantaneous.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Steindorfer:2015:CSM, author = "Michael J. Steindorfer and Jurgen J. Vinju", title = "Code specialization for memory efficient hash tries (short paper)", journal = j-SIGPLAN, volume = "50", number = "3", pages = "11--14", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658763", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The hash trie data structure is a common part in standard collection libraries of JVM programming languages such as Clojure and Scala. It enables fast immutable implementations of maps, sets, and vectors, but it requires considerably more memory than an equivalent array-based data structure. This hinders the scalability of functional programs and the further adoption of this otherwise attractive style of programming. In this paper we present a product family of hash tries. We generate Java source code to specialize them using knowledge of JVM object memory layout. The number of possible specializations is exponential. The optimization challenge is thus to find a minimal set of variants which lead to a maximal loss in memory footprint on any given data. Using a set of experiments we measured the distribution of internal tree node sizes in hash tries. We used the results as a guidance to decide which variants of the family to generate and which variants should be left to the generic implementation. A preliminary validating experiment on the implementation of sets and maps shows that this technique leads to a median decrease of 55\% in memory footprint for maps (and 78\% for sets), while still maintaining comparable performance. 
Our combination of data analysis and code specialization proved to be effective.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Malakuti:2015:EGM, author = "Somayeh Malakuti and Mehmet Aksit", title = "Emergent gummy modules: modular representation of emergent behavior", journal = j-SIGPLAN, volume = "50", number = "3", pages = "15--24", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658764", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emergent behavior is generally defined as the appearance of complex behavior out of a multiplicity of relatively simple interactions. Nowadays, there are various kinds of software systems that deal with detecting the emergence of certain behavior in the environment, representing it in the software, and providing means to manipulate the behavior. While a significant amount of research has been dedicated to developing algorithms for detecting emergent behavior, there has been no dedicated attempt to provide suitable linguistic abstractions to modularize emergent behavior and its related concerns. This results in implementations that are complex and hard to maintain. In this paper, we identify three characteristic features of emergent behavior, and outline the shortcomings of current languages to properly program and modularize emergent behavior. We introduce emergent gummy modules as dedicated linguistic abstractions, which facilitate defining the appearance and disappearance conditions of emergent behavior as well as its utilization operations as one holistic module. We explain the implementation of emergent gummy modules in the GummyJ language, and illustrate that they improve the modularity of implementations. We represent the event processing semantics of GummyJ programs in the UPPAAL model checker and verify their correctness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Gouseti:2015:ELI, author = "Maria Gouseti and Chiel Peters and Tijs van der Storm", title = "Extensible language implementation with object algebras (short paper)", journal = j-SIGPLAN, volume = "50", number = "3", pages = "25--28", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658765", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Object Algebras are a recently introduced design pattern to make the implementation of recursive data types more extensible. In this short paper we report our experience in using Object Algebras in building a realistic domain specific language (DSL) for questionnaires, called QL.
This experience has led to a simple, yet powerful set of tools for the practical and flexible implementation of highly extensible languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Walkingshaw:2015:PEV, author = "Eric Walkingshaw and Klaus Ostermann", title = "Projectional editing of variational software", journal = j-SIGPLAN, volume = "50", number = "3", pages = "29--38", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658766", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Editing the source code of variational software is complicated by the presence of variation annotations, such as \#ifdef statements, and by code that is only included in some configurations. When editing some configurations and not others, it would be easier to edit a simplified version of the source code that includes only the configurations we currently care about. In this paper, we present a projectional editing model for variational software. Using our approach, a programmer can partially configure a variational program, edit this simplified view of the code, and then automatically update the original, fully variational source code. The model is based on an isolation principle where edits affect only the variants that are visible in the view. We show that this principle has several nice properties that are suggested by related work on bidirectional transformations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Ruprecht:2015:AFS, author = "Andreas Ruprecht and Bernhard Heinloth and Daniel Lohmann", title = "Automatic feature selection in large-scale system-software product lines", journal = j-SIGPLAN, volume = "50", number = "3", pages = "39--48", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658767", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "System software can typically be configured at compile time via a comfortable feature-based interface to tailor its functionality towards a specific use case. However, with the growing number of features, this tailoring process becomes increasingly difficult: As a prominent example, the Linux kernel in v3.14 provides nearly 14 000 configuration options to choose from. Even developers of embedded systems refrain from trying to build a minimized distinctive kernel configuration for their device --- and thereby waste memory and money for unneeded functionality. In this paper, we present an approach for the automatic use-case specific tailoring of system software for special-purpose embedded systems. We evaluate the effectiveness of our approach on the example of Linux by generating tailored kernels for well-known applications of the Raspberry Pi and a Google Nexus 4 smartphone.
Compared to the original configurations, our approach leads to memory savings of 15-70 percent and requires only very little manual intervention.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Ma:2015:ETS, author = "Lei Ma and Cyrille Artho and Cheng Zhang and Hiroyuki Sato", title = "Efficient testing of software product lines via centralization (short paper)", journal = j-SIGPLAN, volume = "50", number = "3", pages = "49--52", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658768", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software product line~(SPL) engineering manages families of software products that share common features. However, cost-effective test case generation for an SPL is challenging. Applying existing test case generation techniques to each product variant separately may test common code in a redundant way. Moreover, it is difficult to share the test results among multiple product variants. In this paper, we propose the use of centralization, which combines multiple product variants from the same SPL and generates test cases for the entire system. By taking into account all variants, our technique generally avoids generating redundant test cases for common software components. Our case study on three SPLs shows that compared with testing each variant independently, our technique is more efficient and achieves higher test coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Smeltzer:2015:TAD, author = "Karl Smeltzer and Martin Erwig and Ronald Metoyer", title = "A transformational approach to data visualization", journal = j-SIGPLAN, volume = "50", number = "3", pages = "53--62", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658769", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Information visualization construction tools generally tend to fall in one of two disparate categories. Either they offer simple but inflexible visualization templates, or else they offer low-level graphical primitives which need to be assembled manually. Those that do offer flexible, domain-specific abstractions rarely focus on incrementally building and transforming visualizations, which could reduce limitations on the style of workflows supported. We present a Haskell-embedded DSL for data visualization that is designed to provide such abstractions and transformations. 
This DSL achieves additional expressiveness and flexibility through common functional programming idioms and the Haskell type class hierarchy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Shioda:2015:LLD, author = "Masato Shioda and Hideya Iwasaki and Shigeyuki Sato", title = "{LibDSL}: a library for developing embedded domain specific languages in {D} via template metaprogramming", journal = j-SIGPLAN, volume = "50", number = "3", pages = "63--72", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658770", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a library called LibDSL that helps the implementer of an embedded domain specific language (EDSL) effectively develop it in the D language. The LibDSL library accepts as input some kinds of ``specifications'' of the EDSL that the implementer is going to develop and a D program within which an EDSL source program written by the user is embedded. It produces the front-end code of an LALR parser for the EDSL program and back-end code of the execution engine. LibDSL is able to produce two kinds of execution engines, namely compiler-based and interpreter-based engines, either of which the user can properly choose depending on whether an EDSL program is known at compile time or not. We have implemented the LibDSL system by using template metaprogramming and other advanced facilities such as compile-time function execution of the D language. EDSL programs developed by means of LibDSL have a nice integrativeness with the host language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Jovanovic:2015:YYC, author = "Vojin Jovanovic and Amir Shaikhha and Sandro Stucki and Vladimir Nikolaev and Christoph Koch and Martin Odersky", title = "{Yin-Yang}: concealing the deep embedding of {DSLs}", journal = j-SIGPLAN, volume = "50", number = "3", pages = "73--82", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658771", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deeply embedded domain-specific languages (EDSLs) intrinsically compromise programmer experience for improved program performance. Shallow EDSLs complement them by trading program performance for good programmer experience. We present Yin-Yang, a framework for DSL embedding that uses Scala macros to reliably translate shallow EDSL programs to the corresponding deep EDSL programs. The translation allows program prototyping and development in the user-friendly shallow embedding, while the corresponding deep embedding is used where performance is important. The reliability of the translation completely conceals the deep embedding from the user. For the DSL author, Yin-Yang automatically generates the deep DSL embeddings from their shallow counterparts by reusing the core translation.
This obviates the need for code duplication and leads to reliability by construction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Hess:2015:ALF, author = "Benjamin Hess and Thomas R. Gross and Markus P{\"u}schel", title = "Automatic locality-friendly interface extension of numerical functions", journal = j-SIGPLAN, volume = "50", number = "3", pages = "83--92", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658772", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Raising the level of abstraction is a key concern of software engineering, and libraries (either used directly or as a target of a program generation system) are a successful technique to raise programmer productivity and to improve software quality. Unfortunately successful libraries may contain functions that may not be general enough. For example, many numeric performance libraries contain functions that work on one- or higher-dimensional arrays. A problem arises if a program wants to invoke such a function on a non-contiguous subarray (e.g., in C the column of a matrix or a subarray of an image). If the library developer did not foresee this scenario, the client program must include explicit copy steps before and after the library function call, incurring a possibly high performance penalty. A better solution would be an enhanced library function that allows for the desired access pattern. Exposing the access pattern allows the compiler to optimize for the intended usage scenario(s). As we do not want the library developer to generate all interesting versions manually, we present a tool that takes a library function written in C and generates such a customized function for typical accesses. We describe the approach, discuss limitations, and report on the performance. As example access patterns we consider those most common in numerical applications: striding and block striding, general permutations, as well as scaling. We evaluate the tool on various library functions including filters, scans, reductions, sorting, FFTs, and linear algebra operations. The automatically generated custom version is in most cases significantly faster than using individual steps, offering speed-ups that are typically in the range of 1.2--1.8x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Kamin:2015:ORS, author = "Sam Kamin and Mar{\'\i}a Jes{\'u}s Garzar{\'a}n and Baris Aktemur and Danqing Xu and Buse Yilmaz and Zhongbo Chen", title = "Optimization by runtime specialization for sparse matrix--vector multiplication", journal = j-SIGPLAN, volume = "50", number = "3", pages = "93--102", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658773", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Runtime specialization optimizes programs based on partial information available only at run time. It is applicable when some input data is used repeatedly while other input data varies. 
This technique has the potential of generating highly efficient codes. In this paper, we explore the potential for obtaining speedups for sparse matrix-dense vector multiplication using runtime specialization, in the case where a single matrix is to be multiplied by many vectors. We experiment with five methods involving runtime specialization, comparing them to methods that do not (including Intel's MKL library). For this work, our focus is the evaluation of the speedups that can be obtained with runtime specialization without considering the overheads of the code generation. Our experiments use 23 matrices from the Matrix Market and Florida collections, and run on five different machines. In 94 of those 115 cases, the specialized code runs faster than any version without specialization. If we only use specialization, the average speedup with respect to Intel's MKL library ranges from 1.44x to 1.77x, depending on the machine. We have also found that the best method depends on the matrix and machine; no method is best for all matrices and machines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Danilewski:2015:STD, author = "Piotr Danilewski and Marcel K{\"o}ster and Roland Lei{\ss}a and Richard Membarth and Philipp Slusallek", title = "Specialization through dynamic staging", journal = j-SIGPLAN, volume = "50", number = "3", pages = "103--112", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658774", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Partial evaluation allows for specialization of program fragments. This can be realized by staging, where one fragment is executed earlier than its surrounding code. However, taking advantage of these capabilities is often a cumbersome endeavor. In this paper, we present a new metaprogramming concept using staging parameters that are first-class citizen entities and define the order of execution of the program. Staging parameters can be used to define MetaML-like quotations, but can also allow stages to be created and resolved dynamically. The programmer can write generic, polyvariant code which can be reused in the context of different stages. We demonstrate how our approach can be used to define and apply domain-specific optimizations. Our implementation of the proposed metaprogramming concept generates code which is on a par with templated C++ code in terms of execution time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Asai:2015:CRL, author = "Kenichi Asai", title = "Compiling a reflective language using {MetaOCaml}", journal = j-SIGPLAN, volume = "50", number = "3", pages = "113--122", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658775", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A reflective language makes the language semantics open to user programs and allows them to access, extend, and modify it from within the same language framework. 
Because of its high flexibility and expressiveness, it can be an ideal platform for programming language research as well as practical applications in dynamic environments. However, efficient implementation of a reflective language is extremely difficult. Under circumstances where the language semantics can change, a partial evaluator is required for compilation. This paper reports on the experience of using MetaOCaml as a compiler for a reflective language. With staging annotations, MetaOCaml achieves the same effect as using a partial evaluator. Unlike the standard partial evaluator, the run mechanism of MetaOCaml enables us to use the specialized (compiled) code in the current runtime environment. On the other hand, the lack of a binding-time analysis in MetaOCaml prohibits us from compiling a user program under modified compiled semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Humer:2015:DSL, author = "Christian Humer and Christian Wimmer and Christian Wirth and Andreas W{\"o}{\ss} and Thomas W{\"u}rthinger", title = "A domain-specific language for building self-optimizing {AST} interpreters", journal = j-SIGPLAN, volume = "50", number = "3", pages = "123--132", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658776", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Self-optimizing AST interpreters dynamically adapt to the provided input for faster execution. This adaptation includes initial tests of the input, changes to AST nodes, and insertion of guards that ensure assumptions still hold. Such specialization and speculation is essential for the performance of dynamic programming languages such as JavaScript. In traditional procedural and object-oriented programming languages it can be tedious to write self-optimizing AST interpreters, as those languages fail to provide constructs that would specifically support that. This paper introduces a declarative domain-specific language (DSL) that greatly simplifies writing self-optimizing AST interpreters. The DSL supports specialization of operations based on types of the input and other properties. It can then use these specializations directly or chain them to represent the operation with the minimum amount of code possible. The DSL significantly reduces the complexity of expressing specializations for those interpreters. We use it in our high-performance implementation of JavaScript, where 274 language operations have an average of about 4 and a maximum of 190 specializations. In addition, the DSL is used in implementations of Ruby, Python, R, and Smalltalk.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Hill:2015:POO, author = "James H. Hill and Dennis C.
Feiock", title = "{Pin++}: an object-oriented framework for writing {Pintools}", journal = j-SIGPLAN, volume = "50", number = "3", pages = "133--141", month = mar, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775053.2658777", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a framework named Pin++. Pin++ is an object-oriented framework that uses template metaprogramming to implement Pintools, which are analysis tools for the dynamic binary instrumentation tool named Pin. The goal of Pin++ is to simplify programming a Pintool and promote reuse of its components across different Pintools. Our results show that Pintools implemented using Pin++ can have a 54\% reduction in complexity, increase its modularity, and up to 60\% reduction in instrumentation overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '14 conference proceedings.", } @Article{Ozturk:2015:ASC, author = "Ozcan Ozturk", title = "Architectural Support for Cyber-Physical Systems", journal = j-SIGPLAN, volume = "50", number = "4", pages = "1--1", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cyber-physical systems are integrations of computation, communication networks, and physical dynamics. Although time plays a central role in the physical world, all widely used software abstractions lack temporal semantics. The notion of correct execution of a program written in every widely-used programming language today does not depend on the temporal behavior of the program. But temporal behavior matters in almost all systems, and most particularly in cyber-physical systems. In this talk, I will argue that time can and must become part of the semantics of programs for a large class of applications. To illustrate that this is both practical and useful, we will describe a recent effort at Berkeley in the design and implementation of timing-centric software systems. Specifically, I will describe PRET machines, which redefine the instruction-set architecture (ISA) of a microprocessor to embrace temporal semantics. 
Such machines can be used in high-confidence and safety-critical systems, in energy-constrained systems, in mixed-criticality systems, and as a Real-Time Unit (RTU) that cooperates with a general-purpose processor to provide real-time services, in a manner similar to how a GPU provides graphics services.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Zhang:2015:MRH, author = "Yiying Zhang and Jian Yang and Amirsaman Memaripour and Steven Swanson", title = "{Mojim}: a Reliable and Highly-Available Non-Volatile Memory System", journal = j-SIGPLAN, volume = "50", number = "4", pages = "3--18", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Next-generation non-volatile memories (NVMs) promise DRAM-like performance, persistence, and high density. They can attach directly to processors to form non-volatile main memory (NVMM) and offer the opportunity to build very low-latency storage systems. These high-performance storage systems would be especially useful in large-scale data center environments where reliability and availability are critical. However, providing reliability and availability to NVMM is challenging, since the latency of data replication can overwhelm the low latency that NVMM should provide. We propose Mojim, a system that provides the reliability and availability that large-scale storage systems require, while preserving the performance of NVMM. Mojim achieves these goals by using a two-tier architecture in which the primary tier contains a mirrored pair of nodes and the secondary tier contains one or more secondary backup nodes with weakly consistent copies of data. Mojim uses highly-optimized replication protocols, software, and networking stacks to minimize replication costs and expose as much of NVMM's performance as possible. We evaluate Mojim using raw DRAM as a proxy for NVMM and using an industrial NVMM emulation system. We find that Mojim provides replicated NVMM with similar or even better performance than un-replicated NVMM (reducing latency by 27\% to 63\% and delivering between 0.4 and 2.7X the throughput). We demonstrate that replacing MongoDB's built-in replication system with Mojim improves MongoDB's performance by 3.4 to 4X.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Wang:2015:SPC, author = "Rujia Wang and Lei Jiang and Youtao Zhang and Jun Yang", title = "{SD-PCM}: Constructing Reliable Super Dense Phase Change Memory under Write Disturbance", journal = j-SIGPLAN, volume = "50", number = "4", pages = "19--31", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694352", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Phase Change Memory (PCM) has better scalability and smaller cell size compared to DRAM. However, further scaling the PCM cell in the deep sub-micron regime results in significant thermal-based write disturbance (WD).
Naively allocating large inter-cell space increases cell size from the ideal 4F$^2$ to 12F$^2$. While a recent work mitigates WD along word-lines through disturbance resilient data encoding, it is ineffective for WD along bit-lines, which is more severe due to the widely adopted $ \mu $Trench structure in constructing PCM cell arrays. Without mitigating WD along bit-lines, a PCM cell still has 8F$^2$, which is 100\% larger than the ideal. In this paper, we propose SD-PCM for achieving reliable write operations in super dense PCM. In particular, we focus on mitigating WD along bit-lines such that we can construct super dense PCM chips with 4F$^2$ cell size, i.e., the minimum for diode-switch based PCM. Based on simple verification-n-correction (VnC), we propose LazyCorrection and PreRead to effectively reduce VnC overhead and minimize cascading verification during write. We further propose (n:m)-Alloc for achieving a good tradeoff between VnC overhead minimization and memory capacity loss. Our experimental results show that, compared to a WD-free low-density PCM, SD-PCM achieves 80\% capacity improvement in cell arrays while incurring around 0-10\% performance degradation when using different (n:m) allocators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Young:2015:DWE, author = "Vinson Young and Prashant J. Nair and Moinuddin K. Qureshi", title = "{DEUCE}: Write-Efficient Encryption for Non-Volatile Memories", journal = j-SIGPLAN, volume = "50", number = "4", pages = "33--44", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Phase Change Memory (PCM) is an emerging Non Volatile Memory (NVM) technology that has the potential to provide scalable high-density memory systems. While the non-volatility of PCM is a desirable property in order to save leakage power, it also has the undesirable effect of making PCM main memories susceptible to newer modes of security vulnerabilities, for example, accessibility to sensitive data if a PCM DIMM gets stolen. PCM memories can be made secure by encrypting the data. Unfortunately, such encryption comes with a significant overhead in terms of bits written to PCM memory, causing half of the bits in the line to change on every write, even if the actual number of bits being written to memory is small. Our studies show that a typical writeback modifies, on average, only 12\% of the bits in the cacheline. Thus, encryption causes almost a 4x increase in the number of bits written to PCM memories. Such extraneous bit writes cause a significant increase in write power, reduction in write endurance, and reduction in write bandwidth. To provide the benefit of secure memory in a write-efficient manner, this paper proposes Dual Counter Encryption (DEUCE). DEUCE is based on the observation that a typical writeback only changes a few words, so DEUCE reencrypts only the words that have changed.
We show that DEUCE reduces the number of modified bits per writeback for a secure memory from 50\% to 24\%, which improves performance by 27\% and increases lifetime by 2x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Morrison:2015:TBT, author = "Adam Morrison and Yehuda Afek", title = "Temporally Bounding {TSO} for Fence-Free Asymmetric Synchronization", journal = j-SIGPLAN, volume = "50", number = "4", pages = "45--58", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694374", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a temporally bounded total store ordering (TBTSO) memory model, and shows that it enables nonblocking fence-free solutions to asymmetric synchronization problems, such as those arising in memory reclamation and biased locking. TBTSO strengthens the TSO memory model by bounding the time it takes a store to drain from the store buffer into memory. This bound enables devising fence-free algorithms for asymmetric problems, which require a performance-critical fast path to synchronize with an infrequently executed slow path. We demonstrate this by constructing (1) a fence-free version of the hazard pointers memory reclamation scheme, and (2) a fence-free biased lock algorithm which is compatible with unmanaged environments as it does not rely on safe points or similar mechanisms. We further argue that TBTSO can be implemented in hardware with modest modifications to existing TSO architectures. However, our design makes assumptions about proprietary implementation details of commercial hardware; it thus best serves as a starting point for a discussion on the feasibility of hardware TBTSO implementation. We also show how minimal OS support enables the adaptation of TBTSO algorithms to x86 systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Matveev:2015:RHN, author = "Alexander Matveev and Nir Shavit", title = "Reduced Hardware {NOrec}: a Safe and Scalable Hybrid Transactional Memory", journal = j-SIGPLAN, volume = "50", number = "4", pages = "59--71", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694393", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Because of hardware TM limitations, software fallbacks are the only way to make TM algorithms guarantee progress. Nevertheless, all known software fallbacks to date, from simple locks to sophisticated versions of the NOrec Hybrid TM algorithm, have either limited scalability or weakened semantics. We propose a novel reduced-hardware (RH) version of the NOrec HyTM algorithm. Instead of an all-software slow path, in our RH NOrec the slow-path is a ``mix'' of hardware and software: one short hardware transaction executes a maximal amount of initial reads in the hardware, and the second executes all of the writes. 
This novel combination of the RH approach and the NOrec algorithm delivers the first Hybrid TM that scales while fully preserving the hardware's original semantics of opacity and privatization. Our GCC implementation of RH NOrec is promising in that it shows improved performance relative to all prior methods, at the concurrency levels we could test today.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Orr:2015:SUR, author = "Marc S. Orr and Shuai Che and Ayse Yilmazer and Bradford M. Beckmann and Mark D. Hill and David A. Wood", title = "Synchronization Using Remote-Scope Promotion", journal = j-SIGPLAN, volume = "50", number = "4", pages = "73--86", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694350", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous system architecture (HSA) and OpenCL define scoped synchronization to facilitate low overhead communication across a subset of threads. Scoped synchronization works well for static sharing patterns, where consumer threads are known a priori. It works poorly for dynamic sharing patterns (e.g., work stealing) where programmers cannot use a faster small scope due to the rare possibility that the work is stolen by a thread in a distant slower scope. This puts programmers in a conundrum: optimize the common case by synchronizing at a faster small scope or use work stealing at a slower large scope. In this paper, we propose to extend scoped synchronization with remote-scope promotion. This allows the most frequent sharers to synchronize through a small scope. Infrequent sharers synchronize by promoting that remote small scope to a larger shared scope. Synchronization using remote-scope promotion provides performance robustness for dynamic workloads, where the benefits provided by scoped synchronization and work stealing are hard to anticipate. Compared to a na{\"\i}ve baseline, static scoped synchronization alone achieves a 1.07x speedup on average and dynamic work stealing alone achieves a 1.18x speedup on average. In contrast, synchronization using remote-scope promotion achieves a robust 1.25x speedup on average, across a diverse set of graph benchmarks and inputs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Liu:2015:GHS, author = "Chang Liu and Austin Harris and Martin Maas and Michael Hicks and Mohit Tiwari and Elaine Shi", title = "{GhostRider}: a Hardware-Software System for Memory Trace Oblivious Computation", journal = j-SIGPLAN, volume = "50", number = "4", pages = "87--101", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new, co-designed compiler and architecture called GhostRider for supporting privacy preserving computation in the cloud. 
GhostRider ensures all programs satisfy a property called memory-trace obliviousness (MTO): Even an adversary that observes memory, bus traffic, and access times while the program executes can learn nothing about the program's sensitive inputs and outputs. One way to achieve MTO is to employ Oblivious RAM (ORAM), allocating all code and data in a single ORAM bank, and to also disable caches or fix the rate of memory traffic. This baseline approach can be inefficient, and so GhostRider's compiler uses a program analysis to do better, allocating data to non-oblivious, encrypted RAM (ERAM) and employing a scratchpad when doing so will not compromise MTO. The compiler can also allocate to multiple ORAM banks, which sometimes significantly reduces access times. We have formalized our approach and proved it enjoys MTO. Our FPGA-based hardware prototype and simulation results show that GhostRider significantly outperforms the baseline strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Fletcher:2015:FON, author = "Christopher W. Fletcher and Ling Ren and Albert Kwon and Marten van Dijk and Srinivas Devadas", title = "Freecursive {ORAM}: [Nearly] Free Recursion and Integrity Verification for Position-based Oblivious {RAM}", journal = j-SIGPLAN, volume = "50", number = "4", pages = "103--116", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694353", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Oblivious RAM (ORAM) is a cryptographic primitive that hides memory access patterns as seen by untrusted storage. Recently, ORAM has been architected into secure processors. A big challenge for hardware ORAM schemes is how to efficiently manage the Position Map (PosMap), a central component in modern ORAM algorithms. Implemented naively, the PosMap causes ORAM to be fundamentally unscalable in terms of on-chip area. On the other hand, a technique called Recursive ORAM fixes the area problem yet significantly increases ORAM's performance overhead. To address this challenge, we propose three new mechanisms. We propose a new ORAM structure called the PosMap Lookaside Buffer (PLB) and PosMap compression techniques to reduce the performance overhead from Recursive ORAM empirically (the latter also improves the construction asymptotically). Through simulation, we show that these techniques reduce the memory bandwidth overhead needed to support recursion by 95\%, reduce overall ORAM bandwidth by 37\% and improve overall SPEC benchmark performance by 1.27x. We then show how our PosMap compression techniques further facilitate an extremely efficient integrity verification scheme for ORAM which we call PosMap MAC (PMMAC). For a practical parameterization, PMMAC reduces the amount of hashing needed for integrity checking by $ \geq 68 \times $ relative to prior schemes and introduces only 7\% performance overhead. We prototype our mechanisms in hardware and report area and clock frequency for a complete ORAM design post-synthesis and post-layout using an ASIC flow in a 32~nm commercial process. With 2 DRAM channels, the design post-layout runs at 1~GHz and has a total area of 0.47~mm$^2$. Depending on PLB-specific parameters, the PLB accounts for 10\% to 26\% area.
PMMAC costs 12\% of total design area. Our work is the first to prototype Recursive ORAM or ORAM with any integrity scheme in hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Chisnall:2015:BPA, author = "David Chisnall and Colin Rothwell and Robert N. M. Watson and Jonathan Woodruff and Munraj Vadera and Simon W. Moore and Michael Roe and Brooks Davis and Peter G. Neumann", title = "Beyond the {PDP-11}: Architectural Support for a Memory-Safe {C} Abstract Machine", journal = j-SIGPLAN, volume = "50", number = "4", pages = "117--130", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new memory-safe interpretation of the C abstract machine that provides stronger protection to benefit security and debugging. Despite ambiguities in the specification intended to provide implementation flexibility, contemporary implementations of C have converged on a memory model similar to the PDP-11, the original target for C. This model lacks support for memory safety despite well-documented impacts on security and reliability. Attempts to change this model are often hampered by assumptions embedded in a large body of existing C code, dating back to the memory model exposed by the original C compiler for the PDP-11. Our experience with attempting to implement a memory-safe variant of C on the CHERI experimental microprocessor led us to identify a number of problematic idioms. We describe these as well as their interaction with existing memory safety schemes and the assumptions that they make beyond the requirements of the C specification. Finally, we refine the CHERI ISA and abstract model for C, by combining elements of the CHERI capability model and fat pointers, and present a softcore CPU that implements a C abstract machine that can run legacy C code with strong memory protection guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Ma:2015:SDS, author = "Jiuyue Ma and Xiufeng Sui and Ninghui Sun and Yupeng Li and Zihao Yu and Bowen Huang and Tianni Xu and Zhicheng Yao and Yun Chen and Haibin Wang and Lixin Zhang and Yungang Bao", title = "Supporting Differentiated Services in Computers via Programmable Architecture for Resourcing-on-Demand {(PARD)}", journal = j-SIGPLAN, volume = "50", number = "4", pages = "131--143", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "This paper presents PARD, a programmable architecture for resourcing-on-demand that provides a new programming interface to convey an application's high-level information like quality-of-service requirements to the hardware. PARD enables new functionalities like fully hardware-supported virtualization and differentiated services in computers. 
PARD is inspired by the observation that a computer is inherently a network in which hardware components communicate via packets (e.g., over the NoC or PCIe). We apply principles of software-defined networking to this intra-computer network and address three major challenges. First, to deal with the semantic gap between high-level applications and underlying hardware packets, PARD attaches a high-level semantic tag (e.g., a virtual machine or thread ID) to each memory-access, I/O, or interrupt packet. Second, to make hardware components more manageable, PARD implements programmable control planes that can be integrated into various shared resources (e.g., cache, DRAM, and I/O devices) and can differentially process packets according to tag-based rules. Third, to facilitate programming, PARD abstracts all control planes as a device file tree to provide a uniform programming interface via which users create and apply tag-based rules. Full-system simulation results show that by co-locating latency-critical memcached applications with other workloads PARD can improve a four-core computer's CPU utilization by up to a factor of four without significantly increasing tail latency. FPGA emulation based on a preliminary RTL implementation demonstrates that the cache control plane introduces no extra latency and that the memory control plane can reduce queueing delay for high-priority memory-access requests by up to a factor of 5.6.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Omote:2015:IAE, author = "Yushi Omote and Takahiro Shinagawa and Kazuhiko Kato", title = "Improving Agility and Elasticity in Bare-metal Clouds", journal = j-SIGPLAN, volume = "50", number = "4", pages = "145--159", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694349", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Bare-metal clouds are an emerging infrastructure-as-a-service (IaaS) that leases physical machines (bare-metal instances) rather than virtual machines, allowing resource-intensive applications to have exclusive access to physical hardware. Unfortunately, bare-metal instances require time-consuming or OS-specific tasks for deployment due to the lack of virtualization layers, thereby sacrificing several beneficial features of traditional IaaS clouds such as agility, elasticity, and OS transparency. We present BMcast, an OS deployment system with a special-purpose de-virtualizable virtual machine monitor (VMM) that supports quick and OS-transparent startup of bare-metal instances. BMcast performs streaming OS deployment while allowing direct access to physical hardware from the guest OS, and then disappears after completing the deployment. Quick startup of instances improves agility and elasticity significantly, and OS transparency greatly simplifies management tasks for cloud customers. Experimental results have confirmed that BMcast initiated a bare-metal instance 8.6 times faster than image copying, and database performance on BMcast during streaming OS deployment was comparable to that on a state-of-the-art VMM without performing deployment. 
BMcast incurred zero overhead after de-virtualization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Haque:2015:FMI, author = "Md E. Haque and Yong hun Eom and Yuxiong He and Sameh Elnikety and Ricardo Bianchini and Kathryn S. McKinley", title = "Few-to-Many: Incremental Parallelism for Reducing Tail Latency in Interactive Services", journal = j-SIGPLAN, volume = "50", number = "4", pages = "161--175", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694384", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interactive services, such as Web search, recommendations, games, and finance, must respond quickly to satisfy customers. Achieving this goal requires optimizing tail (e.g., 99th+ percentile) latency. Although every server is multicore, parallelizing individual requests to reduce tail latency is challenging because (1) service demand is unknown when requests arrive; (2) blindly parallelizing all requests quickly oversubscribes hardware resources; and (3) parallelizing the numerous short requests will not improve tail latency. This paper introduces Few-to-Many (FM) incremental parallelization, which dynamically increases parallelism to reduce tail latency. FM uses request service demand profiles and hardware parallelism in an offline phase to compute a policy, represented as an interval table, which specifies when and how much software parallelism to add. At runtime, FM adds parallelism as specified by the interval table indexed by dynamic system load and request execution time progress. The longer a request executes, the more parallelism FM adds. We evaluate FM in Lucene, an open-source enterprise search engine, and in Bing, a commercial Web search engine. FM improves the 99th percentile response time up to 32\% in Lucene and up to 26\% in Bing, compared to prior state-of-the-art parallelization. Compared to running requests sequentially in Bing, FM improves tail latency by a factor of two. These results illustrate that incremental parallelism is a powerful tool for reducing tail latency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Colp:2015:PDS, author = "Patrick Colp and Jiawen Zhang and James Gleeson and Sahil Suneja and Eyal de Lara and Himanshu Raj and Stefan Saroiu and Alec Wolman", title = "Protecting Data on {Smartphones} and Tablets from Memory Attacks", journal = j-SIGPLAN, volume = "50", number = "4", pages = "177--189", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Smartphones and tablets are easily lost or stolen. This makes them susceptible to an inexpensive class of memory attacks, such as cold-boot attacks, using a bus monitor to observe the memory bus, and DMA attacks. This paper describes Sentry, a system that allows applications and OS components to store their code and data on the System-on-Chip (SoC) rather than in DRAM. 
We use ARM-specific mechanisms originally designed for embedded systems, but still present in today's mobile devices, to protect applications and OS subsystems from memory attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Dautenhahn:2015:NKO, author = "Nathan Dautenhahn and Theodoros Kasampalis and Will Dietz and John Criswell and Vikram Adve", title = "Nested Kernel: an Operating System Architecture for Intra-Kernel Privilege Separation", journal = j-SIGPLAN, volume = "50", number = "4", pages = "191--206", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Monolithic operating system designs undermine the security of computing systems by allowing single exploits anywhere in the kernel to enjoy full supervisor privilege. The nested kernel operating system architecture addresses this problem by ``nesting'' a small isolated kernel within a traditional monolithic kernel. The ``nested kernel'' interposes on all updates to virtual memory translations to assert protections on physical memory, thus significantly reducing the trusted computing base for memory access control enforcement. We incorporated the nested kernel architecture into FreeBSD on x86-64 hardware while allowing the entire operating system, including untrusted components, to operate at the highest hardware privilege level by write-protecting MMU translations and de-privileging the untrusted part of the kernel. Our implementation inherently enforces kernel code integrity while still allowing dynamically loaded kernel modules, thus defending against code injection attacks. We also demonstrate that the nested kernel architecture allows kernel developers to isolate memory in ways not possible in monolithic kernels by introducing write-mediation and write-logging services to protect critical system data structures. Performance of the nested kernel prototype shows modest overheads: $ < 1 \% $ average for Apache and 2.7\% for kernel compile. Overall, our results and experience show that the nested kernel design can be retrofitted to existing monolithic kernels, providing important security benefits.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Tan:2015:DWS, author = "Zhangxi Tan and Zhenghao Qian and Xi Chen and Krste Asanovic and David Patterson", title = "{DIABLO}: a Warehouse-Scale Computer Network Simulator using {FPGAs}", journal = j-SIGPLAN, volume = "50", number = "4", pages = "207--221", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694362", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Motivated by rapid software and hardware innovation in warehouse-scale computing (WSC), we visit the problem of warehouse-scale network design evaluation. A WSC is composed of about 30 arrays or clusters, each of which contains about 3000 servers, leading to a total of about 100,000 servers per WSC. 
We found many prior experiments have been conducted on relatively small physical testbeds, and they often assume the workload is static and that computations are only loosely coupled with the adaptive networking stack. We present a novel and cost-efficient FPGA-based evaluation methodology, called Datacenter-In-A-Box at LOw cost (DIABLO), which treats arrays as whole computers with tightly integrated hardware and software. We have built a 3,000-node prototype running the full WSC software stack. Using our prototype, we have successfully reproduced a few WSC phenomena, such as TCP Incast and memcached request latency long tail, and found that results do indeed change with both scale and with version of the full software stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Hauswald:2015:SOE, author = "Johann Hauswald and Michael A. Laurenzano and Yunqi Zhang and Cheng Li and Austin Rovinski and Arjun Khurana and Ronald G. Dreslinski and Trevor Mudge and Vinicius Petrucci and Lingjia Tang and Jason Mars", title = "{Sirius}: an Open End-to-End Voice and Vision Personal Assistant and Its Implications for Future Warehouse Scale Computers", journal = j-SIGPLAN, volume = "50", number = "4", pages = "223--238", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694347", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As user demand scales for intelligent personal assistants (IPAs) such as Apple's Siri, Google's Google Now, and Microsoft's Cortana, we are approaching the computational limits of current datacenter architectures. It is an open question how future server architectures should evolve to enable this emerging class of applications, and the lack of an open-source IPA workload is an obstacle in addressing this question. In this paper, we present the design of Sirius, an open end-to-end IPA web-service application that accepts queries in the form of voice and images, and responds with natural language. We then use this workload to investigate the implications of four points in the design space of future accelerator-based server architectures spanning traditional CPUs, GPUs, manycore throughput co-processors, and FPGAs. To investigate future server designs for Sirius, we decompose Sirius into a suite of 7 benchmarks (Sirius Suite) comprising the computationally intensive bottlenecks of Sirius. We port Sirius Suite to a spectrum of accelerator platforms and use the performance and power trade-offs across these platforms to perform a total cost of ownership (TCO) analysis of various server design points. In our study, we find that accelerators are critical for the future scalability of IPA services. Our results show that GPU- and FPGA-accelerated servers improve the query latency on average by 10x and 16x. 
For a given throughput, GPU- and FPGA-accelerated servers can reduce the TCO of datacenters by 2.6x and 1.4x, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Xu:2015:ALD, author = "Chao Xu and Felix Xiaozhu Lin and Yuyang Wang and Lin Zhong", title = "Automated {OS}-level Device Runtime Power Management", journal = j-SIGPLAN, volume = "50", number = "4", pages = "239--252", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694360", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-CPU devices on a modern system-on-a-chip (SoC), ranging from accelerators to I/O controllers, account for a significant portion of the chip area. It is therefore vital for system energy efficiency that idle devices can enter a low-power state while still meeting the performance expectation. This is called device runtime Power Management (PM) for which individual device drivers in commodity OSes are held responsible today. Based on the observations of existing drivers and their evolution, we consider it harmful to rely on drivers for device runtime PM. This paper identifies three pieces of information as essential to device runtime PM, and shows that they can be obtained without involving drivers, either by using a software-only approach, or more efficiently, by adding one register bit to each device. We thus suggest a structural change to the current Linux runtime PM framework, replacing the PM code in all applicable drivers with a single kernel module called the central PM agent. Experimental evaluations show that the central PM agent is just as effective as hand-tuned driver PM code. The paper also presents a tool called PowerAdvisor that simplifies driver PM efforts under the current Linux runtime PM framework. PowerAdvisor analyzes execution traces and suggests where to insert PM calls in driver source code. Despite being a best-effort tool, PowerAdvisor not only reproduces hand-tuned PM code from stock drivers, but also correctly suggests PM code never known before. Overall, our experience shows that it is promising to ultimately free driver developers from manual PM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Goiri:2015:CTV, author = "{\'I}{\~n}igo Goiri and Thu D. Nguyen and Ricardo Bianchini", title = "{CoolAir}: Temperature- and Variation-Aware Management for Free-Cooled Datacenters", journal = j-SIGPLAN, volume = "50", number = "4", pages = "253--265", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694378", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite its benefits, free cooling may expose servers to high absolute temperatures, wide temperature variations, and high humidity when datacenters are sited at certain locations. Prior research (in non-free-cooled datacenters) has shown that high temperatures and/or wide temporal temperature variations can harm hardware reliability. 
In this paper, we identify the runtime management strategies required to limit absolute temperatures, temperature variations, humidity, and cooling energy in free-cooled datacenters. As the basis for our study, we propose CoolAir, a system that embodies these strategies. Using CoolAir and a real free-cooled datacenter prototype, we show that effective management requires cooling infrastructures that can act smoothly. In addition, we show that CoolAir can tightly manage temperature and significantly reduce temperature variation, often at a lower cooling cost than existing free-cooled datacenters. Perhaps most importantly, based on our results, we derive several principles and lessons that should guide the design of management systems for free-cooled datacenters of any size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Mishra:2015:PGM, author = "Nikita Mishra and Huazhe Zhang and John D. Lafferty and Henry Hoffmann", title = "A Probabilistic Graphical Model-based Approach for Minimizing Energy Under Performance Constraints", journal = j-SIGPLAN, volume = "50", number = "4", pages = "267--281", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In many deployments, computer systems are underutilized --- meaning that applications have performance requirements that demand less than full system capacity. Ideally, we would take advantage of this under-utilization by allocating system resources so that the performance requirements are met and energy is minimized. This optimization problem is complicated by the fact that the performance and power consumption of various system configurations are often application --- or even input --- dependent. Thus, practically, minimizing energy for a performance constraint requires fast, accurate estimations of application-dependent performance and power tradeoffs. This paper investigates machine learning techniques that enable energy savings by learning Pareto-optimal power and performance tradeoffs. Specifically, we propose LEO, a probabilistic graphical model-based learning system that provides accurate online estimates of an application's power and performance as a function of system configuration. We compare LEO to (1) offline learning, (2) online learning, (3) a heuristic approach, and (4) the true optimal solution. We find that LEO produces the most accurate estimates and near optimal energy savings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Pang:2015:MLL, author = "Jun Pang and Chris Dwyer and Alvin R. 
Lebeck", title = "More is Less, Less is More: Molecular-Scale Photonic {NoC} Power Topologies", journal = j-SIGPLAN, volume = "50", number = "4", pages = "283--296", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Molecular-scale Network-on-Chip (mNoC) crossbars use quantum dot LEDs as an on-chip light source, and chromophores to provide optical signal filtering for receivers. An mNoC reduces power consumption or enables scaling to larger crossbars for a reduced energy budget compared to current nanophotonic NoC crossbars. Since communication latency is reduced by using a high-radix crossbar, minimizing power consumption becomes a primary design target. Conventional Single Writer Multiple Reader (SWMR) photonic crossbar designs broadcast all packets, and incur the commensurate required power, even if only two nodes are communicating. This paper introduces power topologies, enabled by unique capabilities of mNoC technology, to reduce overall interconnect power consumption. A power topology corresponds to the logical connectivity provided by a given power mode. Broadcast is one power mode and it consumes the maximum power. Additional power modes consume less power but allow a source to communicate with only a statically defined, potentially non-contiguous, subset of nodes. Overall interconnect power is reduced if the more frequently communicating nodes use modes that consume less power, while less frequently communicating nodes use modes that consume more power. We also investigate thread mapping techniques to fully exploit power topologies. We explore various mNoC power topologies with one, two and four power modes for a radix-256 SWMR mNoC crossbar. Our results show that the combination of power topologies and intelligent thread mapping can reduce total mNoC power by up to 51\% on average for a set of 12 SPLASH benchmarks. Furthermore performance is 10\% better than conventional resonator-based photonic NoCs and energy is reduced by 72\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Sridharan:2015:MEM, author = "Vilas Sridharan and Nathan DeBardeleben and Sean Blanchard and Kurt B. Ferreira and Jon Stearley and John Shalf and Sudhanva Gurumurthi", title = "Memory Errors in Modern Systems: The Good, The Bad, and The Ugly", journal = j-SIGPLAN, volume = "50", number = "4", pages = "297--310", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694348", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several recent publications have shown that hardware faults in the memory subsystem are commonplace. These faults are predicted to become more frequent in future systems that contain orders of magnitude more DRAM and SRAM than found in current memory subsystems. These memory subsystems will need to provide resilience techniques to tolerate these faults when deployed in high-performance computing systems and data centers containing tens of thousands of nodes. 
Therefore, it is critical to understand the efficacy of current hardware resilience techniques to determine whether they will be suitable for future systems. In this paper, we present a study of DRAM and SRAM faults and errors from the field. We use data from two leadership-class high-performance computer systems to analyze the reliability impact of hardware resilience schemes that are deployed in current systems. Our study has several key findings about the efficacy of many currently deployed reliability techniques such as DRAM ECC, DDR address/command parity, and SRAM ECC and parity. We also perform a methodological study, and find that counting errors instead of faults, a common practice among researchers and data center operators, can lead to incorrect conclusions about system reliability. Finally, we use our data to project the needs of future large-scale systems. We find that SRAM faults are unlikely to pose a significantly larger reliability threat in the future, while DRAM faults will be a major concern and stronger DRAM resilience schemes will be needed to maintain acceptable failure rates similar to those found on today's systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Yetim:2015:CMC, author = "Yavuz Yetim and Sharad Malik and Margaret Martonosi", title = "{CommGuard}: Mitigating Communication Errors in Error-Prone Parallel Execution", journal = j-SIGPLAN, volume = "50", number = "4", pages = "311--323", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694354", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As semiconductor technology scales towards ever-smaller transistor sizes, hardware fault rates are increasing. Since important application classes (e.g., multimedia, streaming workloads) are data-error-tolerant, recent research has proposed techniques that seek to save energy or improve yield by exploiting error tolerance at the architecture/microarchitecture level. Even seemingly error-tolerant applications, however, will crash or hang due to control-flow/memory addressing errors. In parallel computation, errors involving inter-thread communication can have equally catastrophic effects. Our work explores techniques that mitigate the impact of potentially catastrophic errors in parallel computation, while still garnering power, cost, or yield benefits from data error tolerance. Our proposed CommGuard solution uses FSM-based checkers to pad and discard data in order to maintain semantic alignment between program control flow and the data communicated between processors. CommGuard techniques are low overhead and they exploit application information already provided by some parallel programming languages (e.g., StreamIt). By converting potentially catastrophic communication errors into potentially tolerable data errors, CommGuard allows important streaming applications like JPEG and MP3 decoding to execute without crashing and to sustain good output quality, even for errors as frequent as every 500~$ \mu $s.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Kim:2015:DEF, author = "Dohyeong Kim and Yonghwi Kwon and William N.
Sumner and Xiangyu Zhang and Dongyan Xu", title = "Dual Execution for On-the-Fly Fine Grained Execution Comparison", journal = j-SIGPLAN, volume = "50", number = "4", pages = "325--338", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Execution comparison has many applications in debugging, malware analysis, software feature identification, and intrusion detection. Existing comparison techniques have various limitations. Some can only compare at the system event level and require executions to take the same input. Some require storing instruction traces that are very space-consuming and have difficulty dealing with non-determinism. In this paper, we propose a novel dual execution technique that allows on-the-fly comparison at the instruction level. Only differences between the executions are recorded. It allows executions to proceed in a coupled mode such that they share the same input sequence with the same timing, reducing nondeterminism. It also allows them to proceed in a decoupled mode such that the user can interact with each one differently. Decoupled executions can be recoupled to share the same future inputs and facilitate further comparison. We have implemented a prototype and applied it to identifying functional components for reuse, comparative debugging with new GDB primitives, and understanding real world regression failures. Our results show that dual execution is a critical enabling technique for execution comparison.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Hosek:2015:VUE, author = "Petr Hosek and Cristian Cadar", title = "{VARAN} the Unbelievable: an Efficient {$N$}-version Execution Framework", journal = j-SIGPLAN, volume = "50", number = "4", pages = "339--353", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the widespread availability of multi-core processors, running multiple diversified variants or several different versions of an application in parallel is becoming a viable approach for increasing the reliability and security of software systems. The key component of such N-version execution (NVX) systems is a runtime monitor that enables the execution of multiple versions in parallel. Unfortunately, existing monitors impose either a large performance overhead or rely on intrusive kernel-level changes. Moreover, none of the existing solutions scales well with the number of versions, since the runtime monitor acts as a performance bottleneck. In this paper, we introduce Varan, an NVX framework that combines selective binary rewriting with a novel event-streaming architecture to significantly reduce performance overhead and scale well with the number of versions, without relying on intrusive kernel modifications. 
Our evaluation shows that Varan can run NVX systems based on popular C10k network servers with only a modest performance overhead, and can be effectively used to increase software reliability using techniques such as transparent failover, live sanitization and multi-revision execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Malka:2015:REI, author = "Moshe Malka and Nadav Amit and Muli Ben-Yehuda and Dan Tsafrir", title = "{rIOMMU}: Efficient {IOMMU} for {I/O} Devices that Employ Ring Buffers", journal = j-SIGPLAN, volume = "50", number = "4", pages = "355--368", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694355", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The IOMMU allows the OS to encapsulate I/O devices in their own virtual memory spaces, thus restricting their DMAs to specific memory pages. The OS uses the IOMMU to protect itself against buggy drivers and malicious/errant devices. But the added protection comes at a cost, degrading the throughput of I/O-intensive workloads by up to an order of magnitude. This cost has motivated system designers to trade off some safety for performance, e.g., by leaving stale information in the IOTLB for a while so as to amortize costly invalidations. We observe that high-bandwidth devices---like network and PCIe SSD controllers---interact with the OS via circular ring buffers that induce a sequential, predictable workload. We design a ring IOMMU (rIOMMU) that leverages this characteristic by replacing the virtual memory page table hierarchy with a circular, flat table. A flat table is adequately supported by exactly one IOTLB entry, making every new translation an implicit invalidation of the former and thus requiring explicit invalidations only at the end of I/O bursts. Using standard networking benchmarks, we show that rIOMMU provides up to 7.56x higher throughput relative to the baseline IOMMU, and that it is within 0.77--1.00x the throughput of a system without IOMMU protection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Liu:2015:PPM, author = "Daofu Liu and Tianshi Chen and Shaoli Liu and Jinhong Zhou and Shengyuan Zhou and Olivier Teman and Xiaobing Feng and Xuehai Zhou and Yunji Chen", title = "{PuDianNao}: a Polyvalent Machine Learning Accelerator", journal = j-SIGPLAN, volume = "50", number = "4", pages = "369--381", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694358", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Machine Learning (ML) techniques are pervasive tools in various emerging commercial applications, but have to be accommodated by powerful computer systems to process very large data. Although general-purpose CPUs and GPUs have provided straightforward solutions, their energy-efficiencies are limited due to their excessive supports for flexibility. 
Hardware accelerators may achieve better energy-efficiencies, but each accelerator often accommodates only a single ML technique (family). According to the famous No-Free-Lunch theorem in the ML domain, however, an ML technique that performs well on one dataset may perform poorly on another, which implies that such an accelerator may sometimes lead to poor learning accuracy. Even setting aside learning accuracy, such an accelerator can still become inapplicable simply because the concrete ML task is altered, or the user chooses another ML technique. In this study, we present an ML accelerator called PuDianNao, which accommodates seven representative ML techniques, including k-means, k-nearest neighbors, naive Bayes, support vector machine, linear regression, classification tree, and deep neural network. Benefiting from our thorough analysis of the computational primitives and locality properties of different ML techniques, PuDianNao can perform up to 1056 GOP/s (e.g., additions and multiplications) in an area of 3.51 mm$^2$, and consumes only 596 mW. Compared with the NVIDIA K20M GPU (28~nm process), PuDianNao (65~nm process) is 1.20x faster, and can reduce the energy by 128.41x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Goiri:2015:ABA, author = "Inigo Goiri and Ricardo Bianchini and Santosh Nagarakatte and Thu D. Nguyen", title = "{ApproxHadoop}: Bringing Approximations to {MapReduce} Frameworks", journal = j-SIGPLAN, volume = "50", number = "4", pages = "383--397", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694351", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose and evaluate a framework for creating and running approximation-enabled MapReduce programs. Specifically, we propose approximation mechanisms that fit naturally into the MapReduce paradigm, including input data sampling, task dropping, and accepting and running a precise and a user-defined approximate version of the MapReduce code. We then show how to leverage statistical theories to compute error bounds for popular classes of MapReduce programs when approximating with input data sampling and/or task dropping. We implement the proposed mechanisms and error bound estimations in a prototype system called ApproxHadoop. Our evaluation uses MapReduce applications from different domains, including data analytics, scientific computing, video encoding, and machine learning. Our results show that ApproxHadoop can significantly reduce application execution time and/or energy consumption when the user is willing to tolerate small errors. For example, ApproxHadoop can reduce runtimes by up to 32x when the user can tolerate an error of 1\% with 95\% confidence.
We conclude that our framework and system can make approximation easily accessible to many application domains using the MapReduce model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Ringenburg:2015:MDQ, author = "Michael Ringenburg and Adrian Sampson and Isaac Ackerman and Luis Ceze and Dan Grossman", title = "Monitoring and Debugging the Quality of Results in Approximate Programs", journal = j-SIGPLAN, volume = "50", number = "4", pages = "399--411", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694365", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy efficiency is a key concern in the design of modern computer systems. One promising approach to energy-efficient computation, approximate computing, trades off output accuracy for significant gains in energy efficiency. However, debugging the actual cause of output quality problems in approximate programs is challenging. This paper presents dynamic techniques to debug and monitor the quality of approximate computations. We propose both offline debugging tools that instrument code to determine the key sources of output degradation and online approaches that monitor the quality of deployed applications. We present two offline debugging techniques and three online monitoring mechanisms. The first offline tool identifies correlations between output quality and the execution of individual approximate operations. The second tracks approximate operations that flow into a particular value. Our online monitoring mechanisms are complementary approaches designed for detecting quality problems in deployed applications, while still maintaining the energy savings from approximation. We present implementations of our techniques and describe their usage with seven applications. Our online monitors control output quality while still maintaining significant energy efficiency gains, and our offline tools provide new insights into the effects of approximation on output quality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Banavar:2015:WEC, author = "Guruduth Banavar", title = "{Watson} and the Era of Cognitive Computing", journal = j-SIGPLAN, volume = "50", number = "4", pages = "413--413", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the last decade, the availability of massive amounts of new data, and the development of new machine learning technologies, have augmented reasoning systems to give rise to a new class of computing systems. These ``Cognitive Systems'' learn from data, reason from models, and interact naturally with us, to perform complex tasks better than either humans or machines can do by themselves. In essence, cognitive systems help us perform like the best by penetrating the complexity of big data and leverage the power of models. One of the first cognitive systems, called Watson, demonstrated through a Jeopardy! 
exhibition match, that it was capable of answering complex factoid questions as effectively as the world's champions. Follow-on cognitive systems perform other tasks, such as discovery, reasoning, and multi-modal understanding in a variety of domains, such as healthcare, insurance, and education. We believe such cognitive systems will transform every industry and our everyday life for the better. In this talk, I will give an overview of the applications, the underlying capabilities, and some of the key challenges of cognitive systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Stewart:2015:ZDW, author = "Gordon Stewart and Mahanth Gowda and Geoffrey Mainland and Bozidar Radunovic and Dimitrios Vytiniotis and Cristina Luengo Agullo", title = "{Ziria}: a {DSL} for Wireless Systems Programming", journal = j-SIGPLAN, volume = "50", number = "4", pages = "415--428", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694368", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined radio (SDR) brings the flexibility of software to wireless protocol design, promising an ideal platform for innovation and rapid protocol deployment. However, implementing modern wireless protocols on existing SDR platforms often requires careful hand-tuning of low-level code, which can undermine the advantages of software. Ziria is a new domain-specific language (DSL) that offers programming abstractions suitable for wireless physical (PHY) layer tasks while emphasizing the pipeline reconfiguration aspects of PHY programming. The Ziria compiler implements a rich set of specialized optimizations, such as lookup table generation and pipeline fusion. We also offer an algorithm --- novel due to pipeline reconfiguration --- to optimize the data widths of computations in Ziria pipelines. We demonstrate the programming flexibility of Ziria and the performance of the generated code through a detailed evaluation of a line-rate Ziria WiFi 802.11a/g implementation that is on par with, and in many cases outperforms, a hand-tuned state-of-the-art C++ implementation on commodity CPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Mullapudi:2015:PAO, author = "Ravi Teja Mullapudi and Vinay Vasista and Uday Bondhugula", title = "{PolyMage}: Automatic Optimization for Image Processing Pipelines", journal = j-SIGPLAN, volume = "50", number = "4", pages = "429--443", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694364", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents the design and implementation of PolyMage, a domain-specific language and compiler for image processing pipelines. An image processing pipeline can be viewed as a graph of interconnected stages which process images successively. Each stage typically performs one of point-wise, stencil, reduction or data-dependent operations on image pixels.
Individual stages in a pipeline typically exhibit abundant data parallelism that can be exploited with relative ease. However, the stages also require high memory bandwidth, preventing effective utilization of parallelism available on modern architectures. For applications that demand high performance, the traditional options are to use optimized libraries like OpenCV or to optimize manually. While using libraries precludes optimization across library routines, manual optimization accounting for both parallelism and locality is very tedious. The focus of our system, PolyMage, is on automatically generating high-performance implementations of image processing pipelines expressed in a high-level declarative language. Our optimization approach primarily relies on the transformation and code generation capabilities of the polyhedral compiler framework. To the best of our knowledge, this is the first model-driven compiler for image processing pipelines that performs complex fusion, tiling, and storage optimization automatically. Experimental results on a modern multicore system show that the performance achieved by our automatic approach is up to 1.81x better than that achieved through manual tuning in Halide, a state-of-the-art language and compiler for image processing pipelines. For a camera raw image processing pipeline, our performance is comparable to that of a hand-tuned implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Heckey:2015:CMC, author = "Jeff Heckey and Shruti Patil and Ali JavadiAbhari and Adam Holmes and Daniel Kudrow and Kenneth R. Brown and Diana Franklin and Frederic T. Chong and Margaret Martonosi", title = "Compiler Management of Communication and Parallelism for Quantum Computation", journal = j-SIGPLAN, volume = "50", number = "4", pages = "445--456", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694357", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Quantum computing (QC) offers huge promise to accelerate a range of computationally intensive benchmarks. Quantum computing is limited, however, by the challenges of decoherence: i.e., a quantum state can only be maintained for short windows of time before it decoheres. While quantum error correction codes can protect against decoherence, fast execution time is the best defense against decoherence, so efficient architectures and effective scheduling algorithms are necessary. This paper proposes the Multi-SIMD QC architecture and then proposes and evaluates effective schedulers to map benchmark descriptions onto Multi-SIMD architectures. The Multi-SIMD model consists of a small number of SIMD regions, each of which may support operations on up to thousands of qubits per cycle. Efficient Multi-SIMD operation requires efficient scheduling. This work develops schedulers to reduce communication requirements of qubits between operating regions, while also improving parallelism. We find that communication to global memory is a dominant cost in QC. We also note that many quantum benchmarks have long serial operation paths (although each operation may be data parallel).
To exploit this characteristic, we introduce Longest-Path-First Scheduling (LPFS), which pins operations to SIMD regions to keep data in place and reduce communication to memory. The use of small, local scratchpad memories further reduces communication. Our results show a 3\% to 308\% improvement for LPFS over conventional scheduling algorithms, and an additional 3\% to 64\% improvement using scratchpad memories. Our work is the most comprehensive software-to-quantum toolflow published to date, with efficient and practical scheduling techniques that reduce communication and increase parallelism for full-scale quantum code executing up to a trillion quantum gate operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Hassaan:2015:KDG, author = "Muhammad Amber Hassaan and Donald D. Nguyen and Keshav K. Pingali", title = "Kinetic Dependence Graphs", journal = j-SIGPLAN, volume = "50", number = "4", pages = "457--471", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694363", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Task graphs or dependence graphs are used in runtime systems to schedule tasks for parallel execution. In problem domains such as dense linear algebra and signal processing, dependence graphs can be generated from a program by static analysis. However, in emerging problem domains such as graph analytics, the set of tasks and dependences between tasks in a program are complex functions of runtime values and cannot be determined statically. In this paper, we introduce a novel approach for exploiting parallelism in such programs. This approach is based on a data structure called the kinetic dependence graph (KDG), which consists of a dependence graph together with update rules that incrementally update the graph to reflect changes in the dependence structure whenever a task is completed. We have implemented a simple programming model that allows programmers to write these applications at a high level of abstraction, and a runtime within the Galois system [15] that builds the KDG automatically and executes the program in parallel.
On a suite of programs that are difficult to parallelize otherwise, we have obtained speedups of up to 33 on 40 cores, outperforming third-party implementations in many cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Sidiroglou-Douskos:2015:TAI, author = "Stelios Sidiroglou-Douskos and Eric Lahtinen and Nathan Rittenhouse and Paolo Piselli and Fan Long and Deokhwan Kim and Martin Rinard", title = "Targeted Automatic Integer Overflow Discovery Using Goal-Directed Conditional Branch Enforcement", journal = j-SIGPLAN, volume = "50", number = "4", pages = "473--486", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new technique and system, DIODE, for automatically generating inputs that trigger overflows at memory allocation sites. DIODE is designed to identify relevant sanity checks that inputs must satisfy to trigger overflows at target memory allocation sites, then generate inputs that satisfy these sanity checks to successfully trigger the overflow. DIODE works with off-the-shelf, production x86 binaries. Our results show that, for our benchmark set of applications, and for every target memory allocation site exercised by our seed inputs (which the applications process correctly with no overflows), either (1) DIODE is able to generate an input that triggers an overflow at that site or (2) there is no input that would trigger an overflow for the observed target expression at that site.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Dhawan:2015:ASS, author = "Udit Dhawan and Catalin Hritcu and Raphael Rubin and Nikos Vasilakis and Silviu Chiricescu and Jonathan M. Smith and Thomas F. {Knight, Jr.} and Benjamin C. Pierce and Andre DeHon", title = "Architectural Support for Software-Defined Metadata Processing", journal = j-SIGPLAN, volume = "50", number = "4", pages = "487--502", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Optimized hardware for propagating and checking software-programmable metadata tags can achieve low runtime overhead. We generalize prior work on hardware tagging by considering a generic architecture that supports software-defined policies over metadata of arbitrary size and complexity; we introduce several novel microarchitectural optimizations that keep the overhead of this rich processing low. Our model thus achieves the efficiency of previous hardware-based approaches with the flexibility of the software-based ones. We demonstrate this by using it to enforce four diverse safety and security policies---spatial and temporal memory safety, taint tracking, control-flow integrity, and code and data separation---plus a composite policy that enforces all of them simultaneously.
Experiments on SPEC CPU2006 benchmarks with a PUMP-enhanced RISC processor show modest impact on runtime (typically under 10\%) and power ceiling (less than 10\%), in return for some increase in energy usage (typically under 60\%) and area for on-chip memory structures (110\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Zhang:2015:HDL, author = "Danfeng Zhang and Yao Wang and G. Edward Suh and Andrew C. Myers", title = "A Hardware Design Language for Timing-Sensitive Information-Flow Security", journal = j-SIGPLAN, volume = "50", number = "4", pages = "503--516", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Information security can be compromised by leakage via low-level hardware features. One recently prominent example is cache probing attacks, which rely on timing channels created by caches. We introduce a hardware design language, SecVerilog, which makes it possible to statically analyze information flow at the hardware level. With SecVerilog, systems can be built with verifiable control of timing channels and other information channels. SecVerilog is Verilog, extended with expressive type annotations that enable precise reasoning about information flow. It also comes with rigorous formal assurance: we prove that SecVerilog enforces timing-sensitive noninterference and thus ensures secure information flow. By building a secure MIPS processor and its caches, we demonstrate that SecVerilog makes it possible to build complex hardware designs with verified security, yet with low overhead in time, space, and HW designer effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Hicks:2015:SLR, author = "Matthew Hicks and Cynthia Sturton and Samuel T. King and Jonathan M. Smith", title = "{SPECS}: a Lightweight Runtime Mechanism for Protecting Software from Security-Critical Processor Bugs", journal = j-SIGPLAN, volume = "50", number = "4", pages = "517--529", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694366", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Processor implementation errata remain a problem, and worse, a subset of these bugs are security-critical. We classified 7 years of errata from recent commercial processors to understand the magnitude and severity of this problem, and found that of 301 errata analyzed, 28 are security-critical. We propose the SECURITY-CRITICAL PROCESSOR ERRATA CATCHING SYSTEM (SPECS) as a low-overhead solution to this problem. SPECS employs a dynamic verification strategy that is made lightweight by limiting protection to only security-critical processor state. As a proof-of-concept, we implement a hardware prototype of SPECS in an open source processor. Using this prototype, we evaluate SPECS against a set of 14 bugs inspired by the types of security-critical errata we discovered in the classification phase.
The evaluation shows that SPECS is 86\% effective as a defense when deployed using only ISA-level state; incurs less than 5\% area and power overhead; and has no software run-time overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Duan:2015:AMF, author = "Yuelu Duan and Nima Honarmand and Josep Torrellas", title = "Asymmetric Memory Fences: Optimizing Both Performance and Implementability", journal = j-SIGPLAN, volume = "50", number = "4", pages = "531--543", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694388", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There have been several recent efforts to improve the performance of fences. The most aggressive designs allow post-fence accesses to retire and complete before the fence completes. Unfortunately, such designs present implementation difficulties due to their reliance on global state and structures. This paper's goal is to optimize both the performance and the implementability of fences. We start off with a design like the most aggressive ones but without the global state. We call it Weak Fence or wF. Since the concurrent execution of multiple wFs can deadlock, we combine wFs with a conventional fence (i.e., Strong Fence or sF) for the less performance-critical thread(s). We call the result an Asymmetric fence group. We also propose a taxonomy of Asymmetric fence groups under TSO. Compared to past aggressive fences, Asymmetric fence groups are both substantially easier to implement and have higher average performance. The two main designs presented (WS+ and W+) speed up workloads under TSO by an average of 13\% and 21\%, respectively, over conventional fences.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Sung:2015:DES, author = "Hyojin Sung and Sarita V. Adve", title = "{DeNovoSync}: Efficient Support for Arbitrary Synchronization without Writer-Initiated Invalidations", journal = j-SIGPLAN, volume = "50", number = "4", pages = "545--559", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694356", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current shared-memory hardware is complex and inefficient. Prior work on the DeNovo coherence protocol showed that disciplined shared-memory programming models can enable more complexity-, performance-, and energy-efficient hardware than the state-of-the-art MESI protocol. DeNovo, however, severely restricted the synchronization constructs an application can support. This paper proposes DeNovoSync, a technique to support arbitrary synchronization in DeNovo. The key challenge is that DeNovo exploits race-freedom to use reader-initiated local self-invalidations (instead of conventional writer-initiated remote cache invalidations) to ensure coherence. Synchronization accesses are inherently racy and not directly amenable to self-invalidations.
DeNovoSync addresses this challenge using a novel combination of registration of all synchronization reads with a judicious hardware backoff to limit unnecessary registrations. For a wide variety of synchronization constructs and applications, compared to MESI, DeNovoSync shows comparable or up to 22\% lower execution time and up to 58\% lower network traffic, enabling DeNovo's advantages for a much broader class of software than previously possible.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Sengupta:2015:HSD, author = "Aritra Sengupta and Swarnendu Biswas and Minjia Zhang and Michael D. Bond and Milind Kulkarni", title = "Hybrid Static-Dynamic Analysis for Statically Bounded Region Serializability", journal = j-SIGPLAN, volume = "50", number = "4", pages = "561--575", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data races are common. They are difficult to detect, avoid, or eliminate, and programmers sometimes introduce them intentionally. However, shared-memory programs with data races have unexpected, erroneous behaviors. Intentional and unintentional data races lead to atomicity and sequential consistency (SC) violations, and they make it more difficult to understand, test, and verify software. Existing approaches for providing stronger guarantees for racy executions add high run-time overhead and/or rely on custom hardware. This paper shows how to provide stronger semantics for racy programs while providing relatively good performance on commodity systems. A novel hybrid static--dynamic analysis called \emph{EnfoRSer} provides end-to-end support for a memory model called \emph{statically bounded region serializability} (SBRS) that is not only stronger than weak memory models but strictly stronger than SC. EnfoRSer uses static compiler analysis to transform regions, and dynamic analysis to detect and resolve conflicts at run time. By demonstrating commodity support for a reasonably strong memory model with reasonable overheads, we show its potential as an always-on execution model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Alglave:2015:GCW, author = "Jade Alglave and Mark Batty and Alastair F. Donaldson and Ganesh Gopalakrishnan and Jeroen Ketema and Daniel Poetzl and Tyler Sorensen and John Wickerson", title = "{GPU} Concurrency: Weak Behaviours and Programming Assumptions", journal = j-SIGPLAN, volume = "50", number = "4", pages = "577--591", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency is pervasive and perplexing, particularly on graphics processing units (GPUs). Current specifications of languages and hardware are inconclusive; thus programmers often rely on folklore assumptions when writing software.
To remedy this state of affairs, we conducted a large empirical study of the concurrent behaviour of deployed GPUs. Armed with litmus tests (i.e. short concurrent programs), we questioned the assumptions in programming guides and vendor documentation about the guarantees provided by hardware. We developed a tool to generate thousands of litmus tests and run them under stressful workloads. We observed a litany of previously elusive weak behaviours, and exposed folklore beliefs about GPU programming---often supported by official tutorials---as false. As a way forward, we propose a model of Nvidia GPU hardware, which correctly models every behaviour witnessed in our experiments. The model is a variant of SPARC Relaxed Memory Order (RMO), structured following the GPU concurrency hierarchy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Park:2015:CCP, author = "Jason Jong Kyu Park and Yongjun Park and Scott Mahlke", title = "{Chimera}: Collaborative Preemption for Multitasking on a Shared {GPU}", journal = j-SIGPLAN, volume = "50", number = "4", pages = "593--606", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694346", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The demand for multitasking on graphics processing units (GPUs) is constantly increasing as they have become one of the default components on modern computer systems along with traditional processors (CPUs). Preemptive multitasking on CPUs has been primarily supported through context switching. However, the same preemption strategy incurs substantial overhead due to the large context in GPUs. The overhead comes in two dimensions: a preempting kernel suffers from a long preemption latency, and the system throughput is wasted during the switch. Without precise control over the large preemption overhead, multitasking on GPUs has little use for applications with strict latency requirements. In this paper, we propose Chimera, a collaborative preemption approach that can precisely control the overhead for multitasking on GPUs. Chimera first introduces streaming multiprocessor (SM) flushing, which can instantly preempt an SM by detecting and exploiting idempotent execution. Chimera utilizes flushing collaboratively with two previously proposed preemption techniques for GPUs, namely context switching and draining, to minimize throughput overhead while achieving a required preemption latency. Evaluations show that Chimera violates the deadline for only 0.2\% of preemption requests when a 15us preemption latency constraint is used. For multi-programmed workloads, Chimera can improve the average normalized turnaround time by 5.5x, and system throughput by 12.2\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Agarwal:2015:PPS, author = "Neha Agarwal and David Nellans and Mark Stephenson and Mike O'Connor and Stephen W.
Keckler", title = "Page Placement Strategies for {GPUs} within Heterogeneous Memory Systems", journal = j-SIGPLAN, volume = "50", number = "4", pages = "607--618", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694381", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Systems from smartphones to supercomputers are increasingly heterogeneous, being composed of both CPUs and GPUs. To maximize cost and energy efficiency, these systems will increasingly use globally-addressable heterogeneous memory systems, making choices about memory page placement critical to performance. In this work we show that current page placement policies are not sufficient to maximize GPU performance in these heterogeneous memory systems. We propose two new page placement policies that improve GPU performance: one application agnostic and one using application profile information. Our application agnostic policy, bandwidth-aware (BW-AWARE) placement, maximizes GPU throughput by balancing page placement across the memories based on the aggregate memory bandwidth available in a system. Our simulation-based results show that BW-AWARE placement outperforms the existing Linux INTERLEAVE and LOCAL policies by 35\% and 18\% on average for GPU compute workloads. We build upon BW-AWARE placement by developing a compiler-based profiling mechanism that provides programmers with information about GPU application data structure access patterns. Combining this information with simple program-annotated hints about memory placement, our hint-based page placement approach performs within 90\% of oracular page placement on average, largely mitigating the need for costly dynamic page tracking and migration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Zhao:2015:FPS, author = "Zhijia Zhao and Xipeng Shen", title = "On-the-Fly Principled Speculation for {FSM} Parallelization", journal = j-SIGPLAN, volume = "50", number = "4", pages = "619--630", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Finite State Machine (FSM) is the backbone of an important class of applications in many domains. Its parallelization has been extremely difficult due to inherent strong dependences in the computation. Recently, principled speculation shows good promise to solve the problem. However, the reliance on offline training makes the approach inconvenient to adopt and hard to apply to many practical FSM applications, which often deal with a large variety of inputs different from training inputs. This work presents an assembly of techniques that completely remove the needs for offline training. The techniques include a set of theoretical results on inherent properties of FSMs, and two newly designed dynamic optimizations for efficient FSM characterization. The new techniques, for the first time, make principle speculation applicable on the fly, and enables swift, automatic configuration of speculative parallelizations to best suit a given FSM and its current input. 
They eliminate the fundamental barrier for practical adoption of principled speculation for FSM parallelization. Experiments show that the new techniques give significantly higher speedups for some difficult FSM applications in the presence of input changes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{David:2015:ACS, author = "Tudor David and Rachid Guerraoui and Vasileios Trigonakis", title = "Asynchronized Concurrency: The Secret to Scaling Concurrent Search Data Structures", journal = j-SIGPLAN, volume = "50", number = "4", pages = "631--644", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694359", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce ``asynchronized concurrency (ASCY),'' a paradigm consisting of four complementary programming patterns. ASCY calls for the design of concurrent search data structures (CSDSs) to resemble that of their sequential counterparts. We argue that ASCY leads to implementations which are portably scalable: they scale across different types of hardware platforms, including single and multi-socket ones, for various classes of workloads, such as read-only and read-write, and according to different performance metrics, including throughput, latency, and energy. We substantiate our thesis through the most exhaustive evaluation of CSDSs to date, involving 6 platforms, 22 state-of-the-art CSDS algorithms, 10 re-engineered state-of-the-art CSDS algorithms following the ASCY patterns, and 2 new CSDS algorithms designed with ASCY in mind. We observe up to 30\% improvements in throughput in the re-engineered algorithms, while our new algorithms outperform the state-of-the-art alternatives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Bhatotia:2015:ITL, author = "Pramod Bhatotia and Pedro Fonseca and Umut A. Acar and Bj{\"o}rn B. Brandenburg and Rodrigo Rodrigues", title = "{iThreads}: a Threading Library for Parallel Incremental Computation", journal = j-SIGPLAN, volume = "50", number = "4", pages = "645--659", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694371", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Incremental computation strives for efficient successive runs of applications by re-executing only those parts of the computation that are affected by a given input change instead of recomputing everything from scratch. To realize these benefits automatically, we describe iThreads, a threading library for parallel incremental computation. iThreads supports unmodified shared-memory multithreaded programs: it can be used as a replacement for pthreads by a simple exchange of dynamically linked libraries, without even recompiling the application code.
To enable such an interface, we designed algorithms and an implementation to operate at the compiled binary code level by leveraging MMU-assisted memory access tracking and process-based thread isolation. Our evaluation on a multicore platform using applications from the PARSEC and Phoenix benchmarks and two case studies shows significant performance gains.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Gidra:2015:NGC, author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and Marc Shapiro and Nhan Nguyen", title = "{NumaGiC}: a Garbage Collector for Big Data on Big {NUMA} Machines", journal = j-SIGPLAN, volume = "50", number = "4", pages = "661--673", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694361", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "On contemporary cache-coherent Non-Uniform Memory Access (ccNUMA) architectures, applications with a large memory footprint suffer from the cost of the garbage collector (GC), because, as the GC scans the reference graph, it makes many remote memory accesses, saturating the interconnect between memory nodes. We address this problem with NumaGiC, a GC with a mostly-distributed design. In order to maximise memory access locality during collection, a GC thread avoids accessing a different memory node, instead notifying a remote GC thread with a message; nonetheless, NumaGiC avoids the drawbacks of a pure distributed design, which tends to decrease parallelism. We compare NumaGiC with Parallel Scavenge and NAPS on two different ccNUMA architectures running on the Hotspot Java Virtual Machine of OpenJDK 7. On Spark and Neo4j, two industry-strength analytics applications, with heap sizes ranging from 160GB to 350GB, and on SPECjbb2013 and SPECjbb2005, our GC improves overall performance by up to 45\% over NAPS (up to 94\% over Parallel Scavenge), and increases the performance of the collector itself by up to 3.6x over NAPS (up to 5.4x over Parallel Scavenge).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Nguyen:2015:FCR, author = "Khanh Nguyen and Kai Wang and Yingyi Bu and Lu Fang and Jianfei Hu and Guoqing Xu", title = "{FACADE}: a Compiler and Runtime for (Almost) Object-Bounded Big Data Applications", journal = j-SIGPLAN, volume = "50", number = "4", pages = "675--690", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694345", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The past decade has witnessed the increasing demands on data-driven business intelligence that led to the proliferation of data-intensive applications. A managed object-oriented programming language such as Java is often the developer's choice for implementing such applications, due to its quick development cycle and rich community resource.
While the use of such languages makes programming easier, their automated memory management comes at a cost. When the managed runtime meets Big Data, this cost is significantly magnified and becomes a scalability-prohibiting bottleneck. This paper presents a novel compiler framework, called Facade, that can generate highly-efficient data manipulation code by automatically transforming the data path of an existing Big Data application. The key treatment is that in the generated code, the number of runtime heap objects created for data types in each thread is (almost) statically bounded, leading to significantly reduced memory management cost and improved scalability. We have implemented Facade and used it to transform 7 common applications on 3 real-world, already well-optimized Big Data frameworks: GraphChi, Hyracks, and GPS. Our experimental results are very positive: the generated programs have (1) achieved a 3\%--48\% execution time reduction and an up to 88X GC reduction; (2) consumed up to 50\% less memory, and (3) scaled to much larger datasets.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Agrawal:2015:ASD, author = "Varun Agrawal and Abhiroop Dabral and Tapti Palit and Yongming Shen and Michael Ferdman", title = "Architectural Support for Dynamic Linking", journal = j-SIGPLAN, volume = "50", number = "4", pages = "691--702", month = apr, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2775054.2694392", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue May 12 17:41:19 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "All software in use today relies on libraries, including standard libraries (e.g., C, C++) and application-specific libraries (e.g., libxml, libpng). Most libraries are loaded in memory and dynamically linked when programs are launched, resolving symbol addresses across the applications and libraries. Dynamic linking has many benefits: It allows code to be reused between applications, conserves memory (because only one copy of a library is kept in memory for all the applications that share it), and allows libraries to be patched and updated without modifying programs, among numerous other benefits. However, these benefits come at the cost of performance. For every call made to a function in a dynamically linked library, a trampoline is used to read the function address from a lookup table and branch to the function, incurring memory load and branch operations. Static linking avoids this performance penalty, but loses all the benefits of dynamic linking. Given its myriad benefits, dynamic linking is the predominant choice today, despite the performance cost. In this work, we propose a speculative hardware mechanism to optimize dynamic linking by avoiding executing the trampolines for library function calls, providing the benefits of dynamic linking with the performance of static linking. Speculatively skipping the memory load and branch operations of the library call trampolines improves performance by reducing the number of executed instructions and gains additional performance by reducing pressure on the instruction and data caches, TLBs, and branch predictors. Because the indirect targets of library call trampolines do not change during program execution, our speculative mechanism never misspeculates in practice. 
We evaluate our technique on real hardware with production software and observe up to 4\% speedup using only 1.5KB of on-chip storage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '15 conference proceedings.", } @Article{Baird:2015:OTC, author = "Ryan Baird and Peter Gavin and Magnus Sj{\"a}lander and David Whalley and Gang-Ryung Uh", title = "Optimizing Transfers of Control in the Static Pipeline Architecture", journal = j-SIGPLAN, volume = "50", number = "5", pages = "1:1--1:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754952", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Statically pipelined processors offer a new way to improve the performance beyond that of a traditional in-order pipeline while simultaneously reducing energy usage by enabling the compiler to control more fine-grained details of the program execution. This paper describes how a compiler can exploit the features of the static pipeline architecture to apply optimizations on transfers of control that are not possible on a conventional architecture. The optimizations presented in this paper include hoisting the target address calculations for branches, jumps, and calls out of loops, performing branch chaining between calls and jumps, hoisting the setting of return addresses out of loops, and exploiting conditional calls and returns. The benefits of performing these transfer of control optimizations include a 6.8\% reduction in execution time and a 3.6\% decrease in estimated energy usage.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Liu:2015:CCD, author = "Qingrui Liu and Changhee Jung and Dongyoon Lee and Devesh Tiwari", title = "{Clover}: Compiler Directed Lightweight Soft Error Resilience", journal = j-SIGPLAN, volume = "50", number = "5", pages = "2:1--2:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754959", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Clover, a compiler directed soft error detection and recovery scheme for lightweight soft error resilience. The compiler carefully generates soft error tolerant code based on idempotent processing without explicit checkpoint. During program execution, Clover relies on a small number of acoustic wave detectors deployed in the processor to identify soft errors by sensing the wave made by a particle strike. To cope with DUE (detected unrecoverable errors) caused by the sensing latency of error detection, Clover leverages a novel selective instruction duplication technique called tail-DMR (dual modular redundancy). Once a soft error is detected by either the sensor or the tail-DMR, Clover takes care of the error as in the case of exception handling. To recover from the error, Clover simply redirects program control to the beginning of the code region where the error is detected. 
The experimental results demonstrate that the average runtime overhead is only 26\%, which is a 75\% reduction compared to that of the state-of-the-art soft error resilience technique.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Bardizbanyan:2015:IDA, author = "Alen Bardizbanyan and Magnus Sj{\"a}lander and David Whalley and Per Larsson-Edefors", title = "Improving Data Access Efficiency by Using Context-Aware Loads and Stores", journal = j-SIGPLAN, volume = "50", number = "5", pages = "3:1--3:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory operations have a significant impact on both performance and energy usage even when an access hits in the level-one data cache (L1 DC). Load instructions in particular affect performance as they frequently result in stalls since the register to be loaded is often referenced before the data is available in the pipeline. L1 DC accesses also impact energy usage as they typically require significantly more energy than a register file access. Despite their impact on performance and energy usage, L1 DC accesses on most processors are performed in a general fashion without regard to the context in which the load or store operation is performed. We describe a set of techniques where the compiler enhances load and store instructions so that they can be executed with fewer stalls and/or enable the L1 DC to be accessed in a more energy-efficient manner. We show that using these techniques can simultaneously achieve a 6\% gain in performance and a 43\% reduction in L1 DC energy usage.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Stilkerich:2015:PGA, author = "Isabella Stilkerich and Clemens Lang and Christoph Erhardt and Michael Stilkerich", title = "A Practical Getaway: Applications of Escape Analysis in Embedded Real-Time Systems", journal = j-SIGPLAN, volume = "50", number = "5", pages = "4:1--4:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754961", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The use of a managed, type-safe language such as Java in real-time and embedded systems offers productivity and, in particular, safety and dependability benefits at a reasonable cost. It has been shown for commodity systems that escape analysis (EA) enables a set of useful optimizations, and benefits from the properties of a type-safe language. In this paper, we explore the application of escape analysis in KESO [34], a Java ahead-of-time compiler targeting (deeply) embedded real-time systems.
We present specific applications of EA for embedded programs that go beyond the widely known stack-allocation and synchronization optimizations, such as extended remote procedure call support for software-isolated applications, automated inference of immutable data, or improved upper space and time bounds for worst-case estimations.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Manilov:2015:FRT, author = "Stanislav Manilov and Bj{\"o}rn Franke and Anthony Magrath and Cedric Andrieu", title = "Free Rider: a Tool for Retargeting Platform-Specific Intrinsic Functions", journal = j-SIGPLAN, volume = "50", number = "5", pages = "5:1--5:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Short-vector SIMD and DSP instructions are popular extensions to common ISAs. These extensions deliver excellent performance and compact code for some compute-intensive applications, but they require specialised compiler support. To enable the programmer to explicitly request the use of such an instruction, many C compilers provide platform-specific intrinsic functions, whose implementation is handled specially by the compiler. The use of such intrinsics, however, inevitably results in non-portable code. In this paper we develop a novel methodology for retargeting such non-portable code, which maps intrinsics from one platform to another, taking advantage of similar intrinsics on the target platform. We employ a description language to specify the signature and semantics of intrinsics and perform graph-based pattern matching and high-level code transformations to derive optimised implementations exploiting the target's intrinsics, wherever possible. We demonstrate the effectiveness of our new methodology, implemented in the FREE RIDER tool, by automatically retargeting benchmarks derived from OpenCV samples and a complex embedded application optimised to run on an Arm Cortex-M4 to an Intel Edison module with SSE4.2 instructions. We achieve a speedup of up to 3.73 over a plain C baseline, and on average 96.0\% of the speedup of manually ported and optimised versions of the benchmarks.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Dietrich:2015:CKC, author = "Christian Dietrich and Martin Hoffmann and Daniel Lohmann", title = "Cross-Kernel Control-Flow--Graph Analysis for Event-Driven Real-Time Systems", journal = j-SIGPLAN, volume = "50", number = "5", pages = "6:1--6:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754963", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded real-time control systems generally have a dedicated purpose and fixed set of functionalities. This manifests in a large amount of implicit and explicit static knowledge, available already at compile time.
Modern compilers can extract and exploit this information to perform extensive whole-program analyses and interprocedural optimizations. However, these analyses typically end at the application--kernel boundary; thus, control-flow transitions between different threads are not yet covered. This restriction stems from the pessimistic assumption of a probabilistic scheduling policy of the underlying operating system, impeding detailed predictions of the overall system behavior. Real-time operating systems, however, do provide deterministic and exactly specified scheduling decisions, as embedded control systems rely on timely and precise behavior. In this paper, we present an approach that incorporates the RTOS semantics into the control-flow analysis, to cross the application--kernel boundary. By combining operating system semantics, the static system configuration, and the application logic, we determine a cross-kernel control-flow graph that provides a global view of all possible execution paths of a real-time system. Having this knowledge at hand enables us to tailor the operating system kernel more closely to the particular application scenario. Using the example of a real-world safety-critical control system, we present two possible use cases: Run-time optimizations, by means of specialized system calls for each call site, allow the kernel execution path to be sped up by 33 percent in our benchmark scenario. Automated generation of OS state assertions on the expected system behavior, targeting transient hardware fault tolerance, yields significant robustness improvements.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Ghosh:2015:EEA, author = "Soumyadeep Ghosh and Yongjun Park and Arun Raman", title = "Enabling Efficient Alias Speculation", journal = j-SIGPLAN, volume = "50", number = "5", pages = "7:1--7:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754964", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Microprocessors designed using HW/SW codesign principles, such as Transmeta{\TM} Efficeon{\TM} and the soon-to-ship NVIDIA 64-bit Tegra{\reg} K1, use dynamic binary optimization to extract instruction-level parallelism. Many code optimizations are made significantly more effective through the use of alias speculation. The state-of-the-art alias speculation system, SMARQ, provides 40\% speedup on average over a system with no alias speculation. This performance, however, comes at the cost of introducing new alias registers and increased power consumption due to new checks for validating speculation. Consequently, improving the efficiency of alias speculation by reducing alias register requirements and rationalizing speculation validation checks is critical for the viability of SMARQ. This paper presents alias coalescing, a novel technique to significantly improve the efficiency of SMARQ through a synergistic combination of compiler and microarchitectural techniques.
By using a more compact encoding for memory access ranges for memory instructions, alias coalescing simultaneously reduces the alias register pressure in SMARQ by a geomean of 26.09\% and 39.96\%, and the dynamic alias checks by 20.73\% and 33.87\%, across the entire SPEC CINT2006 and SPEC CFP2006 suites respectively.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Zheng:2015:WAD, author = "Wenguang Zheng and Hui Wu", title = "{WCET-Aware} Dynamic {D}-cache Locking for a Single Task", journal = j-SIGPLAN, volume = "50", number = "5", pages = "8:1--8:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754965", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Caches have been extensively used to bridge the increasing speed gap between processors and off-chip memory. However, caches make it much harder to compute the WCET (Worst-Case Execution Time) of a program. Cache locking is an effective technique for overcoming the unpredictability problem of caches. We investigate the WCET-aware D-cache locking problem for a single task, and propose two dynamic cache locking approaches. The first approach formulates the problem as a global ILP (Integer Linear Programming) problem that simultaneously selects a near-optimal set of variables as the locked cache contents and allocates them to the D-cache. The second one iteratively constructs a subgraph of the CFG of the task where the lengths of all the paths are close to the longest path length, and uses an ILP formulation to select a near-optimal set of variables in the subgraph as the locked cache contents and allocate them to the D-cache. For both approaches, we propose a novel, efficient D-cache allocation algorithm. We have implemented both approaches and compared them with the longest path-based, dynamic cache locking approach proposed in [22] and the static WCET analysis approach without cache locking proposed in [14] by using a set of benchmarks from the M{\"a}lardalen WCET benchmark suite, SNU real-time benchmarks, and the benchmarks used in [27]. Compared to the static WCET analysis approach, the average WCET improvements of the first approach range between 11.3\% and 31.6\%, and the average WCET improvements of the second approach range between 12.3\% and 32.9\%.
Compared to the longest path-based, dynamic cache locking approach, the average WCET improvements of the first approach range between 4.7\% and 14.3\%, and the average WCET improvements of the second approach range between 5.3\% and 15.0\%.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Lin:2015:STU, author = "Yixiao Lin and Sayan Mitra", title = "{StarL}: Towards a Unified Framework for Programming, Simulating and Verifying Distributed Robotic Systems", journal = j-SIGPLAN, volume = "50", number = "5", pages = "9:1--9:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We developed StarL as a framework for programming, simulating, and verifying distributed systems that interact with physical processes. The StarL framework has (a) a collection of distributed primitives for coordination, such as mutual exclusion, registration, and geocast, that can be used to build sophisticated applications, (b) theory libraries for verifying StarL applications in the PVS theorem prover, and (c) an execution environment that can be used to deploy the applications on hardware or to execute them in a discrete event simulator. The primitives have (i) abstract, nondeterministic specifications in terms of invariants, and assume-guarantee style progress properties, (ii) implementations in Java/Android that always satisfy the invariants and attempt progress using best-effort strategies. The PVS theories specify the invariant and progress properties of the primitives, and have to be appropriately instantiated and composed with the application's state machine to prove properties about the application. We have built two execution environments: one for deploying applications on the Android/iRobot Create platform and a second one for simulating large instantiations of the applications in a discrete event simulator. The capabilities are illustrated with a StarL application for vehicle-to-vehicle coordination in an automatic intersection that uses primitives for point-to-point motion, mutual exclusion, and registration.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Zhang:2015:IPA, author = "Zhenkai Zhang and Xenofon Koutsoukos", title = "Improving the Precision of Abstract Interpretation Based Cache Persistence Analysis", journal = j-SIGPLAN, volume = "50", number = "5", pages = "10:1--10:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When designing hard real-time embedded systems, it is necessary to estimate the worst-case execution time (WCET) of each task for schedulability analysis. Precise cache persistence analysis can significantly tighten the WCET estimation, especially when the program has many loops. Methods for persistence analysis should safely and precisely classify memory references as persistent.
Existing safe approaches suffer from multiple sources of pessimism and may not provide precise results. In this paper, we first identify some sources of pessimism that two recent approaches based on younger set and may analysis may encounter. Then, we propose two methods to eliminate these sources of pessimism. The first method improves the update function of the may analysis-based approach, and the second method integrates the younger set-based and may analysis-based approaches together to further reduce pessimism. We also prove that the two proposed methods are still safe. We evaluate the approaches on a set of benchmarks and observe that the number of memory references classified as persistent is increased by the proposed methods. Moreover, we empirically compare the storage space and analysis time used by different methods.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Barijough:2015:IAM, author = "Kamyar Mirzazad Barijough and Matin Hashemi and Volodymyr Khibin and Soheil Ghiasi", title = "Implementation-Aware Model Analysis: The Case of Buffer-Throughput Tradeoff in Streaming Applications", journal = j-SIGPLAN, volume = "50", number = "5", pages = "11:1--11:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754968", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Models of computation abstract away a number of implementation details in favor of well-defined semantics. While this has unquestionable benefits, we argue that analysis of models solely based on operational semantics (implementation-oblivious analysis) is unfit to drive implementation design space exploration. Specifically, we study the tradeoff between buffer size and streaming throughput in applications modeled as synchronous data flow (SDF) graphs. We demonstrate the inherent inaccuracy of the implementation-oblivious approach, which only considers the SDF operational semantics. We propose a rigorous transformation, which equips the state-of-the-art buffer-throughput tradeoff analysis technique with implementation awareness. Extensive empirical evaluation shows that our approach results in significantly more accurate estimates of streaming throughput at the model level, while running two orders of magnitude faster than cycle-accurate simulation of implementations.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Liu:2015:SDS, author = "Chen Liu and Chengmo Yang", title = "Secure and Durable {(SEDURA)}: an Integrated Encryption and Wear-leveling Framework for {PCM}-based Main Memory", journal = j-SIGPLAN, volume = "50", number = "5", pages = "12:1--12:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754969", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Phase change memory (PCM) is considered a promising candidate for next-generation main memory.
Despite its advantages of lower power and high density, PCM faces critical security challenges due to its non-volatility: data are still accessible by the attacker even if the device is detached from a power supply. While encryption has been widely adopted as the solution to protect data, it not only creates additional performance and energy overhead during data encryption/decryption, but also hurts PCM lifetime by introducing more writes to PCM cells. In this paper, we propose a framework that integrates encryption and wear-leveling so as to mitigate the adverse impact of encryption on PCM performance and lifetime. Moreover, by randomizing the address space during wear-leveling, an extra level of protection is provided to the data in memory. We propose two algorithms that respectively prioritize data security and memory lifetime, allowing designers to trade off between these two factors based on their needs. Compared to previous encryption techniques, the proposed SEDURA framework is able to deliver both more randomness to protect data and more balanced PCM writes, thus effectively balancing the three aspects of data security, application performance, and device lifetime.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Procter:2015:SDH, author = "Adam Procter and William L. Harrison and Ian Graves and Michela Becchi and Gerard Allwein", title = "Semantics Driven Hardware Design, Implementation, and Verification with {ReWire}", journal = j-SIGPLAN, volume = "50", number = "5", pages = "13:1--13:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754970", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There is no such thing as high assurance without high assurance hardware. High assurance hardware is essential, because any and all high assurance systems ultimately depend on hardware that conforms to, and does not undermine, critical system properties and invariants. And yet, high assurance hardware development is stymied by the conceptual gap between formal methods and hardware description languages used by engineers. This paper presents ReWire, a functional programming language providing a suitable foundation for formal verification of hardware designs, and a compiler for that language that translates high-level, semantics-driven designs directly into working hardware.
ReWire's design and implementation are presented, along with a case study in the design of a secure multicore processor, demonstrating both ReWire's expressiveness as a programming language and its power as a framework for formal, high-level reasoning about hardware systems.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Woithe:2015:TPA, author = "Hans Christian Woithe and Ulrich Kremer", title = "{TrilobiteG}: a programming architecture for autonomous underwater vehicles", journal = j-SIGPLAN, volume = "50", number = "5", pages = "14:1--14:??", month = may, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2808704.2754971", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jul 31 19:39:44 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming autonomous systems can be challenging because many programming decisions must be made in real time and under stressful conditions, such as on a battlefield, during a short communication window, or during a storm at sea. As such, new programming designs are needed to reflect these specific and extreme challenges. TrilobiteG is a programming architecture for buoyancy-driven autonomous underwater vehicles (AUVs), called gliders. Gliders are designed to spend weeks to months in the ocean, where they operate fully autonomously while submerged and can only communicate via satellite during their limited time at the surface. Based on the experience gained from a seven-year-long collaboration with two oceanographic institutes, the TrilobiteG architecture has been developed with the main goal of enabling users to run more effective missions. The TrilobiteG programming environment consists of a domain-specific language called ALGAE, a lower-level service layer, and a set of real-time and faster-than-real-time simulators. The system has been used to program novel and robust glider behaviors, as well as to find software problems that otherwise may have remained undetected, with potentially catastrophic results. We believe that TrilobiteG can serve as a blueprint for other autonomous systems as well, and that TrilobiteG will motivate and enable a broader scientific community to work on extreme, real-world problems by using the simulation infrastructure.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '15 conference proceedings.", } @Article{Panchekha:2015:AIA, author = "Pavel Panchekha and Alex Sanchez-Stern and James R. Wilcox and Zachary Tatlock", title = "Automatically improving accuracy for floating point expressions", journal = j-SIGPLAN, volume = "50", number = "6", pages = "1--11", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737959", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scientific and engineering applications depend on floating point arithmetic to approximate real arithmetic. This approximation introduces rounding error, which can accumulate to produce unacceptable results.
While the numerical methods literature provides techniques to mitigate rounding error, applying these techniques requires manually rearranging expressions and understanding the finer details of floating point arithmetic. We introduce Herbie, a tool which automatically discovers the rewrites experts perform to improve accuracy. Herbie's heuristic search estimates and localizes rounding error using sampled points (rather than static error analysis), applies a database of rules to generate improvements, takes series expansions, and combines improvements for different input regions. We evaluated Herbie on examples from a classic numerical methods textbook, and found that Herbie was able to improve accuracy on each example, some by up to 60 bits, while imposing a median performance overhead of 40\%. Colleagues in machine learning have used Herbie to significantly improve the results of a clustering algorithm, and a mathematical library has accepted two patches generated using Herbie.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Zhang:2015:DTE, author = "Danfeng Zhang and Andrew C. Myers and Dimitrios Vytiniotis and Simon Peyton-Jones", title = "Diagnosing type errors with class", journal = j-SIGPLAN, volume = "50", number = "6", pages = "12--21", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738009", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type inference engines often give terrible error messages, and the more sophisticated the type system the worse the problem. We show that even with the highly expressive type system implemented by the Glasgow Haskell Compiler (GHC)--including type classes, GADTs, and type families--it is possible to identify the most likely source of the type error, rather than the first source that the inference engine trips over. To determine which are the likely error sources, we apply a simple Bayesian model to a graph representation of the typing constraints; the satisfiability or unsatisfiability of paths within the graph provides evidence for or against possible explanations. While we build on prior work on error diagnosis for simpler type systems, inference in the richer type system of Haskell requires extending the graph with new nodes. The augmentation of the graph creates challenges both for Bayesian reasoning and for ensuring termination. Using a large corpus of Haskell programs, we show that this error localization technique is practical and significantly improves accuracy over the state of the art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Lopes:2015:PCP, author = "Nuno P. Lopes and David Menendez and Santosh Nagarakatte and John Regehr", title = "Provably correct peephole optimizations with {Alive}", journal = j-SIGPLAN, volume = "50", number = "6", pages = "22--32", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737965", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compilers should not miscompile. 
Our work addresses problems in developing peephole optimizations that perform local rewriting to improve the efficiency of LLVM code. These optimizations are individually difficult to get right, particularly in the presence of undefined behavior; taken together they represent a persistent source of bugs. This paper presents Alive, a domain-specific language for writing optimizations and for automatically either proving them correct or else generating counterexamples. Furthermore, Alive can be automatically translated into C++ code that is suitable for inclusion in an LLVM optimization pass. Alive is based on an attempt to balance usability and formal methods; for example, it captures---but largely hides---the detailed semantics of three different kinds of undefined behavior in LLVM. We have translated more than 300 LLVM optimizations into Alive and, in the process, found that eight of them were wrong.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Faddegon:2015:ADR, author = "Maarten Faddegon and Olaf Chitil", title = "Algorithmic debugging of real-world {Haskell} programs: deriving dependencies from the cost centre stack", journal = j-SIGPLAN, volume = "50", number = "6", pages = "33--42", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737985", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing algorithmic debuggers for Haskell require a transformation of all modules in a program, even libraries that the user does not want to debug and which may use language features not supported by the debugger. This is a pity, because a promising approach to debugging is therefore not applicable to many real-world programs. We use the cost centre stack from the Glasgow Haskell Compiler profiling environment together with runtime value observations as provided by the Haskell Object Observation Debugger (HOOD) to collect enough information for algorithmic debugging. Program annotations are in suspected modules only. With this technique algorithmic debugging is applicable to a much larger set of Haskell programs. This demonstrates that for functional languages in general a simple stack trace extension is useful to support tasks such as profiling and debugging.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Sidiroglou-Douskos:2015:AEE, author = "Stelios Sidiroglou-Douskos and Eric Lahtinen and Fan Long and Martin Rinard", title = "Automatic error elimination by horizontal code transfer across multiple applications", journal = j-SIGPLAN, volume = "50", number = "6", pages = "43--54", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737988", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Code Phage (CP), a system for automatically transferring correct code from donor applications into recipient applications that process the same inputs to successfully eliminate errors in the recipient. 
Experimental results using seven donor applications to eliminate ten errors in seven recipient applications highlight the ability of CP to transfer code across applications to eliminate out of bounds access, integer overflow, and divide by zero errors. Because CP works with binary donors with no need for source code or symbolic information, it supports a wide range of use cases. To the best of our knowledge, CP is the first system to automatically transfer code across multiple applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Liu:2015:LRT, author = "Peng Liu and Xiangyu Zhang and Omer Tripp and Yunhui Zheng", title = "{Light}: replay via tightly bounded recording", journal = j-SIGPLAN, volume = "50", number = "6", pages = "55--64", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reproducing concurrency bugs is a prominent challenge. Existing techniques either rely on recording very fine grained execution information and hence have high runtime overhead, or strive to log as little information as possible but provide no guarantee in reproducing a bug. We present Light, a technique that features much lower overhead compared to techniques based on fine grained recording, and that guarantees to reproduce concurrent bugs. We leverage and formally prove that recording flow dependences is the necessary and sufficient condition to reproduce a concurrent bug. The flow dependences, together with the thread local orders that can be automatically inferred (and hence not logged), are encoded as scheduling constraints. An SMT solver is used to derive a replay schedule, which is guaranteed to exist even though it may be different from the original schedule. Our experiments show that Light has only 44\% logging overhead, almost one order of magnitude lower than the state of the art techniques relying on logging memory accesses. Its space overhead is only 10\% of those techniques. Light can also reproduce all the bugs we have collected whereas existing techniques miss some of them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Lidbury:2015:MCC, author = "Christopher Lidbury and Andrei Lascu and Nathan Chong and Alastair F. Donaldson", title = "Many-core compiler fuzzing", journal = j-SIGPLAN, volume = "50", number = "6", pages = "65--76", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737986", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the compiler correctness problem for many-core systems through novel applications of fuzz testing to OpenCL compilers. 
Focusing on two methods from prior work, random differential testing and testing via equivalence modulo inputs (EMI), we present several strategies for random generation of deterministic, communicating OpenCL kernels, and an injection mechanism that allows EMI testing to be applied to kernels that otherwise exhibit little or no dynamically-dead code. We use these methods to conduct a large, controlled testing campaign with respect to 21 OpenCL (device, compiler) configurations, covering a range of CPU, GPU, accelerator, FPGA and emulator implementations. Our study provides independent validation of claims in prior work related to the effectiveness of random differential testing and EMI testing, proposes novel methods for lifting these techniques to the many-core setting and reveals a significant number of OpenCL compiler bugs in commercial implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Sergey:2015:MVF, author = "Ilya Sergey and Aleksandar Nanevski and Anindya Banerjee", title = "Mechanized verification of fine-grained concurrent programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "77--87", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737964", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient concurrent programs and data structures rarely employ coarse-grained synchronization mechanisms (i.e., locks); instead, they implement custom synchronization patterns via fine-grained primitives, such as compare-and-swap. Due to sophisticated interference scenarios between threads, reasoning about such programs is challenging and error-prone, and can benefit from mechanization. In this paper, we present the first completely formalized framework for mechanized verification of full functional correctness of fine-grained concurrent programs. Our tool is based on the recently proposed program logic FCSL. It is implemented as an embedded DSL in the dependently-typed language of the Coq proof assistant, and is powerful enough to reason about programming features such as higher-order functions and local thread spawning. By incorporating a uniform concurrency model, based on state-transition systems and partial commutative monoids, FCSL makes it possible to build proofs about concurrent libraries in a thread-local, compositional way, thus facilitating scalability and reuse: libraries are verified just once, and their specifications are used ubiquitously in client-side reasoning. 
We illustrate the proof layout in FCSL by example, outline its infrastructure, and report on our experience of using FCSL to verify a number of concurrent algorithms and data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Sharma:2015:VPC, author = "Rahul Sharma and Michael Bauer and Alex Aiken", title = "Verification of producer-consumer synchronization in {GPU} programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "88--98", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Previous efforts to formally verify code written for GPUs have focused solely on kernels written within the traditional data-parallel GPU programming model. No previous work has considered the higher performance, but more complex, warp-specialized kernels based on producer-consumer named barriers available on current hardware. In this work we present the first formal operational semantics for named barriers and define what it means for a warp-specialized kernel to be correct. We give algorithms for verifying the correctness of warp-specialized kernels and prove that they are both sound and complete for the most common class of warp-specialized programs. We also present WEFT, a verification tool for checking warp-specialized code. Using WEFT, we discover several non-trivial bugs in production warp-specialized kernels.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Gammie:2015:RSV, author = "Peter Gammie and Antony L. Hosking and Kai Engelhardt", title = "Relaxing safely: verified on-the-fly garbage collection for {x86-TSO}", journal = j-SIGPLAN, volume = "50", number = "6", pages = "99--109", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738006", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We report on a machine-checked verification of safety for a state-of-the-art, on-the-fly, concurrent, mark-sweep garbage collector that is designed for multi-core architectures with weak memory consistency. The proof explicitly incorporates the relaxed memory semantics of x86 multiprocessors. To our knowledge, this is the first fully machine-checked proof of safety for such a garbage collector. We couch the proof in a framework that system implementers will find appealing, with the fundamental components of the system specified in a simple and intuitive programming language. 
The abstract model is detailed enough for its correspondence with an assembly language implementation to be straightforward.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Tassarotti:2015:VRC, author = "Joseph Tassarotti and Derek Dreyer and Viktor Vafeiadis", title = "Verifying read-copy-update in a logic for weak memory", journal = j-SIGPLAN, volume = "50", number = "6", pages = "110--120", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737992", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Read-Copy-Update (RCU) is a technique for letting multiple readers safely access a data structure while a writer concurrently modifies it. It is used heavily in the Linux kernel in situations where fast reads are important and writes are infrequent. Optimized implementations rely only on the weaker memory orderings provided by modern hardware, avoiding the need for expensive synchronization instructions (such as memory barriers) as much as possible. Using GPS, a recently developed program logic for the C/C++11 memory model, we verify an implementation of RCU for a singly-linked list assuming ``release-acquire'' semantics. Although release-acquire synchronization is stronger than what is required by real RCU implementations, it is nonetheless significantly weaker than the assumption of sequential consistency made in prior work on RCU verification. Ours is the first formal proof of correctness for an implementation of RCU under a weak memory model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Ko:2015:LCT, author = "Yousun Ko and Bernd Burgstaller and Bernhard Scholz", title = "{LaminarIR}: compile-time queues for structured streams", journal = j-SIGPLAN, volume = "50", number = "6", pages = "121--130", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737994", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stream programming languages employ FIFO (first-in, first-out) semantics to model data channels between producers and consumers. A FIFO data channel stores tokens in a buffer that is accessed indirectly via read- and write-pointers. This indirect token-access decouples a producer's write-operations from the read-operations of the consumer, thereby making dataflow implicit. For a compiler, indirect token-access obscures data-dependencies, which renders standard optimizations ineffective and impacts stream program performance negatively. In this paper we propose a transformation for structured stream programming languages such as StreamIt that shifts FIFO buffer management from run-time to compile-time and eliminates splitters and joiners, whose task is to distribute and merge streams. To show the effectiveness of our lowering transformation, we have implemented a StreamIt to C compilation framework. We have developed our own intermediate representation (IR) called LaminarIR, which facilitates the transformation. 
We report on the enabling effect of the LaminarIR on LLVM's optimizations, which required the conversion of several standard StreamIt benchmarks from static to randomized input, to prevent computation of partial results at compile-time. We conducted our experimental evaluation on the Intel i7-2600K, AMD Opteron 6378, Intel Xeon Phi 3120A and ARM Cortex-A15 platforms. Our LaminarIR reduces data-communication on average by 35.9\% and achieves platform-specific speedups between 3.73x and 4.98x over StreamIt. We reduce memory accesses by more than 60\% and achieve energy savings of up to 93.6\% on the Intel i7-2600K.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Ding:2015:OCA, author = "Wei Ding and Xulong Tang and Mahmut Kandemir and Yuanrui Zhang and Emre Kultursay", title = "Optimizing off-chip accesses in multicores", journal = j-SIGPLAN, volume = "50", number = "6", pages = "131--142", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737989", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In a network-on-chip (NoC) based manycore architecture, an off-chip data access (main memory access) needs to travel through the on-chip network, spending considerable amount of time within the chip (in addition to the memory access latency). In addition, it contends with on-chip (cache) accesses as both use the same NoC resources. In this paper, focusing on data-parallel, multithreaded applications, we propose a compiler-based off-chip data access localization strategy, which places data elements in the memory space such that an off-chip access traverses a minimum number of links (hops) to reach the memory controller that handles this access. This brings three main benefits. First, the network latency of off-chip accesses gets reduced; second, the network latency of on-chip accesses gets reduced; and finally, the memory latency of off-chip accesses improves, due to reduced queue latencies. We present an experimental evaluation of our optimization strategy using a set of 13 multithreaded application programs under both private and shared last-level caches. The results collected emphasize the importance of optimizing the off-chip data accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Mehta:2015:ICS, author = "Sanyam Mehta and Pen-Chung Yew", title = "Improving compiler scalability: optimizing large programs at small price", journal = j-SIGPLAN, volume = "50", number = "6", pages = "143--152", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737954", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler scalability is a well known problem: reasoning about the application of useful optimizations over large program scopes consumes too much time and memory during compilation. 
This problem is exacerbated in polyhedral compilers that use powerful yet costly integer programming algorithms to compose loop optimizations. As a result, the benefits that a polyhedral compiler has to offer to programs such as real scientific applications that contain sequences of loop nests remain impractical for common users. In this work, we address this scalability problem in polyhedral compilers. We identify three causes of unscalability, each of which stems from the large number of statements and dependences in the program scope. We propose a one-shot solution to the problem by reducing the effective number of statements and dependences as seen by the compiler. We achieve this by representing a sequence of statements in a program by a single super-statement. This set of super-statements exposes the minimum sufficient constraints to the Integer Linear Programming (ILP) solver for finding correct optimizations. We implement our approach in the PLuTo polyhedral compiler and find that it condenses the program statements and program dependences by factors of 4.7x and 6.4x, respectively, averaged over 9 hot regions (ranging from 48 to 121 statements) in 5 real applications. As a result, the improvements in time and memory requirements for compilation are 268x and 20x, respectively, over the latest version of the PLuTo compiler. The final compile times are comparable to the Intel compiler while the performance is 1.92x better on average due to the latter's conservative approach to loop optimization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Appel:2015:VCP, author = "Andrew W. Appel", title = "Verification of a cryptographic primitive: {SHA-256} (abstract)", journal = j-SIGPLAN, volume = "50", number = "6", pages = "153--153", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2774972", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A full formal machine-checked verification of a C program: the OpenSSL implementation of SHA-256. This is an interactive proof of functional correctness in the Coq proof assistant, using the Verifiable C program logic. Verifiable C is a separation logic for the C language, proved sound w.r.t. the operational semantics for C, connected to the CompCert verified optimizing C compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Deligiannis:2015:APA, author = "Pantazis Deligiannis and Alastair F.
Donaldson and Jeroen Ketema and Akash Lal and Paul Thomson", title = "Asynchronous programming, analysis and testing with state machines", journal = j-SIGPLAN, volume = "50", number = "6", pages = "154--164", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737996", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming efficient asynchronous systems is challenging because it can often be hard to express the design declaratively, or to defend against data races and interleaving-dependent assertion violations. Previous work has only addressed these challenges in isolation, by either designing a new declarative language, a new data race detection tool or a new testing technique. We present P\#, a language for high-reliability asynchronous programming co-designed with a static data race analysis and systematic concurrency testing infrastructure. We describe our experience using P\# to write several distributed protocols and port an industrial-scale system internal to Microsoft, showing that the combined techniques, by leveraging the design of P\#, are effective in finding bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Huang:2015:SMC, author = "Jeff Huang", title = "Stateless model checking concurrent programs with maximal causality reduction", journal = j-SIGPLAN, volume = "50", number = "6", pages = "165--174", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737975", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present maximal causality reduction (MCR), a new technique for stateless model checking. MCR systematically explores the state-space of concurrent programs with a provably minimal number of executions. Each execution corresponds to a distinct maximal causal model extracted from a given execution trace, which captures the largest possible set of causally equivalent executions. Moreover, MCR is embarrassingly parallel by shifting the runtime exploration cost to offline analysis. We have designed and implemented MCR using a constraint-based approach and compared with iterative context bounding (ICB) and dynamic partial order reduction (DPOR) on both benchmarks and real-world programs. MCR reduces the number of executions explored by ICB and ICB+DPOR by orders of magnitude, and significantly improves the scalability, efficiency, and effectiveness of the state-of-the-art for both state-space exploration and bug finding. 
In our experiments, MCR has also revealed several new data races and null pointer dereference errors in frequently studied real-world programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Samak:2015:SRT, author = "Malavika Samak and Murali Krishna Ramanathan and Suresh Jagannathan", title = "Synthesizing racy tests", journal = j-SIGPLAN, volume = "50", number = "6", pages = "175--185", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Subtle concurrency errors in multithreaded libraries that arise because of incorrect or inadequate synchronization are often difficult to pinpoint precisely using only static techniques. On the other hand, the effectiveness of dynamic race detectors is critically dependent on multithreaded test suites whose execution can be used to identify and trigger races. Usually, such multithreaded tests need to invoke a specific combination of methods with objects involved in the invocations being shared appropriately to expose a race. Without a priori knowledge of the race, construction of such tests can be challenging. In this paper, we present a lightweight and scalable technique for synthesizing precisely these kinds of tests. Given a multithreaded library and a sequential test suite, we describe a fully automated analysis that examines sequential execution traces, and produces as its output a concurrent client program that drives shared objects via library method calls to states conducive for triggering a race. Experimental results on a variety of well-tested Java libraries yield 101 synthesized multithreaded tests in less than four minutes. Analyzing the execution of these tests using an off-the-shelf race detector reveals 187 harmful races, including several previously unreported ones. Our implementation, named NARADA, and the results of our experiments are available at http://www.csa.iisc.ernet.in/~sss/tools/narada.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Koskinen:2015:PPM, author = "Eric Koskinen and Matthew Parkinson", title = "The {Push\slash Pull} model of transactions", journal = j-SIGPLAN, volume = "50", number = "6", pages = "186--195", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a general theory of serializability, unifying a wide range of transactional algorithms, including some that are yet to come. To this end, we provide a compact semantics in which concurrent transactions PUSH their effects into the shared view (or UNPUSH to recall effects) and PULL the effects of potentially uncommitted concurrent transactions into their local view (or UNPULL to detangle). 
Each operation comes with simple criteria given in terms of commutativity (Lipton's left-movers and right-movers). The benefit of this model is that most of the elaborate reasoning (coinduction, simulation, subtle invariants, etc.) necessary for proving the serializability of a transactional algorithm is already proved within the semantic model. Thus, proving serializability (or opacity) amounts simply to mapping the algorithm on to our rules, and showing that it satisfies the rules' criteria.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{McClurg:2015:ESN, author = "Jedidiah McClurg and Hossein Hojjat and Pavol Cern{\'y} and Nate Foster", title = "Efficient synthesis of network updates", journal = j-SIGPLAN, volume = "50", number = "6", pages = "196--207", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737980", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined networking (SDN) is revolutionizing the networking industry, but current SDN programming platforms do not provide automated mechanisms for updating global configurations on the fly. Implementing updates by hand is challenging for SDN programmers because networks are distributed systems with hundreds or thousands of interacting nodes. Even if initial and final configurations are correct, naively updating individual nodes can lead to incorrect transient behaviors, including loops, black holes, and access control violations. This paper presents an approach for automatically synthesizing updates that are guaranteed to preserve specified properties. We formalize network updates as a distributed programming problem and develop a synthesis algorithm based on counterexample-guided search and incremental model checking. We describe a prototype implementation, and present results from experiments on real-world topologies and properties demonstrating that our tool scales to updates involving over one-thousand nodes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Nori:2015:ESP, author = "Aditya V. Nori and Sherjil Ozair and Sriram K. Rajamani and Deepak Vijaykeerthy", title = "Efficient synthesis of probabilistic programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "208--217", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737982", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show how to automatically synthesize probabilistic programs from real-world datasets. Such a synthesis is feasible due to a combination of two techniques: (1) We borrow the idea of ``sketching'' from synthesis of deterministic programs, and allow the programmer to write a skeleton program with ``holes''. Sketches enable the programmer to communicate domain-specific intuition about the structure of the desired program and prune the search space, and (2) we design an efficient Markov Chain Monte Carlo (MCMC) based synthesis algorithm to instantiate the holes in the sketch with program fragments. 
Our algorithm efficiently synthesizes a probabilistic program that is most consistent with the data. A core difficulty in synthesizing probabilistic programs is computing the likelihood L(P | D) of a candidate program P generating data D. We propose an approximate method to compute likelihoods using mixtures of Gaussian distributions, thereby avoiding expensive computation of integrals. The use of such approximations enables us to speed up evaluation of the likelihood of candidate programs by a factor of 1000, and makes Markov Chain Monte Carlo based search feasible. We have implemented our algorithm in a tool called PSKETCH, and our results are encouraging: PSKETCH is able to automatically synthesize 16 non-trivial real-world probabilistic programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Barowy:2015:FER, author = "Daniel W. Barowy and Sumit Gulwani and Ted Hart and Benjamin Zorn", title = "{FlashRelate}: extracting relational data from semi-structured spreadsheets using examples", journal = j-SIGPLAN, volume = "50", number = "6", pages = "218--228", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737952", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "With hundreds of millions of users, spreadsheets are one of the most important end-user applications. Spreadsheets are easy to use and allow users great flexibility in storing data. This flexibility comes at a price: users often treat spreadsheets as a poor man's database, leading to creative solutions for storing high-dimensional data. The trouble arises when users need to answer queries with their data. Data manipulation tools make strong assumptions about data layouts and cannot read these ad-hoc databases. Converting data into the appropriate layout requires programming skills or a major investment in manual reformatting. The effect is that a vast amount of real-world data is ``locked-in'' to a proliferation of one-off formats. We introduce FlashRelate, a synthesis engine that lets ordinary users extract structured relational data from spreadsheets without programming. Instead, users extract data by supplying examples of output relational tuples. FlashRelate uses these examples to synthesize a program in Flare. Flare is a novel extraction language that extends regular expressions with geometric constructs. An interactive user interface on top of FlashRelate lets end users extract data by point-and-click. We demonstrate that correct Flare programs can be synthesized in seconds from a small set of examples for 43 real-world scenarios. Finally, our case study demonstrates FlashRelate's usefulness in addressing the widespread problem of data trapped in corporate and government formats.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Feser:2015:SDS, author = "John K.
Feser and Swarat Chaudhuri and Isil Dillig", title = "Synthesizing data structure transformations from input-output examples", journal = j-SIGPLAN, volume = "50", number = "6", pages = "229--239", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737977", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method for example-guided synthesis of functional programs over recursive data structures. Given a set of input-output examples, our method synthesizes a program in a functional language with higher-order combinators like map and fold. The synthesized program is guaranteed to be the simplest program in the language to fit the examples. Our approach combines three technical ideas: inductive generalization, deduction, and enumerative search. First, we generalize the input-output examples into hypotheses about the structure of the target program. For each hypothesis, we use deduction to infer new input/output examples for the missing subexpressions. This leads to a new subproblem where the goal is to synthesize expressions within each hypothesis. Since not every hypothesis can be realized into a program that fits the examples, we use a combination of best-first enumeration and deduction to search for a hypothesis that meets our needs. We have implemented our method in a tool called \lambda 2, and we evaluate this tool on a large set of synthesis problems involving lists, trees, and nested data structures. The experiments demonstrate the scalability and broad scope of \lambda 2. A highlight is the synthesis of a program believed to be the world's earliest functional pearl.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Ziv:2015:CCC, author = "Ofri Ziv and Alex Aiken and Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv", title = "Composing concurrency control", journal = j-SIGPLAN, volume = "50", number = "6", pages = "240--249", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737970", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency control poses significant challenges when composing computations over multiple data-structures (objects) with different concurrency-control implementations. We formalize the usually desired requirements (serializability, abort-safety, deadlock-safety, and opacity) as well as stronger versions of these properties that enable composition. We show how to compose protocols satisfying these properties so that the resulting combined protocol also satisfies these properties. Our approach generalizes well-known protocols (such as two-phase-locking and two-phase-commit) and leads to new protocols. We apply this theory to show how we can safely compose optimistic and pessimistic concurrency control. 
For example, we show how we can execute a transaction that accesses two objects, one controlled by an STM and another by locking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Zhang:2015:DPO, author = "Naling Zhang and Markus Kusano and Chao Wang", title = "Dynamic partial order reduction for relaxed memory models", journal = j-SIGPLAN, volume = "50", number = "6", pages = "250--259", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737956", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Under a relaxed memory model such as TSO or PSO, a concurrent program running on a shared-memory multiprocessor may observe two types of nondeterminism: the nondeterminism in thread scheduling and the nondeterminism in store buffering. Although there is a large body of work on mitigating the scheduling nondeterminism during runtime verification, methods for soundly mitigating the store buffering nondeterminism are lacking. We propose a new dynamic partial order reduction (POR) algorithm for verifying concurrent programs under TSO and PSO. Our method relies on modeling both types of nondeterminism in a unified framework, which allows us to extend existing POR techniques to TSO and PSO without overhauling the verification algorithm. In addition to sound POR, we also propose a buffer-bounding method for more aggressively reducing the state space. We have implemented our new methods in a stateless model checking tool and demonstrated their effectiveness on a set of multithreaded C benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Emmi:2015:MRS, author = "Michael Emmi and Constantin Enea and Jad Hamza", title = "Monitoring refinement via symbolic reasoning", journal = j-SIGPLAN, volume = "50", number = "6", pages = "260--269", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737983", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient implementations of concurrent objects such as semaphores, locks, and atomic collections are essential to modern computing. Programming such objects is error prone: in minimizing the synchronization overhead between concurrent object invocations, one risks the conformance to reference implementations --- or in formal terms, one risks violating observational refinement. Precisely testing this refinement even within a single execution is intractable, limiting existing approaches to executions with very few object invocations. We develop scalable and effective algorithms for detecting refinement violations. Our algorithms are founded on incremental, symbolic reasoning, and exploit foundational insights into the refinement-checking problem. Our approach is sound, in that we detect only actual violations, and scales far beyond existing violation-detection algorithms. 
Empirically, we find that our approach is practically complete, in that we detect the violations arising in actual executions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Longfield:2015:PGS, author = "Stephen Longfield and Brittany Nkounkou and Rajit Manohar and Ross Tate", title = "Preventing glitches and short circuits in high-level self-timed chip specifications", journal = j-SIGPLAN, volume = "50", number = "6", pages = "270--279", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Self-timed chip designs are commonly specified in a high-level message-passing language called CHP. This language is closely related to Hoare's CSP except it admits erroneous behavior due to the necessary limitations of efficient hardware implementations. For example, two processes sending on the same channel at the same time causes glitches and short circuits in the physical chip implementation. If a CHP program maintains certain invariants, such as only one process is sending on any given channel at a time, it can guarantee an error-free execution that behaves much like a CSP program would. In this paper, we present an inferable effect system for ensuring that these invariants hold, drawing from model-checking methodologies while exploiting language-usage patterns and domain-specific specializations to achieve efficiency. This analysis is sound, and is even complete for the common subset of CHP programs without data-sensitive synchronization. We have implemented the analysis and demonstrated that it scales to validate even microprocessors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Lal:2015:DID, author = "Akash Lal and Shaz Qadeer", title = "{DAG} inlining: a decision procedure for reachability-modulo-theories in hierarchical programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "280--290", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737987", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A hierarchical program is one with multiple procedures but no loops or recursion. This paper studies the problem of deciding reachability queries in hierarchical programs where individual statements can be encoded in a decidable logic (say in SMT). This problem is fundamental to verification and most directly applicable to doing bounded reachability in programs, i.e., reachability under a bound on the number of loop iterations and recursive calls. The usual method of deciding reachability in hierarchical programs is to first inline all procedures and then do reachability on the resulting single-procedure program. Such inlining unfolds the call graph of the program to a tree and may lead to an exponential increase in the size of the program. 
We design and evaluate a method called DAG inlining that unfolds the call graph to a directed acyclic graph (DAG) instead of a tree by sharing the bodies of procedures at certain points during inlining. DAG inlining can produce much more compact representations than tree inlining. Empirically, we show that it leads to significant improvements in the running time of a state-of-the-art verifier.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Johnson:2015:EES, author = "Andrew Johnson and Lucas Waye and Scott Moore and Stephen Chong", title = "Exploring and enforcing security guarantees via program dependence graphs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "291--302", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737957", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present PIDGIN, a program analysis and understanding tool that enables the specification and enforcement of precise application-specific information security guarantees. PIDGIN also allows developers to interactively explore the information flows in their applications to develop policies and investigate counter-examples. PIDGIN combines program dependence graphs (PDGs), which precisely capture the information flows in a whole application, with a custom PDG query language. Queries express properties about the paths in the PDG; because paths in the PDG correspond to information flows in the application, queries can be used to specify global security policies. PIDGIN is scalable. Generating a PDG for a 330k line Java application takes 90 seconds, and checking a policy on that PDG takes under 14 seconds. The query language is expressive, supporting a large class of precise, application-specific security guarantees. Policies are separate from the code and do not interfere with testing or development, and can be used for security regression testing. We describe the design and implementation of PIDGIN and report on using it: (1) to explore information security guarantees in legacy programs; (2) to develop and modify security policies concurrently with application development; and (3) to develop policies based on known vulnerabilities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Singh:2015:MNP, author = "Gagandeep Singh and Markus P{\"u}schel and Martin Vechev", title = "Making numerical program analysis fast", journal = j-SIGPLAN, volume = "50", number = "6", pages = "303--313", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738000", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Numerical abstract domains are a fundamental component in modern static program analysis and are used in a wide range of scenarios (e.g. computing array bounds, disjointness, etc). However, analysis with these domains can be very expensive, deeply affecting the scalability and practical applicability of the static analysis. 
Hence, it is critical to ensure that these domains are made highly efficient. In this work, we present a complete approach for optimizing the performance of the Octagon numerical abstract domain, a domain shown to be particularly effective in practice. Our optimization approach is based on two key insights: (i) the ability to perform online decomposition of the octagons leading to a massive reduction in operation counts, and (ii) leveraging classic performance optimizations from linear algebra such as vectorization, locality of reference, scalar replacement and others, for improving the key bottlenecks of the domain. Applying these ideas, we designed new algorithms for the core Octagon operators with better asymptotic runtime than prior work and combined them with the optimization techniques to achieve high actual performance. We implemented our approach in the Octagon operators exported by the popular APRON C library, thus enabling existing static analyzers using APRON to immediately benefit from our work. To demonstrate the performance benefits of our approach, we evaluated our framework on three published static analyzers showing massive speed-ups for the time spent in Octagon analysis (e.g., up to 146x) as well as significant end-to-end program analysis speed-ups (up to 18.7x). Based on these results, we believe that our framework can serve as a new basis for static analysis with the Octagon numerical domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Weijiang:2015:TDA, author = "Yusheng Weijiang and Shruthi Balakrishna and Jianqiao Liu and Milind Kulkarni", title = "Tree dependence analysis", journal = j-SIGPLAN, volume = "50", number = "6", pages = "314--325", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737972", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop a new framework for analyzing recursive methods that perform traversals over trees, called tree dependence analysis. This analysis translates dependence analysis techniques for regular programs to the irregular space, identifying the structure of dependences within a recursive method that traverses trees. We develop a dependence test that exploits the dependence structure of such programs, and can prove that several locality- and parallelism- enhancing transformations are legal. In addition, we extend our analysis with a novel path-dependent, conditional analysis to refine the dependence test and prove the legality of transformations for a wider range of algorithms. We then use these analyses to show that several common algorithms that manipulate trees recursively are amenable to several locality- and parallelism-enhancing transformations. 
This work shows that classical dependence analysis techniques, which have largely been confined to nested loops over array data structures, can be extended and translated to work for complex, recursive programs that operate over pointer-based data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Kang:2015:FCM, author = "Jeehoon Kang and Chung-Kil Hur and William Mansky and Dmitri Garbuzov and Steve Zdancewic and Viktor Vafeiadis", title = "A formal {C} memory model supporting integer-pointer casts", journal = j-SIGPLAN, volume = "50", number = "6", pages = "326--335", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738005", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ISO C standard does not specify the semantics of many valid programs that use non-portable idioms such as integer-pointer casts. Recent efforts at formal definitions and verified implementation of the C language inherit this feature. By adopting high-level abstract memory models, they validate common optimizations. On the other hand, this prevents reasoning about much low-level code relying on the behavior of common implementations, where formal verification has many applications. We present the first formal memory model that allows many common optimizations and fully supports operations on the representation of pointers. All arithmetic operations are well-defined for pointers that have been cast to integers. Crucially, our model is also simple to understand and program with. All our results are fully formalized in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Hathhorn:2015:DUC, author = "Chris Hathhorn and Chucky Ellison and Grigore Rosu", title = "Defining the undefinedness of {C}", journal = j-SIGPLAN, volume = "50", number = "6", pages = "336--345", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737979", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a ``negative'' semantics of the C11 language---a semantics that does not just give meaning to correct programs, but also rejects undefined programs. We investigate undefined behavior in C and discuss the techniques and special considerations needed for formally specifying it. We have used these techniques to modify and extend a semantics of C into one that captures undefined behavior. The amount of semantic infrastructure and effort required to achieve this was unexpectedly high, in the end nearly doubling the size of the original semantics. From our semantics, we have automatically extracted an undefinedness checker, which we evaluate against other popular analysis tools, using our own test suite in addition to a third-party test suite. Our checker is capable of detecting examples of all 77 categories of core language undefinedness appearing in the C11 standard, more than any other tool we considered. 
Based on this evaluation, we argue that our work is the most comprehensive and complete semantic treatment of undefined behavior in C, and thus of the C language itself.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Park:2015:KCF, author = "Daejun Park and Andrei Stefanescu and Grigore Rosu", title = "{KJS}: a complete formal semantics of {JavaScript}", journal = j-SIGPLAN, volume = "50", number = "6", pages = "346--356", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737991", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents KJS, the most complete and thoroughly tested formal semantics of JavaScript to date. Being executable, KJS has been tested against the ECMAScript 5.1 conformance test suite, and passes all 2,782 core language tests. Among the existing implementations of JavaScript, only Chrome V8's passes all the tests, and no other semantics passes more than 90\%. In addition to a reference implementation for JavaScript, KJS also yields a simple coverage metric for a test suite: the set of semantic rules it exercises. Our semantics revealed that the ECMAScript 5.1 conformance test suite fails to cover several semantic rules. Guided by the semantics, we wrote tests to exercise those rules. The new tests revealed bugs both in production JavaScript engines (Chrome V8, Safari WebKit, Firefox SpiderMonkey) and in other semantics. KJS is symbolically executable, thus it can be used for formal analysis and verification of JavaScript programs. We verified non-trivial programs and found a known security vulnerability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Wilcox:2015:VFI, author = "James R. Wilcox and Doug Woos and Pavel Panchekha and Zachary Tatlock and Xi Wang and Michael D. Ernst and Thomas Anderson", title = "{Verdi}: a framework for implementing and formally verifying distributed systems", journal = j-SIGPLAN, volume = "50", number = "6", pages = "357--368", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737958", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Distributed systems are difficult to implement correctly because they must handle both concurrency and failures: machines may crash at arbitrary points and networks may reorder, drop, or duplicate packets. Further, their behavior is often too complex to permit exhaustive testing. Bugs in these systems have led to the loss of critical data and unacceptable service outages. We present Verdi, a framework for implementing and formally verifying distributed systems in Coq. Verdi formalizes various network semantics with different faults, and the developer chooses the most appropriate fault model when verifying their implementation. 
Furthermore, Verdi eases the verification burden by enabling the developer to first verify their system under an idealized fault model, then transfer the resulting correctness guarantees to a more realistic fault model without any additional proof burden. To demonstrate Verdi's utility, we present the first mechanically checked proof of linearizability of the Raft state machine replication algorithm, as well as verified implementations of a primary-backup replication system and a key-value store. These verified systems provide similar performance to unverified equivalents.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Olivo:2015:SDA, author = "Oswaldo Olivo and Isil Dillig and Calvin Lin", title = "Static detection of asymptotic performance bugs in collection traversals", journal = j-SIGPLAN, volume = "50", number = "6", pages = "369--378", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper identifies and formalizes a prevalent class of asymptotic performance bugs called redundant traversal bugs and presents a novel static analysis for automatically detecting them. We evaluate our technique by implementing it in a tool called CLARITY and applying it to widely-used software packages such as the Google Core Collections Library, the Apache Common Collections, and the Apache Ant build tool. Across 1.6M lines of Java code, CLARITY finds 92 instances of redundant traversal bugs, including 72 that have never been previously reported, with just 5 false positives. To evaluate the performance impact of these bugs, we manually repair these programs and find that for an input size of 50,000, all repaired programs are at least 2.45$ \times $ faster than their original code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Ding:2015:AAC, author = "Yufei Ding and Jason Ansel and Kalyan Veeramachaneni and Xipeng Shen and Una-May O'Reilly and Saman Amarasinghe", title = "Autotuning algorithmic choice for input sensitivity", journal = j-SIGPLAN, volume = "50", number = "6", pages = "379--390", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737969", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A daunting challenge faced by program performance autotuning is input sensitivity, where the best autotuned configuration may vary with different input sets. This paper presents a novel two-level input learning algorithm to tackle the challenge for an important class of autotuning problems, algorithmic autotuning. The new approach uses a two-level input clustering method to automatically refine input grouping, feature selection, and classifier construction.
Its design solves a series of open issues that are particularly essential to algorithmic autotuning, including the enormous optimization space, complex influence by deep input features, high cost in feature extraction, and variable accuracy of algorithmic choices. Experimental results show that the new solution yields up to a 3x speedup over using a single configuration for all inputs, and a 34x speedup over a traditional one-level method for addressing input sensitivity in program optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Mendis:2015:HLH, author = "Charith Mendis and Jeffrey Bosboom and Kevin Wu and Shoaib Kamil and Jonathan Ragan-Kelley and Sylvain Paris and Qin Zhao and Saman Amarasinghe", title = "Helium: lifting high-performance stencil kernels from stripped x86 binaries to halide {DSL} code", journal = j-SIGPLAN, volume = "50", number = "6", pages = "391--402", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737974", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Highly optimized programs are prone to bit rot, where performance quickly becomes suboptimal in the face of new hardware and compiler techniques. In this paper we show how to automatically lift performance-critical stencil kernels from a stripped x86 binary and generate the corresponding code in the high-level domain-specific language Halide. Using Halide's state-of-the-art optimizations targeting current hardware, we show that new optimized versions of these kernels can replace the originals to rejuvenate the application for newer hardware. The original optimized code for kernels in stripped binaries is nearly impossible to analyze statically. Instead, we rely on dynamic traces to regenerate the kernels. We perform buffer structure reconstruction to identify input, intermediate and output buffer shapes. We abstract from a forest of concrete dependency trees which contain absolute memory addresses to symbolic trees suitable for high-level code generation. This is done by canonicalizing trees, clustering them based on structure, inferring higher-dimensional buffer accesses and finally by solving a set of linear equations based on buffer accesses to lift them up to simple, high-level expressions. Helium can handle highly optimized, complex stencil kernels with input-dependent conditionals. We lift seven kernels from Adobe Photoshop giving a 75\% performance improvement, four kernels from IrfanView, leading to 4.97$ \times $ performance, and one stencil from the miniGMG multigrid benchmark netting a 4.25$ \times $ improvement in performance. We manually rejuvenated Photoshop by replacing eleven of Photoshop's filters with our lifted implementations, giving 1.12$ \times $ speedup without affecting the user experience.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Bowman:2015:PGM, author = "William J. Bowman and Swaha Miller and Vincent St-Amour and R. 
Kent Dybvig", title = "Profile-guided meta-programming", journal = j-SIGPLAN, volume = "50", number = "6", pages = "403--412", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737990", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Contemporary compiler systems such as GCC, .NET, and LLVM incorporate profile-guided optimizations (PGOs) on low-level intermediate code and basic blocks, with impressive results over purely static heuristics. Recent work shows that profile information is also useful for performing source-to-source optimizations via meta-programming. For example, using profiling information to inform decisions about data structures and algorithms can potentially lead to asymptotic improvements in performance. We present a design for profile-guided meta-programming in a general-purpose meta-programming system. Our design is parametric over the particular profiler and meta-programming system. We implement this design in two different meta-programming systems---the syntactic extensions systems of Chez Scheme and Racket---and provide several profile-guided meta-programs as usability case studies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Sivaramakrishnan:2015:DPE, author = "KC Sivaramakrishnan and Gowtham Kaki and Suresh Jagannathan", title = "Declarative programming over eventually consistent data stores", journal = j-SIGPLAN, volume = "50", number = "6", pages = "413--424", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737981", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "User-facing online services utilize geo-distributed data stores to minimize latency and tolerate partial failures, with the intention of providing a fast, always-on experience. However, geo-distribution does not come for free; application developers have to contend with weak consistency behaviors, and the lack of abstractions to composably construct high-level replicated data types, necessitating the need for complex application logic and invariably exposing inconsistencies to the user. Some commercial distributed data stores and several academic proposals provide a lattice of consistency levels, with stronger consistency guarantees incurring increased latency and throughput costs. However, correctly assigning the right consistency level for an operation requires subtle reasoning and is often an error-prone task. In this paper, we present QUELEA, a declarative programming model for eventually consistent data stores (ECDS), equipped with a contract language, capable of specifying fine-grained application --- level consistency properties. A contract enforcement system analyses contracts, and automatically generates the appropriate consistency protocol for the method protected by the contract. We describe an implementation of QUELEA on top of an off-the-shelf ECDS that provides support for coordination-free transactions. 
Several benchmarks including two large web applications, illustrate the effectiveness of our approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Siek:2015:BCT, author = "Jeremy Siek and Peter Thiemann and Philip Wadler", title = "Blame and coercion: together again for the first time", journal = j-SIGPLAN, volume = "50", number = "6", pages = "425--435", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737968", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C\#, Dart, Pyret, Racket, TypeScript, VB: many recent languages integrate dynamic and static types via gradual typing. We systematically develop three calculi for gradual typing and the relations between them, building on and strengthening previous work. The calculi are: \lambda B, based on the blame calculus of Wadler and Findler (2009); \lambda C, inspired by the coercion calculus of Henglein (1994); \lambda S inspired by the space-efficient calculus of Herman, Tomb, and Flanagan (2006) and the threesome calculus of Siek and Wadler (2010). While \lambda B is little changed from previous work, \lambda C and \lambda S are new. Together, \lambda B, \lambda C, and \lambda S provide a coherent foundation for design, implementation, and optimisation of gradual types. We define translations from \lambda B to \lambda C and from \lambda C to \lambda S. Much previous work lacked proofs of correctness or had weak correctness criteria; here we demonstrate the strongest correctness criterion one could hope for, that each of the translations is fully abstract. Each of the calculi reinforces the design of the others: \lambda C has a particularly simple definition, and the subtle definition of blame safety for \lambda B is justified by the simple definition of blame safety for \lambda C. Our calculus \lambda S is implementation-ready: the first space-efficient calculus that is both straightforward to implement and easy to understand. We give two applications: first, using full abstraction from \lambda C to \lambda S to validate the challenging part of full abstraction between \lambda B and \lambda C; and, second, using full abstraction from \lambda B to \lambda S to easily establish the Fundamental Property of Casts, which required a custom bisimulation and six lemmas in earlier work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Zhang:2015:LFO, author = "Yizhou Zhang and Matthew C. Loring and Guido Salvaneschi and Barbara Liskov and Andrew C. Myers", title = "Lightweight, flexible object-oriented generics", journal = j-SIGPLAN, volume = "50", number = "6", pages = "436--445", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738008", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The support for generic programming in modern object-oriented programming languages is awkward and lacks desirable expressive power. 
We introduce an expressive genericity mechanism that adds expressive power and strengthens static checking, while remaining lightweight and simple in common use cases. Like type classes and concepts, the mechanism allows existing types to model type constraints retroactively. For expressive power, we expose models as named constructs that can be defined and selected explicitly to witness constraints; in common uses of genericity, however, types implicitly witness constraints without additional programmer effort. Models are integrated into the object-oriented style, with features like model generics, model-dependent types, model enrichment, model multimethods, constraint entailment, model inheritance, and existential quantification further extending expressive power in an object-oriented setting. We introduce the new genericity features and show that common generic programming idioms, including current generic libraries, can be expressed more precisely and concisely. The static semantics of the mechanism and a proof of a key decidability property can be found in an associated technical report.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Nguyen:2015:RCC, author = "Ph{\'u}c C. Nguy{\v{e}}n and David {Van Horn}", title = "Relatively complete counterexamples for higher-order programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "446--456", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737971", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we study the problem of generating inputs to a higher-order program causing it to error. We first approach the problem in the setting of PCF, a typed, core functional language and contribute the first relatively complete method for constructing counterexamples for PCF programs. The method is relatively complete with respect to a first-order solver over the base types of PCF. In practice, this means an SMT solver can be used for the effective, automated generation of higher-order counterexamples for a large class of programs. We achieve this result by employing a novel form of symbolic execution for higher-order programs. The remarkable aspect of this symbolic execution is that even though symbolic higher-order inputs and values are considered, the path condition remains a first-order formula. Our handling of symbolic function application enables the reconstruction of higher-order counterexamples from this first-order formula. After establishing our main theoretical results, we sketch how to apply the approach to untyped, higher-order, stateful languages with first-class contracts and show how counterexample generation can be used to detect contract violations in this setting. 
To validate our approach, we implement a tool generating counterexamples for erroneous modules written in Racket.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Chu:2015:AIP, author = "Duc-Hiep Chu and Joxan Jaffar and Minh-Thai Trinh", title = "Automatic induction proofs of data-structures in imperative programs", journal = j-SIGPLAN, volume = "50", number = "6", pages = "457--466", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737984", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the problem of automated reasoning about dynamically manipulated data structures. Essential properties are encoded as predicates whose definitions are formalized via user-defined recursive rules. Traditionally, proving relationships between such properties is limited to the unfold-and-match (U+M) paradigm which employs systematic transformation steps of folding/unfolding the rules. A proof, using U+M, succeeds when we find a sequence of transformations that produces a final formula which is obviously provable by simply matching terms. Our contribution here is the addition of the fundamental principle of induction to this automated process. We first show that some proof obligations that are dynamically generated in the process can be used as induction hypotheses in the future, and then we show how to use these hypotheses in an induction step which generates a new proof obligation aside from those obtained by using the fold/unfold operations. While the adding of induction is an obvious need in general, no automated method has managed to include this in a systematic and general way. The main reason for this is the problem of avoiding circular reasoning. We overcome this with a novel checking condition. In summary, our contribution is a proof method which --- beyond U+M --- performs automatic formula re-writing by treating previously encountered obligations in each proof path as possible induction hypotheses. In the practical evaluation part of this paper, we show how the commonly used technique of using unproven lemmas can be avoided, using realistic benchmarks. This not only removes the current burden of coming up with the appropriate lemmas, but also significantly boosts up the verification process, since lemma applications, coupled with unfolding, often induce a large search space. 
In the end, our method can automatically reason about a new class of formulas arising from practical program verification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Carbonneaux:2015:CCR, author = "Quentin Carbonneaux and Jan Hoffmann and Zhong Shao", title = "Compositional certified resource bounds", journal = j-SIGPLAN, volume = "50", number = "6", pages = "467--478", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737955", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new approach for automatically deriving worst-case resource bounds for C programs. The described technique combines ideas from amortized analysis and abstract interpretation in a unified framework to address four challenges for state-of-the-art techniques: compositionality, user interaction, generation of proof certificates, and scalability. Compositionality is achieved by incorporating the potential method of amortized analysis. It enables the derivation of global whole-program bounds with local derivation rules by naturally tracking size changes of variables in sequenced loops and function calls. The resource consumption of functions is described abstractly and a function call can be analyzed without access to the function body. User interaction is supported with a new mechanism that clearly separates qualitative and quantitative verification. A user can guide the analysis to derive complex non-linear bounds by using auxiliary variables and assertions. The assertions are separately proved using established qualitative techniques such as abstract interpretation or Hoare logic. Proof certificates are automatically generated from the local derivation rules. A soundness proof of the derivation system with respect to a formal cost semantics guarantees the validity of the certificates. Scalability is attained by an efficient reduction of bound inference to a linear optimization problem that can be solved by off-the-shelf LP solvers. The analysis framework is implemented in the publicly-available tool C4B. An experimental evaluation demonstrates the advantages of the new technique with a comparison of C4B with existing tools on challenging micro benchmarks and the analysis of more than 2900 lines of C code from the cBench benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Crary:2015:PPA, author = "Karl Crary and Michael J. Sullivan", title = "Peer-to-peer affine commitment using bitcoin", journal = j-SIGPLAN, volume = "50", number = "6", pages = "479--488", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737997", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The power of linear and affine logic lies in their ability to model state change. However, in a trustless, peer-to-peer setting, it is difficult to force principals to commit to state changes. 
We show how to solve the peer-to-peer affine commitment problem using a generalization of Bitcoin in which transactions deal in types rather than numbers. This has applications to proof-carrying authorization and mechanically executable contracts. Importantly, our system can be---and is---implemented on top of the existing Bitcoin network, so there is no need to recruit computing power to a new protocol.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Le:2015:TNT, author = "Ton Chanh Le and Shengchao Qin and Wei-Ngan Chin", title = "Termination and non-termination specification inference", journal = j-SIGPLAN, volume = "50", number = "6", pages = "489--498", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737993", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Techniques for proving termination and non-termination of imperative programs are usually considered as orthogonal mechanisms. In this paper, we propose a novel mechanism that analyzes and proves both program termination and non-termination at the same time. We first introduce the concept of second-order termination constraints and accumulate a set of relational assumptions on them via a Hoare-style verification. We then solve these assumptions with case analysis to determine the (conditional) termination and non-termination scenarios expressed in some specification logic form. In contrast to current approaches, our technique can construct a summary of terminating and non-terminating behaviors for each method. This enables modularity and reuse for our termination and non-termination proving processes. We have tested our tool on sample programs from a recent termination competition, and compared favorably against state-of-the-art termination analyzers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Emani:2015:CDM, author = "Murali Krishna Emani and Michael O'Boyle", title = "Celebrating diversity: a mixture of experts approach for runtime mapping in dynamic environments", journal = j-SIGPLAN, volume = "50", number = "6", pages = "499--508", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Matching program parallelism to platform parallelism using thread selection is difficult when the environment and available resources dynamically change. Existing compiler or runtime approaches are typically based on a one-size fits all policy. There is little ability to either evaluate or adapt the policy when encountering new external workloads or hardware resources. This paper focuses on selecting the best number of threads for a parallel application in dynamic environments. It develops a new scheme based on a mixture of experts approach. It learns online which, of a number of existing policies, or experts, is best suited for a particular environment without having to try out each policy. 
It does this by using a novel environment predictor as a proxy for the quality of an expert thread selection policy. Additional expert policies can easily be added and are selected only when appropriate. We evaluate our scheme in environments with varying external workloads and hardware resources. We then consider the case when workloads use affinity scheduling or are themselves adaptive and show that our approach, in all cases, outperforms existing schemes and surprisingly improves workload performance. On average, we improve 1.66x over OpenMP default, 1.34x over an online scheme, 1.25x over an offline policy and 1.2x over a state-of-the-art analytic model. Determining the right number and type of experts is an open problem and our initial analysis shows that adding more experts improves accuracy and performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Ren:2015:EER, author = "Bin Ren and Youngjoon Jo and Sriram Krishnamoorthy and Kunal Agrawal and Milind Kulkarni", title = "Efficient execution of recursive programs on commodity vector hardware", journal = j-SIGPLAN, volume = "50", number = "6", pages = "509--520", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738004", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The pursuit of computational efficiency has led to the proliferation of throughput-oriented hardware, from GPUs to increasingly wide vector units on commodity processors and accelerators. This hardware is designed to efficiently execute data-parallel computations in a vectorized manner. However, many algorithms are more naturally expressed as divide-and-conquer, recursive, task-parallel computations. In the absence of data parallelism, it seems that such algorithms are not well suited to throughput-oriented architectures. This paper presents a set of novel code transformations that expose the data parallelism latent in recursive, task-parallel programs. These transformations facilitate straightforward vectorization of task-parallel programs on commodity hardware. We also present scheduling policies that maintain high utilization of vector resources while limiting space usage. Across several task-parallel benchmarks, we demonstrate both efficient vector resource utilization and substantial speedup on chips using Intel's SSE4.2 vector units, as well as accelerators using Intel's AVX512 units.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Venkat:2015:LDT, author = "Anand Venkat and Mary Hall and Michelle Strout", title = "Loop and data transformations for sparse matrix code", journal = j-SIGPLAN, volume = "50", number = "6", pages = "521--532", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738003", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces three new compiler transformations for representing and transforming sparse matrix computations and their data representations.
In cooperation with run-time inspection, our compiler derives transformed matrix representations and associated transformed code to implement a variety of representations targeting different architecture platforms. This systematic approach to combining code and data transformations on sparse computations, which extends a polyhedral transformation and code generation framework, permits the compiler to compose these transformations with other transformations to generate code that is on average within 5\% and often exceeds manually-tuned, high-performance sparse matrix libraries CUSP and OSKI. Additionally, the compiler-generated inspector codes are on average 1.5$ \times $ faster than OSKI and perform comparably to CUSP, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Prountzos:2015:SPG, author = "Dimitrios Prountzos and Roman Manevich and Keshav Pingali", title = "Synthesizing parallel graph programs via automated planning", journal = j-SIGPLAN, volume = "50", number = "6", pages = "533--544", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737953", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a system that uses automated planning to synthesize correct and efficient parallel graph programs from high-level algorithmic specifications. Automated planning allows us to use constraints to declaratively encode program transformations such as scheduling, implementation selection, and insertion of synchronization. Each plan emitted by the planner satisfies all constraints simultaneously, and corresponds to a composition of these transformations. In this way, we obtain an integrated compilation approach for a very challenging problem domain. We have used this system to synthesize parallel programs for four graph problems: triangle counting, maximal independent set computation, preflow-push maxflow, and connected components. Experiments on a variety of inputs show that the synthesized implementations perform competitively with hand-written, highly-tuned code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Marr:2015:ZOM, author = "Stefan Marr and Chris Seaton and St{\'e}phane Ducasse", title = "Zero-overhead metaprogramming: reflection and metaobject protocols fast and without compromises", journal = j-SIGPLAN, volume = "50", number = "6", pages = "545--554", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737963", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Runtime metaprogramming enables many useful applications and is often a convenient solution to solve problems in a generic way, which makes it widely used in frameworks, middleware, and domain-specific languages. However, powerful metaobject protocols are rarely supported and even common concepts such as reflective method invocation or dynamic proxies are not optimized.
Solutions proposed in literature either restrict the metaprogramming capabilities or require application or library developers to apply performance improving techniques. For overhead-free runtime metaprogramming, we demonstrate that dispatch chains, a generalized form of polymorphic inline caches common to self-optimizing interpreters, are a simple optimization at the language-implementation level. Our evaluation with self-optimizing interpreters shows that unrestricted metaobject protocols can be realized for the first time without runtime overhead, and that this optimization is applicable for just-in-time compilation of interpreters based on meta-tracing as well as partial evaluation. In this context, we also demonstrate that optimizing common reflective operations can lead to significant performance improvements for existing applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Isradisaikul:2015:FCP, author = "Chinawat Isradisaikul and Andrew C. Myers", title = "Finding counterexamples from parsing conflicts", journal = j-SIGPLAN, volume = "50", number = "6", pages = "555--564", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737961", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing a parser remains remarkably painful. Automatic parser generators offer a powerful and systematic way to parse complex grammars, but debugging conflicts in grammars can be time-consuming even for experienced language designers. Better tools for diagnosing parsing conflicts will alleviate this difficulty. This paper proposes a practical algorithm that generates compact, helpful counterexamples for LALR grammars. For each parsing conflict in a grammar, a counterexample demonstrating the conflict is constructed. When the grammar in question is ambiguous, the algorithm usually generates a compact counterexample illustrating the ambiguity. This algorithm has been implemented as an extension to the CUP parser generator. The results from applying this implementation to a diverse collection of faulty grammars show that the algorithm is practical, effective, and suitable for inclusion in other LALR parser generators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Leung:2015:IPS, author = "Alan Leung and John Sarracino and Sorin Lerner", title = "Interactive parser synthesis by example", journal = j-SIGPLAN, volume = "50", number = "6", pages = "565--574", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite decades of research on parsing, the construction of parsers remains a painstaking, manual process prone to subtle bugs and pitfalls. We present a programming-by-example framework called Parsify that is able to synthesize a parser from input/output examples. The user does not write a single line of code. 
To achieve this, Parsify provides: (a) an iterative algorithm for synthesizing and refining a grammar one example at a time, (b) an interface that provides immediate visual feedback in response to changes in the grammar being refined, and (c) a graphical mechanism for specifying example parse trees using only textual selections. We empirically demonstrate the viability of our approach by using Parsify to construct parsers for source code drawn from Verilog, SQL, Apache, and Tiger.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Lucia:2015:SSP, author = "Brandon Lucia and Benjamin Ransford", title = "A simpler, safer programming and execution model for intermittent systems", journal = j-SIGPLAN, volume = "50", number = "6", pages = "575--585", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737978", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy harvesting enables novel devices and applications without batteries, but intermittent operation under energy harvesting poses new challenges to memory consistency that threaten to leave applications in failed states not reachable in continuous execution. This paper presents analytical models that aid in reasoning about intermittence. Using these, we develop DINO (Death Is Not an Option), a programming and execution model that simplifies programming for intermittent systems and ensures volatile and nonvolatile data consistency despite near-constant interruptions. DINO is the first system to address these consistency problems in the context of intermittent execution. We evaluate DINO on three energy-harvesting hardware platforms running different applications. The applications fail and exhibit error without DINO, but run correctly with DINO's modest 1.8-2.7$ \times $ run-time overhead. DINO also dramatically simplifies programming, reducing the set of possible failure-related control transfers by 5--9$ \times $.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Machado:2015:CDD, author = "Nuno Machado and Brandon Lucia and Lu{\'\i}s Rodrigues", title = "Concurrency debugging with differential schedule projections", journal = j-SIGPLAN, volume = "50", number = "6", pages = "586--595", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737973", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Symbiosis: a concurrency debugging technique based on novel differential schedule projections (DSPs). A DSP shows the small set of memory operations and data-flows responsible for a failure, as well as a reordering of those elements that avoids the failure. To build a DSP, Symbiosis first generates a full, failing, multithreaded schedule via thread path profiling and symbolic constraint solving. Symbiosis selectively reorders events in the failing schedule to produce a non-failing, alternate schedule. 
A DSP reports the ordering and data-flow differences between the failing and non-failing schedules. Our evaluation on buggy real-world software and benchmarks shows that, in practical time, Symbiosis generates DSPs that both isolate the small fraction of event orders and data-flows responsible for the failure, and show which event reorderings prevent failing. In our experiments, DSPs contain 81\% fewer events and 96\% less data-flows than the full failure-inducing schedules. Moreover, by allowing developers to focus on only a few events, DSPs reduce the amount of time required to find a valid fix.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Srinivasan:2015:SMC, author = "Venkatesh Srinivasan and Thomas Reps", title = "Synthesis of machine code from semantics", journal = j-SIGPLAN, volume = "50", number = "6", pages = "596--607", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we present a technique to synthesize machine-code instructions from a semantic specification, given as a Quantifier-Free Bit-Vector (QFBV) logic formula. Our technique uses an instantiation of the Counter-Example Guided Inductive Synthesis (CEGIS) framework, in combination with search-space pruning heuristics to synthesize instruction-sequences. To counter the exponential cost inherent in enumerative synthesis, our technique uses a divide-and-conquer strategy to break the input QFBV formula into independent sub-formulas, and synthesize instructions for the sub-formulas. Synthesizers created by our technique could be used to create semantics-based binary rewriting tools such as optimizers, partial evaluators, program obfuscators/de-obfuscators, etc. Our experiments for Intel's IA-32 instruction set show that, in comparison to our baseline algorithm, our search-space pruning heuristics reduce the synthesis time by a factor of 473, and our divide-and-conquer strategy reduces the synthesis time by a further 3 to 5 orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Gonnord:2015:SRF, author = "Laure Gonnord and David Monniaux and Gabriel Radanne", title = "Synthesis of ranking functions using extremal counterexamples", journal = j-SIGPLAN, volume = "50", number = "6", pages = "608--618", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737976", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a complete method for synthesizing lexicographic linear ranking functions (and thus proving termination), supported by inductive invariants, in the case where the transition relation of the program includes disjunctions and existentials (large block encoding of control flow). 
Previous work would either synthesize a ranking function at every basic block head, not just loop headers, which reduces the scope of programs that may be proved to be terminating, or expand large block transitions including tests into (exponentially many) elementary transitions, prior to computing the ranking function, resulting in a very large global constraint system. In contrast, our algorithm incrementally refines a global linear constraint system according to extremal counterexamples: only constraints that exclude spurious solutions are included. Experiments with our tool Termite show marked performance and scalability improvements compared to other systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Osera:2015:TED, author = "Peter-Michael Osera and Steve Zdancewic", title = "Type-and-example-directed program synthesis", journal = j-SIGPLAN, volume = "50", number = "6", pages = "619--630", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2738007", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an algorithm for synthesizing recursive functions that process algebraic datatypes. It is founded on proof-theoretic techniques that exploit both type information and input-output examples to prune the search space. The algorithm uses refinement trees, a data structure that succinctly represents constraints on the shape of generated code. We evaluate the algorithm by using a prototype implementation to synthesize more than 40 benchmarks and several non-trivial larger examples. Our results demonstrate that the approach meets or outperforms the state-of-the-art for this domain, in terms of synthesis time or attainable size of the generated programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '15 conference proceedings.", } @Article{Tu:2015:CIE, author = "Cheng-Chun Tu and Michael Ferdman and Chao-tung Lee and Tzi-cker Chiueh", title = "A Comprehensive Implementation and Evaluation of Direct Interrupt Delivery", journal = j-SIGPLAN, volume = "50", number = "7", pages = "1--15", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731189", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As the performance overhead associated with CPU and memory virtualization becomes largely negligible, research efforts are directed toward reducing the I/O virtualization overhead, which mainly comes from two sources: DMA set-up and payload copy, and interrupt delivery. The advent of SRIOV and MRIOV effectively reduces the DMA-related virtualization overhead to a minimum. Therefore, the last battleground for minimizing virtualization overhead is how to directly deliver every interrupt to its target VM without involving the hypervisor. This paper describes the design, implementation, and evaluation of a KVM-based direct interrupt delivery system called DID. DID delivers interrupts from SRIOV devices, virtual devices, and timers to their target VMs directly, completely avoiding VM exits. 
Moreover, DID does not require any modifications to the VM's operating system and preserves the correct priority among interrupts in all cases. We demonstrate that DID reduces the number of VM exits by a factor of 100 for I/O-intensive workloads, decreases the interrupt invocation latency by 80\%, and improves the throughput of a VM running Memcached by a factor of 3.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Pfefferle:2015:HVF, author = "Jonas Pfefferle and Patrick Stuedi and Animesh Trivedi and Bernard Metzler and Ionnis Koltsidas and Thomas R. Gross", title = "A Hybrid {I/O} Virtualization Framework for {RDMA}-capable Network Interfaces", journal = j-SIGPLAN, volume = "50", number = "7", pages = "17--30", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731200", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "DMA-capable interconnects, providing ultra-low latency and high bandwidth, are increasingly being used in the context of distributed storage and data processing systems. However, the deployment of such systems in virtualized data centers is currently inhibited by the lack of a flexible and high-performance virtualization solution for RDMA network interfaces. In this work, we present a hybrid virtualization architecture which builds upon the concept of separation of paths for control and data operations available in RDMA. With hybrid virtualization, RDMA control operations are virtualized using hypervisor involvement, while data operations are set up to bypass the hypervisor completely. We describe HyV (Hybrid Virtualization), a virtualization framework for RDMA devices implementing such a hybrid architecture. In the paper, we provide a detailed evaluation of HyV for different RDMA technologies and operations. We further demonstrate the advantages of HyV in the context of a real distributed system by running RAMCloud on a set of HyV-enabled virtual machines deployed across a 6-node RDMA cluster. All of the performance results we obtained illustrate that hybrid virtualization enables bare-metal RDMA performance inside virtual machines while retaining the flexibility typically associated with paravirtualization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Younge:2015:SHP, author = "Andrew J. Younge and John Paul Walters and Stephen P. Crago and Geoffrey C. 
Fox", title = "Supporting High Performance Molecular Dynamics in Virtualized Clusters using {IOMMU}, {SR-IOV}, and {GPUDirect}", journal = j-SIGPLAN, volume = "50", number = "7", pages = "31--38", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Cloud Infrastructure-as-a-Service paradigms have recently shown their utility for a vast array of computational problems, ranging from advanced web service architectures to high throughput computing. However, many scientific computing applications have been slow to adapt to virtualized cloud frameworks. This is due to performance impacts of virtualization technologies, coupled with the lack of advanced hardware support necessary for running many high performance scientific applications at scale. By using KVM virtual machines that leverage both Nvidia GPUs and InfiniBand, we show that molecular dynamics simulations with LAMMPS and HOOMD run at near-native speeds. This experiment also illustrates how virtualized environments can support the latest parallel computing paradigms, including both MPI+CUDA and new GPUDirect RDMA functionality. Specific findings show initial promise in scaling of such applications to larger production deployments targeting large scale computational workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Guo:2015:PBL, author = "Fei Guo and Seongbeom Kim and Yury Baskakov and Ishan Banerjee", title = "Proactively Breaking Large Pages to Improve Memory Overcommitment Performance in {VMware ESXi}", journal = j-SIGPLAN, volume = "50", number = "7", pages = "39--51", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731187", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "VMware ESXi leverages hardware support for MMU virtualization available in modern Intel/AMD CPUs. To optimize address translation performance when running on such CPUs, ESXi preferably uses host large pages (2MB in x86-64 systems) to back VM's guest memory. While using host large pages provides best performance when host has sufficient free memory, it increases host memory pressure and effectively defeats page sharing. Hence, the host is more likely to hit the point where ESXi has to reclaim VM memory through much more expensive techniques such as ballooning or host swapping. As a result, using host large pages may significantly hurt consolidation ratio. To deal with this problem, we propose a new host large page management policy that allows to: (a) identify 'cold' large pages and break them even when host has plenty of free memory; (b) break all large pages proactively when host free memory becomes scarce, but before the host starts ballooning or swapping; (c) reclaim the small pages within the broken large pages through page sharing. 
With the new policy, the shareable small pages can be shared much earlier and the amount of memory that needs to be ballooned or swapped can be largely reduced when host memory pressure is high. We also propose an algorithm to dynamically adjust the page sharing rate when proactively breaking large pages using a VM large page shareability estimator for higher efficiency. Experimental results show that the proposed large page management policy can improve the performance of various workloads up to 2.1x by significantly reducing the amount of ballooned or swapped memory when host memory pressure is high. Applications still fully benefit from host large pages when memory pressure is low.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Wang:2015:HPI, author = "Zhe Wang and Jianjun Li and Chenggang Wu and Dongyan Yang and Zhenjiang Wang and Wei-Chung Hsu and Bin Li and Yong Guan", title = "{HSPT}: Practical Implementation and Efficient Management of Embedded Shadow Page Tables for Cross-{ISA} System Virtual Machines", journal = j-SIGPLAN, volume = "50", number = "7", pages = "53--64", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731188", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Cross-ISA (Instruction Set Architecture) system-level virtual machines have significant research and practical value. For example, several recently announced virtual smart phones for iOS which run smart phone applications on x86-based PCs are deployed on cross-ISA system-level virtual machines. Also, for mobile device application development, by emulating the Android/ARM environment on the more powerful x86-64 platform, application development and debugging become more convenient and productive. However, the virtualization layer often incurs high performance overhead. The key overhead comes from memory virtualization where a guest virtual address (GVA) must go through multi-level address translation to become a host physical address (HPA). The Embedded Shadow Page Table (ESPT) approach has been proposed to effectively decrease this address translation cost. ESPT directly maps GVA to HPA, thus avoiding the lengthy guest virtual to guest physical, guest physical to host virtual, and host virtual to host physical address translation. However, the original ESPT work has a few drawbacks. For example, its implementation relies on a loadable kernel module (LKM) to manage the shadow page table. Using LKMs is less desirable for system virtual machines due to portability, security and maintainability concerns. Our work proposes a different, yet more practical, implementation to address the shortcomings. Instead of relying on LKMs, our approach adopts a shared memory mapping scheme to maintain the shadow page table (SPT) using only the ``mmap'' system call. Furthermore, this work studies the support of SPT for multi-processing in greater detail.
It devises three different SPT organizations and evaluates their strengths and weaknesses with standard and real Android applications on a system virtual machine that emulates the Android/ARM platform on x86-64 systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Kehne:2015:GEO, author = "Jens Kehne and Jonathan Metter and Frank Bellosa", title = "{GPUswap}: Enabling Oversubscription of {GPU} Memory through Transparent Swapping", journal = j-SIGPLAN, volume = "50", number = "7", pages = "65--77", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731192", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the last few years, GPUs have been finding their way into cloud computing platforms, allowing users to benefit from the performance of GPUs at low cost. However, a large portion of the cloud's cost advantage traditionally stems from oversubscription: Cloud providers rent out more resources to their customers than are actually available, expecting that the customers will not actually use all of the promised resources. For GPU memory, this oversubscription is difficult due to the lack of support for demand paging in current GPUs. Therefore, recent approaches to enabling oversubscription of GPU memory resort to software scheduling of GPU kernels --- which has been shown to induce significant runtime overhead in applications even if sufficient GPU memory is available --- to ensure that data is present on the GPU when referenced. In this paper, we present GPUswap, a novel approach to enabling oversubscription of GPU memory that does not rely on software scheduling of GPU kernels. GPUswap uses the GPU's ability to access system RAM directly to extend the GPU's own memory. To that end, GPUswap transparently relocates data from the GPU to system RAM in response to memory pressure. GPUswap ensures that all data is permanently accessible to the GPU and thus allows applications to submit commands to the GPU directly at any time, without the need for software scheduling. Experiments with our prototype implementation show that GPU applications can still execute even with only 20 MB of GPU memory available. In addition, while software scheduling suffers from permanent overhead even with sufficient GPU memory available, our approach executes GPU applications with native performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Gupta:2015:HER, author = "Vishal Gupta and Min Lee and Karsten Schwan", title = "{HeteroVisor}: Exploiting Resource Heterogeneity to Enhance the Elasticity of Cloud Platforms", journal = j-SIGPLAN, volume = "50", number = "7", pages = "79--92", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731191", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents HeteroVisor, a heterogeneity-aware hypervisor that exploits resource heterogeneity to enhance the elasticity of cloud systems.
Introducing the notion of 'elasticity' (E) states, HeteroVisor permits applications to manage their changes in resource requirements as state transitions that implicitly move their execution among heterogeneous platform components. Masking the details of platform heterogeneity from virtual machines, the E-state abstraction allows applications to adapt their resource usage in a fine-grained manner via VM-specific 'elasticity drivers' encoding VM-desired policies. The approach is explored for the heterogeneous processor and memory subsystems evolving for modern server platforms, leading to mechanisms that can manage these heterogeneous resources dynamically and as required by the different VMs being run. HeteroVisor is implemented for the Xen hypervisor, with mechanisms that go beyond core scaling to also deal with memory resources, via the online detection of hot memory pages and transparent page migration. Evaluation on an emulated heterogeneous platform uses workload traces from real-world data, demonstrating the ability to provide high on-demand performance while also reducing resource usage for these workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Wang:2015:DAA, author = "Hui Wang and Canturk Isci and Lavanya Subramanian and Jongmoo Choi and Depei Qian and Onur Mutlu", title = "{A-DRM}: Architecture-aware Distributed Resource Management of Virtualized Clusters", journal = j-SIGPLAN, volume = "50", number = "7", pages = "93--106", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731202", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtualization technologies has been widely adopted by large-scale cloud computing platforms. These virtualized systems employ distributed resource management (DRM) to achieve high resource utilization and energy savings by dynamically migrating and consolidating virtual machines. DRM schemes usually use operating-system-level metrics, such as CPU utilization, memory capacity demand and I/O utilization, to detect and balance resource contention. However, they are oblivious to microarchitecture-level resource interference (e.g., memory bandwidth contention between different VMs running on a host), which is currently not exposed to the operating system. We observe that the lack of visibility into microarchitecture-level resource interference significantly impacts the performance of virtualized systems. Motivated by this observation, we propose a novel architecture-aware DRM scheme (ADRM), that takes into account microarchitecture-level resource interference when making migration decisions in a virtualized cluster. ADRM makes use of three core techniques: (1) a profiler to monitor the microarchitecture-level resource usage behavior online for each physical host, (2) a memory bandwidth interference model to assess the interference degree among virtual machines on a host, and (3) a cost-benefit analysis to determine a candidate virtual machine and a host for migration. 
Real system experiments on thirty randomly selected combinations of applications from the CPU2006, PARSEC, STREAM, and NAS Parallel Benchmark suites in a four-host virtualized cluster show that ADRM can improve performance by up to 26.55\%, with an average of 9.67\%, compared to traditional DRM schemes that lack visibility into microarchitecture-level resource utilization and contention.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Singh:2015:TVC, author = "Rayman Preet Singh and Tim Brecht and S. Keshav", title = "Towards {VM} Consolidation Using a Hierarchy of Idle States", journal = j-SIGPLAN, volume = "50", number = "7", pages = "107--119", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731195", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Typical VM consolidation approaches re-pack VMs into fewer physical machines, resulting in energy and cost savings [13, 19, 23, 40]. Recent work has explored a just-in-time approach to VM consolidation by transitioning VMs to an inactive state when idle and activating them on the arrival of client requests [17, 21]. This leads to increased VM density at the cost of an increase in client request latency (called miss penalty). The VM density so obtained, although greater, is still limited by the number of VMs that can be hosted in the one inactive state. If idle VMs were hosted in multiple inactive states, VM density could be increased further while ensuring small miss penalties. However, VMs in different inactive states have different capacities, activation times, and resource requirements. Therefore, a key question is: How should VMs be transitioned between different states to minimize the expected miss penalty? This paper explores the hosting of idle VMs in a hierarchy of multiple such inactive states, and studies the effect of different idle VM management policies on VM density and miss penalties. We formulate a mathematical model for the problem, and provide a theoretical lower bound on the miss penalty. Using an off-the-shelf virtualization solution (LXC [2]), we demonstrate how the required model parameters can be obtained. We evaluate a variety of policies and quantify their miss penalties for different VM densities.
We observe that some policies consolidate up to 550 VMs per machine with average miss penalties smaller than 1 ms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Kyle:2015:ADA, author = "Stephen Kyle and Hugh Leather and Bj{\"o}rn Franke and Dave Butcher and Stuart Monteith", title = "Application of Domain-aware Binary Fuzzing to Aid {Android} Virtual Machine Testing", journal = j-SIGPLAN, volume = "50", number = "7", pages = "121--132", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731198", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "The development of a new application virtual machine (VM), like the creation of any complex piece of software, is a bug-prone process. In version 5.0, the widely-used Android operating system has changed from the Dalvik VM to the newly-developed ART VM to execute Android applications. As new iterations of this VM are released, how can the developers aim to reduce the number of potentially security-threatening bugs that make it into the final product? In this paper we combine domain-aware binary fuzzing and differential testing to produce DexFuzz, a tool that exploits the presence of multiple modes of execution within a VM to test for defects. These modes of execution include the interpreter and a runtime that executes ahead-of-time compiled code. We find and present a number of bugs in the in-development version of ART in the Android Open Source Project. We also assess DexFuzz's ability to highlight defects in the experimental version of ART released in the previous version of Android, 4.4, finding 189 crashing programs and 15 divergent programs that indicate defects after only 5,000 attempts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Suneja:2015:EVI, author = "Sahil Suneja and Canturk Isci and Eyal de Lara and Vasanth Bala", title = "Exploring {VM} Introspection: Techniques and Trade-offs", journal = j-SIGPLAN, volume = "50", number = "7", pages = "133--146", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "While there are a variety of existing virtual machine introspection (VMI) techniques, their latency, overhead, complexity and consistency trade-offs are not clear. In this work, we address this gap by first organizing the various existing VMI techniques into a taxonomy based upon their operational principles, so that they can be put into context. Next we perform a thorough exploration of their trade-offs both qualitatively and quantitatively. We present a comprehensive set of observations and best practices for efficient, accurate and consistent VMI operation based on our experiences with these techniques. 
Our results show the stunning range of variations in performance, complexity and overhead with different VMI techniques. We further present a deep dive on VMI consistency aspects to understand the sources of inconsistency in observed VM state and show that, contrary to common expectation, pause-and-introspect based VMI techniques achieve very little to improve consistency despite their substantial performance impact.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Zeng:2015:PPH, author = "Junyuan Zeng and Yangchun Fu and Zhiqiang Lin", title = "{PEMU}: a Pin Highly Compatible Out-of-{VM} Dynamic Binary Instrumentation Framework", journal = j-SIGPLAN, volume = "50", number = "7", pages = "147--160", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731201", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Over the past 20 years, we have witnessed a widespread adoption of dynamic binary instrumentation (DBI) for numerous program analyses and security applications including program debugging, profiling, reverse engineering, and malware analysis. To date, there are many DBI platforms, and the most popular one is Pin, which provides various instrumentation APIs for process instrumentation. However, Pin does not support the instrumentation of OS kernels. In addition, the execution of the instrumentation and analysis routine is always inside the virtual machine (VM). Consequently, it cannot support any out-of-VM introspection that requires strong isolation. Therefore, this paper presents PEMU, a new open source DBI framework that is compatible with Pin-APIs, but supports out-of-VM introspection for both user level processes and OS kernels. Unlike in-VM instrumentation in which there is no semantic gap, for out-of-VM introspection we have to bridge the semantic gap and provide abstractions (i.e., APIs) for programmers. One important feature of PEMU is its API compatibility with Pin. As such, many Pin plugins are able to execute atop PEMU without any source code modification. We have implemented PEMU, and our experimental results with the SPEC 2006 benchmarks show that PEMU introduces reasonable overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Jaffer:2015:IRD, author = "Shehbaz Jaffer and Piyus Kedia and Sorav Bansal", title = "Improving Remote Desktopping Through Adaptive Record\slash Replay", journal = j-SIGPLAN, volume = "50", number = "7", pages = "161--172", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731193", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Accessing the display of a computer remotely is popularly called remote desktopping.
Remote desktopping software installs at both the user-facing client computer and the remote server computer; it simulates user's input events at server, and streams the corresponding display changes to client, thus providing an illusion to the user of controlling the remote machine using local input devices (e.g., keyboard/mouse). Many such remote desktopping tools are widely used. We show that if the remote server is a virtual machine (VM) and the client is reasonably powerful (e.g., current laptop and desktop grade hardware), VM deterministic replay capabilities can be used adaptively to significantly reduce the network bandwidth consumption and server-side CPU utilization of a remote desktopping tool. We implement these optimizations in a tool based on Qemu/KVM virtualization platform and VNC remote desktopping platform. Our tool reduces VNC's network bandwidth consumption by up to 9x and server-side CPU utilization by up to 56\% for popular graphics-intensive applications. On the flip side, our techniques consume higher CPU/memory/disk resources at the client. The effect of our optimizations on user-perceived latency is negligible.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Oh:2015:MWA, author = "JinSeok Oh and Jin-woo Kwon and Hyukwoo Park and Soo-Mook Moon", title = "Migration of {Web} Applications with Seamless Execution", journal = j-SIGPLAN, volume = "50", number = "7", pages = "173--185", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731197", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web applications (apps) are programmed using HTML5, CSS, and JavaScript, and are distributed in the source code format. Web apps can be executed on any devices where a web browser is installed, allowing one-source, multi-platform environment. We can exploit this advantage of platform independence for a new user experience called app migration, which allows migrating an app in the middle of execution seamlessly between smart devices. This paper proposes such a migration framework for web apps where we can save the current state of a running app and resume its execution on a different device by restoring the saved state. We save the web app's state in the form of a snapshot, which is actually another web app whose execution can restore the saved state. In the snapshot, the state of the JavaScript variables and DOM trees are saved using the JSON format. We solved some of the saving/restoring problems related to event handlers and closures by accessing the browser and the JavaScript engine internals. Our framework does not require instrumenting an app or changing its source code, but works for the original app. We implemented the framework on the Chrome browser with the V8 JavaScript engine and successfully migrated non-trivial sample apps with reasonable saving and restoring overhead. 
We also discuss other uses of the snapshot for optimizations and user experiences for the web platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Ren:2015:ASE, author = "Jianbao Ren and Yong Qi and Yuehua Dai and Xiaoguang Wang and Yi Shi", title = "{AppSec}: a Safe Execution Environment for Security Sensitive Applications", journal = j-SIGPLAN, volume = "50", number = "7", pages = "187--199", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731199", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A malicious OS kernel can easily access a user's private data in main memory and pry into human-machine interaction data, even when privacy enforcement is applied at the application or OS level. This paper introduces AppSec, a hypervisor-based safe execution environment, to protect both the memory data and human-machine interaction data of security-sensitive applications from the untrusted OS transparently. AppSec provides several security mechanisms on an untrusted OS. AppSec introduces a safe loader to check the code integrity of the application and dynamic shared objects. During runtime, AppSec protects the application and dynamic shared objects from being modified and verifies kernel memory accesses according to the application's intention. AppSec provides a device isolation mechanism to prevent human-machine interaction devices from being accessed by a compromised kernel. On top of that, AppSec further provides a privilege-based window system to protect the application's X resources. The major advantages of AppSec are threefold. First, AppSec verifies and protects all dynamic shared objects during runtime. Second, AppSec mediates kernel memory access according to the application's intention rather than coarsely encrypting all of the application's data. Third, AppSec provides a trusted I/O path from the end-user to the application. A prototype of AppSec is implemented and shows that AppSec is efficient and practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Jin:2015:HAS, author = "Seongwook Jin and Jinho Seol and Jaehyuk Huh and Seungryoul Maeng", title = "Hardware-Assisted Secure Resource Accounting under a Vulnerable Hypervisor", journal = j-SIGPLAN, volume = "50", number = "7", pages = "201--213", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731203", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "With the proliferation of cloud computing to outsource computation to remote servers, the accountability of computational resources has emerged as an important new challenge for both cloud users and providers. Among cloud resources, the actual allocation of CPU and memory is difficult to verify, since current virtualization techniques attempt to hide the discrepancy between physical and virtual allocations for these two resources.
This paper proposes an online verifiable resource accounting technique for CPU and memory allocation for cloud computing. Unlike prior approaches for cloud resource accounting, the proposed accounting mechanism, called Hardware-assisted Resource Accounting (HRA), uses the hardware support for system management mode (SMM) and virtualization to provide secure resource accounting, even if the hypervisor is compromised. Using the secure isolated execution support of SMM, this study investigates two aspects of verifiable resource accounting for cloud systems. First, this paper presents how the hardware-assisted SMM and virtualization techniques can be used to implement the secure resource accounting mechanism even under a compromised hypervisor. Second, the paper investigates a sample-based resource accounting technique to minimize performance overheads. Using a statistical random sampling method, the technique estimates the overall CPU and memory allocation status with 99\%--100\% accuracy and performance degradations of 0.1\%--0.5\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Cui:2015:PPA, author = "Lei Cui and Tianyu Wo and Bo Li and Jianxin Li and Bin Shi and Jinpeng Huai", title = "{PARS}: a Page-Aware Replication System for Efficiently Storing Virtual Machine Snapshots", journal = j-SIGPLAN, volume = "50", number = "7", pages = "215--228", month = jul, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2817817.2731190", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtual machine (VM) snapshots enhance system availability by saving the running state into stable storage during failure-free execution and rolling back to the snapshot point upon failures. Unfortunately, the snapshot state may be lost due to disk failures, so that the VM cannot be recovered. Popular distributed file systems employ replication to tolerate disk failures by placing redundant copies across dispersed disks. However, unless user-specific personalization is provided, these systems consider the data in the file as of the same importance and create identical copies of the entire file, leading to non-trivial additional storage overhead. This paper proposes a page-aware replication system (PARS) to store VM snapshots efficiently. PARS employs VM introspection to explore how a page is used by the guest, and classifies pages by their importance to system execution. If a page is critical, PARS replicates it into multiple copies to ensure high availability and long-term durability. Otherwise, the loss of this page causes no harm to proper system operation, so PARS saves only one copy of the page. Consequently, PARS improves storage efficiency without compromising availability. We have implemented PARS to justify its practicality.
The experimental results demonstrate that PARS achieves 53.9\% space saving compared to the native replication approach in HDFS which replicates the whole snapshot file fully and identically.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '15 conference proceedings.", } @Article{Gramoli:2015:MTY, author = "Vincent Gramoli", title = "More than you ever wanted to know about synchronization: synchrobench, measuring the impact of the synchronization on concurrent algorithms", journal = j-SIGPLAN, volume = "50", number = "8", pages = "1--10", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688501", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we present the most extensive comparison of synchronization techniques. We evaluate 5 different synchronization techniques through a series of 31 data structure algorithms from the recent literature on 3 multicore platforms from Intel, Sun Microsystems and AMD. To this end, we developed in C/C++ and Java a new micro-benchmark suite, called Synchrobench, hence helping the community evaluate new data structures and synchronization techniques. The main conclusion of this evaluation is threefold: (i) although compare-and-swap helps achieving the best performance on multicores, doing so correctly is hard; (ii) optimistic locking offers varying performance results while transactional memory offers more consistent results; and (iii) copy-on-write and read-copy-update suffer more from contention than any other technique but could be combined with others to derive efficient algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Alistarh:2015:SSR, author = "Dan Alistarh and Justin Kopinsky and Jerry Li and Nir Shavit", title = "The {SprayList}: a scalable relaxed priority queue", journal = j-SIGPLAN, volume = "50", number = "8", pages = "11--20", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-performance concurrent priority queues are essential for applications such as task scheduling and discrete event simulation. Unfortunately, even the best performing implementations do not scale past a number of threads in the single digits. This is because of the sequential bottleneck in accessing the elements at the head of the queue in order to perform a DeleteMin operation. In this paper, we present the SprayList, a scalable priority queue with relaxed ordering semantics. Starting from a non-blocking SkipList, the main innovation behind our design is that the DeleteMin operations avoid a sequential bottleneck by ``spraying'' themselves onto the head of the SkipList list in a coordinated fashion. The spraying is implemented using a carefully designed random walk, so that DeleteMin returns an element among the first $O(p \log^3 p)$ in the list, with high probability, where $p$ is the number of threads. 
We prove that the running time of a DeleteMin operation is $O(\log^3 p)$, with high probability, independent of the size of the list. Our experiments show that the relaxed semantics allow the data structure to scale for high thread counts, comparable to a classic unordered SkipList. Furthermore, we observe that, for reasonably parallel workloads, the scalability benefits of relaxation considerably outweigh the additional work due to out-of-order execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Arbel:2015:PRR, author = "Maya Arbel and Adam Morrison", title = "Predicate {RCU}: an {RCU} for scalable concurrent updates", journal = j-SIGPLAN, volume = "50", number = "8", pages = "21--30", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Read-copy update (RCU) is a shared memory synchronization mechanism with scalable synchronization-free reads that nevertheless execute correctly with concurrent updates. To guarantee the consistency of such reads, an RCU update transitioning the data structure between certain states must wait for the completion of all existing reads. Unfortunately, these waiting periods quickly become a bottleneck, and thus RCU remains unused in data structures that require scalable, fine-grained, update operations. To solve this problem, we present Predicate RCU (PRCU), an RCU variant in which an update waits only for the reads whose consistency it affects, which are specified by a user-supplied predicate. We explore the trade-offs in implementing PRCU, describing implementations that reduce wait times by 10--100x with varying overhead on reads on modern x86 multiprocessor machines. We demonstrate the applicability of PRCU by applying it to two RCU-based concurrent algorithms---the Citrus binary search tree and a resizable hash table---and show experimentally that PRCU significantly improves the performance of both algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Golan-Gueta:2015:ASA, author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and Eran Yahav", title = "Automatic scalable atomicity via semantic locking", journal = j-SIGPLAN, volume = "50", number = "8", pages = "31--41", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we consider concurrent programs in which the shared state consists of instances of linearizable ADTs (abstract data types). We present an automated approach to concurrency control that addresses a common need: the need to atomically execute a code fragment, which may contain multiple ADT operations on multiple ADT instances. 
We present a synthesis algorithm that automatically enforces atomicity of given code fragments (in a client program) by inserting pessimistic synchronization that guarantees atomicity and deadlock-freedom (without using any rollback mechanism). Our algorithm takes a commutativity specification as an extra input. This specification indicates for every pair of ADT operations the conditions under which the operations commute. Our algorithm enables greater parallelism by permitting commuting operations to execute concurrently. We have implemented the synthesis algorithm in a Java compiler, and applied it to several Java programs. Our results show that our approach produces efficient and scalable synchronization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Benson:2015:FPP, author = "Austin R. Benson and Grey Ballard", title = "A framework for practical parallel fast matrix multiplication", journal = j-SIGPLAN, volume = "50", number = "8", pages = "42--53", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Matrix multiplication is a fundamental computation in many scientific disciplines. In this paper, we show that novel fast matrix multiplication algorithms can significantly outperform vendor implementations of the classical algorithm and Strassen's fast algorithm on modest problem sizes and shapes. Furthermore, we show that the best choice of fast algorithm depends not only on the size of the matrices but also the shape. We develop a code generation tool to automatically implement multiple sequential and shared-memory parallel variants of each fast algorithm, including our novel parallelization scheme. This allows us to rapidly benchmark over 20 fast algorithms on several problem sizes. Furthermore, we discuss a number of practical implementation issues for these algorithms on shared-memory machines that can direct further research on making fast algorithms practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "AMD Core Math Library (ACML); Cray Scientific Library (LibSci); IBM Engineering and Scientific Subroutine Library (ESSL); Intel MKL; LINPACK benchmark; numerical instability of $O(N^p)$ algorithms with $p < 3$; Strassen matrix multiplication; Strassen--Winograd algorithm", remark = "PPoPP '15 conference proceedings.", } @Article{Acharya:2015:PNC, author = "Aravind Acharya and Uday Bondhugula", title = "{PLUTO+}: near-complete modeling of affine transformations for parallelism and locality", journal = j-SIGPLAN, volume = "50", number = "8", pages = "54--64", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Affine transformations have proven to be very powerful for loop restructuring due to their ability to model a very wide range of transformations. A single multi-dimensional affine function can represent a long and complex sequence of simpler transformations. 
Existing affine transformation frameworks like the Pluto algorithm, which include a cost function for modern multicore architectures where coarse-grained parallelism and locality are crucial, consider only a sub-space of transformations to avoid a combinatorial explosion in finding the transformations. The ensuing practical trade-offs lead to the exclusion of certain useful transformations, in particular, transformation compositions involving loop reversals and loop skewing by negative factors. In this paper, we propose an approach to address this limitation by modeling a much larger space of affine transformations in conjunction with the Pluto algorithm's cost function. We perform an experimental evaluation of both the effect on compilation time and the performance of the generated code. The evaluation shows that our new framework, Pluto+, provides no degradation in performance in any of the Polybench benchmarks. For Lattice Boltzmann Method (LBM) codes with periodic boundary conditions, it provides a mean speedup of 1.33x over Pluto. We also show that Pluto+ does not increase compile times significantly. Experimental results on Polybench show that Pluto+ increases overall polyhedral source-to-source optimization time only by 15\%. In cases where it improves execution time significantly, it increases polyhedral optimization time only by 2.04x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Ravishankar:2015:DMC, author = "Mahesh Ravishankar and Roshan Dathathri and Venmugil Elango and Louis-No{\"e}l Pouchet and J. Ramanujam and Atanas Rountev and P. Sadayappan", title = "Distributed memory code generation for mixed irregular\slash regular computations", journal = j-SIGPLAN, volume = "50", number = "8", pages = "65--75", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many applications feature a mix of irregular and regular computational structures. For example, codes using adaptive mesh refinement (AMR) typically use a collection of regular blocks, where the number of blocks and the relationship between blocks is irregular. The computational structure in such applications generally involves regular (affine) loop computations within some number of innermost loops, while outer loops exhibit irregularity due to data-dependent control flow and indirect array access patterns. Prior approaches to distributed memory parallelization do not handle such computations effectively. They either target loop nests that are completely affine using polyhedral frameworks, or treat all loops as irregular. Consequently, the generated distributed memory code contains artifacts that disrupt the regular nature of previously affine innermost loops of the computation. This hampers subsequent optimizations to improve on-node performance. We propose a code generation framework that can effectively transform such applications for execution on distributed memory systems. Our approach generates distributed memory code which preserves program properties that enable subsequent polyhedral optimizations. Simultaneously, it addresses a major memory bottleneck of prior techniques that limits the scalability of the generated code.
The effectiveness of the proposed framework is demonstrated on computations that are mixed regular/irregular, completely regular, and completely irregular.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Xiang:2015:SPH, author = "Lingxiang Xiang and Michael L. Scott", title = "Software partitioning of hardware transactions", journal = j-SIGPLAN, volume = "50", number = "8", pages = "76--86", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688506", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Best-effort hardware transactional memory (HTM) allows complex operations to execute atomically and in parallel, so long as hardware buffers do not overflow, and conflicts are not encountered with concurrent operations. We describe a programming technique and compiler support to reduce both overflow and conflict rates by partitioning common operations into read-mostly (planning) and write-mostly (completion) operations, which then execute separately. The completion operation remains transactional; planning can often occur in ordinary code. High-level (semantic) atomicity for the overall operation is ensured by passing an application-specific validator object between planning and completion. Transparent composition of partitioned operations is made possible through fully-automated compiler support, which migrates all planning operations out of the parent transaction while respecting all program data flow and dependences. For both micro- and macro-benchmarks, experiments on IBM z-Series and Intel Haswell machines demonstrate that partitioning can lead to dramatically lower abort rates and higher scalability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Baldassin:2015:PID, author = "Alexandro Baldassin and Edson Borin and Guido Araujo", title = "Performance implications of dynamic memory allocators on transactional memory systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "87--96", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688504", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Although dynamic memory management accounts for a significant part of the execution time on many modern software systems, its impact on the performance of transactional memory systems has been mostly overlooked. In order to shed some light into this subject, this paper conducts a thorough investigation of the interplay between memory allocators and software transactional memory (STM) systems. We show that allocators can interfere with the way memory addresses are mapped to versioned locks on state-of-the-art software transactional memory implementations. Moreover, we observed that key aspects of allocators such as false sharing avoidance, scalability, and locality have a drastic impact on the final performance. For instance, we have detected performance differences of up to 171\% in the STAMP applications when using distinct allocators. 
Moreover, we show that optimizations at the STM level (such as caching transactional objects) are not effective when a modern allocator is already in use. All in all, our study highlights the importance of reporting the allocator utilized in the performance evaluation of transactional memory systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Zhang:2015:LOS, author = "Minjia Zhang and Jipeng Huang and Man Cao and Michael D. Bond", title = "Low-overhead software transactional memory with progress guarantees and strong semantics", journal = j-SIGPLAN, volume = "50", number = "8", pages = "97--108", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software transactional memory offers an appealing alternative to locks by improving programmability, reliability, and scalability. However, existing STMs are impractical because they add high instrumentation costs and often provide weak progress guarantees and/or semantics. This paper introduces a novel STM called LarkTM that provides three significant features. (1) Its instrumentation adds low overhead except when accesses actually conflict, enabling low single-thread overhead and scaling well on low-contention workloads. (2) It uses eager concurrency control mechanisms, yet naturally supports flexible conflict resolution, enabling strong progress guarantees. (3) It naturally provides strong atomicity semantics at low cost. LarkTM's design works well for low-contention workloads, but adds significant overhead under higher contention, so we design an adaptive version of LarkTM that uses alternative concurrency control for high-contention objects. An implementation and evaluation in a Java virtual machine show that the basic and adaptive versions of LarkTM not only provide low single-thread overhead, but their multithreaded performance compares favorably with existing high-performance STMs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Chabbi:2015:BEP, author = "Milind Chabbi and Wim Lavrijsen and Wibe de Jong and Koushik Sen and John Mellor-Crummey and Costin Iancu", title = "Barrier elision for production parallel programs", journal = j-SIGPLAN, volume = "50", number = "8", pages = "109--119", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large scientific code bases are often composed of several layers of runtime libraries, implemented in multiple programming languages. In such situations, programmers often choose conservative synchronization patterns, leading to suboptimal performance. In this paper, we present context-sensitive dynamic optimizations that elide barriers that are redundant during program execution.
In our technique, we perform data race detection alongside the program to identify redundant barriers in their calling contexts; after an initial learning, we start eliding all future instances of barriers occurring in the same calling context. We present an automatic on-the-fly optimization and a multi-pass guided optimization. We apply our techniques to NWChem--a 6 million line computational chemistry code written in C/C++/Fortran that uses several runtime libraries such as Global Arrays, ComEx, DMAPP, and MPI. Our technique elides a surprisingly high fraction of barriers (as many as 63\%) in production runs. This redundancy elimination translates to application speedups as high as 14\% on 2048 cores. Our techniques also provided valuable insight about the application behavior, later used by NWChem developers. Overall, we demonstrate the value of holistic context-sensitive analyses that consider the domain science in conjunction with the associated runtime software stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Thebault:2015:SEI, author = "Lo{\"\i}c Th{\'e}bault and Eric Petit and Quang Dinh", title = "Scalable and efficient implementation of {$3$D} unstructured meshes computation: a case study on matrix assembly", journal = j-SIGPLAN, volume = "50", number = "8", pages = "120--129", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Exposing massive parallelism on 3D unstructured meshes computation with efficient load balancing and minimal synchronizations is challenging. Current approaches relying on domain decomposition and mesh coloring struggle to scale with the increasing number of cores per nodes, especially with new many-core processors. In this paper, we propose an hybrid approach using domain decomposition to exploit distributed memory parallelism, Divide-and-Conquer, D{\&}C, to exploit shared memory parallelism and improve locality, and mesh coloring at core level to exploit vectors. It illustrates a new trade-off for many-cores between structuredness, memory locality, and vectorization. We evaluate our approach on the finite element matrix assembly of an industrial fluid dynamic code developed by Dassault Aviation. We compare our D{\&}C approach to domain decomposition and to mesh coloring. D{\&}C achieves a high parallel efficiency, a good data locality as well as an improved bandwidth usage. It competes on current nodes with the optimized pure MPI version with a minimum 10\% speed-up. D{\&}C shows an impressive 319x strong scaling on 512 cores (32 nodes) with only 2000 vertices per core. Finally, the Intel Xeon Phi version has a performance similar to 10 Intel E5-2665 Xeon Sandy Bridge cores and 95\% parallel efficiency on the 60 physical cores. Running on 4 Xeon Phi (240 cores), D{\&}C has 92\% efficiency on the physical cores and performance similar to 33 Intel E5-2665 Xeon Sandy Bridge cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Tallent:2015:DCS, author = "Nathan R. Tallent and Abhinav Vishnu and Hubertus {Van Dam} and Jeff Daily and Darren J. 
Kerbyson and Adolfy Hoisie", title = "Diagnosing the causes and severity of one-sided message contention", journal = j-SIGPLAN, volume = "50", number = "8", pages = "130--139", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Two trends suggest network contention for one-sided messages is poised to become a performance problem that concerns application developers: an increased interest in one-sided programming models and a rising ratio of hardware threads to network injection bandwidth. Often it is difficult to reason about when one-sided tasks decrease or increase network contention. We present effective and portable techniques for diagnosing the causes and severity of one-sided message contention. To detect that a message is affected by contention, we maintain statistics representing instantaneous network resource demand. Using lightweight measurement and modeling, we identify the portion of a message's latency that is due to contention and whether contention occurs at the initiator or target. We attribute these metrics to program statements in their full static and dynamic context. We characterize contention for an important computational chemistry benchmark on InfiniBand, Cray Aries, and IBM Blue Gene/Q interconnects. We pinpoint the sources of contention, estimate their severity, and show that when message delivery time deviates from an ideal model, there are other messages contending for the same network links. With a small change to the benchmark, we reduce contention by 50\% and improve total runtime by 20\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Chang:2015:PAG, author = "Yen-Jung Chang and Vijay K. Garg", title = "A parallel algorithm for global states enumeration in concurrent systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "140--149", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Verifying the correctness of the executions of a concurrent program is difficult because of its nondeterministic behavior. One of the verification methods is predicate detection, which predicts whether the user specified condition (predicate) could become true in any global states of the program. The method is predictive because it generates inferred execution paths from the observed execution path and then checks the predicate on the global states of inferred paths. One important part of predicate detection is global states enumeration, which generates the global states on inferred paths. Cooper and Marzullo gave the first enumeration algorithm based on a breadth first strategy (BFS). Later, many algorithms have been proposed to improve space and time complexity. None of them, however, takes parallelism into consideration. In this paper, we present the first parallel and online algorithm, named ParaMount, for global state enumeration. 
Our experimental results show that ParaMount speeds up the existing sequential algorithms by a factor of 6 with 8 threads. We have implemented an online predicate detector using ParaMount. For predicate detection, our detector based on ParaMount is 10 to 50 times faster than RV runtime (a verification tool that uses Cooper and Marzullo's BFS enumeration algorithm).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Cogumbreiro:2015:DDV, author = "Tiago Cogumbreiro and Raymond Hu and Francisco Martins and Nobuko Yoshida", title = "Dynamic deadlock verification for general barrier synchronisation", journal = j-SIGPLAN, volume = "50", number = "8", pages = "150--160", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Armus, a dynamic verification tool for deadlock detection and avoidance specialised in barrier synchronisation. Barriers are used to coordinate the execution of groups of tasks, and serve as a building block of parallel computing. Our tool verifies more barrier synchronisation patterns than current state-of-the-art. To improve the scalability of verification, we introduce a novel event-based representation of concurrency constraints, and a graph-based technique for deadlock analysis. The implementation is distributed and fault-tolerant, and can verify X10 and Java programs. To formalise the notion of barrier deadlock, we introduce a core language expressive enough to represent the three most widespread barrier synchronisation patterns: group, split-phase, and dynamic membership. We propose a graph analysis technique that selects from two alternative graph representations: the Wait-For Graph, that favours programs with more tasks than barriers; and the State Graph, optimised for programs with more barriers than tasks. We prove that finding a deadlock in either representation is equivalent, and that the verification algorithm is sound and complete with respect to the notion of deadlock in our core language. Armus is evaluated with three benchmark suites in local and distributed scenarios. The benchmarks show that graph analysis with automatic graph-representation selection can record a 7-fold execution increase versus the traditional fixed graph representation. 
The performance measurements for distributed deadlock detection between 64 processes show negligible overheads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{You:2015:VFO, author = "Yi-Ping You and Hen-Jung Wu and Yeh-Ning Tsai and Yen-Ting Chao", title = "{VirtCL}: a framework for {OpenCL} device abstraction and management", journal = j-SIGPLAN, volume = "50", number = "8", pages = "161--172", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688505", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "The interest in using multiple graphics processing units (GPUs) to accelerate applications has increased in recent years. However, the existing heterogeneous programming models (e.g., OpenCL) abstract details of GPU devices at the per-device level and require programmers to explicitly schedule their kernel tasks on a system equipped with multiple GPU devices. Unfortunately, multiple applications running on a multi-GPU system may compete for some of the GPU devices while leaving other GPU devices unused. Moreover, the distributed memory model defined in OpenCL, where each device has its own memory space, increases the complexity of managing the memory among multiple GPU devices. In this article we propose a framework (called VirtCL) that reduces the programming burden by acting as a layer between the programmer and the native OpenCL run-time system for abstracting multiple devices into a single virtual device and for scheduling computations and communications among the multiple devices. VirtCL comprises two main components: (1) a front-end library, which exposes primary OpenCL APIs and the virtual device, and (2) a back-end run-time system (called CLDaemon) for scheduling and dispatching kernel tasks based on a history-based scheduler. The front-end library forwards computation requests to the back-end CLDaemon, which then schedules and dispatches the requests. We also propose a history-based scheduler that is able to schedule kernel tasks in a contention- and communication-aware manner. Experiments demonstrated that the VirtCL framework introduced a small overhead (mean of 6\%) but outperformed the native OpenCL run-time system for most benchmarks in the Rodinia benchmark suite, which was due to the abstraction layer eliminating the time-consuming initialization of OpenCL contexts. We also evaluated different scheduling policies in VirtCL with a real-world application (clsurf) and various synthetic workload traces. The results indicated that the VirtCL framework provides scalability for multiple kernel tasks running on multi-GPU systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Ashari:2015:OML, author = "Arash Ashari and Shirish Tatikonda and Matthias Boehm and Berthold Reinwald and Keith Campbell and John Keenleyside and P. 
Sadayappan", title = "On optimizing machine learning workloads via kernel fusion", journal = j-SIGPLAN, volume = "50", number = "8", pages = "173--182", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Exploitation of parallel architectures has become critical to scalable machine learning (ML). Since a wide range of ML algorithms employ linear algebraic operators, GPUs with BLAS libraries are a natural choice for such an exploitation. Two approaches are commonly pursued: (i) developing specific GPU accelerated implementations of complete ML algorithms; and (ii) developing GPU kernels for primitive linear algebraic operators like matrix-vector multiplication, which are then used in developing ML algorithms. This paper extends the latter approach by developing fused kernels for a combination of primitive operators that are commonly found in popular ML algorithms. We identify the generic pattern of computation (alpha * X^T (v * (X * y)) + beta * z) and its various instantiations. We develop a fused kernel to optimize this computation on GPUs --- with specialized techniques to handle both sparse and dense matrices. This approach not only reduces the cost of data loads due to improved temporal locality but also enables other optimizations like coarsening and hierarchical aggregation of partial results. We also present an analytical model that considers input data characteristics and available GPU resources to estimate near-optimal settings for kernel launch parameters. The proposed approach provides speedups ranging from 2 to 67 for different instances of the generic pattern compared to launching multiple operator-level kernels using GPU accelerated libraries. We conclude by demonstrating the effectiveness of the approach in improving end-to-end performance on an entire ML algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Zhang:2015:NAG, author = "Kaiyuan Zhang and Rong Chen and Haibo Chen", title = "{NUMA}-aware graph-structured analytics", journal = j-SIGPLAN, volume = "50", number = "8", pages = "183--193", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688507", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graph-structured analytics has been widely adopted in a number of big data applications such as social computation, web-search and recommendation systems. Though much prior research focuses on scaling graph-analytics on distributed environments, the strong desire on performance per core, dollar and joule has generated considerable interests of processing large-scale graphs on a single server-class machine, which may have several terabytes of RAM and 80 or more cores. However, prior graph-analytics systems are largely neutral to NUMA characteristics and thus have suboptimal performance. This paper presents a detailed study of NUMA characteristics and their impact on the efficiency of graph-analytics. 
Our study uncovers two insights: (1) either random or interleaved allocation of graph data will significantly hamper data locality and parallelism; (2) sequential inter-node (i.e., remote) memory accesses have much higher bandwidth than both intra- and inter-node random ones. Based on these insights, this paper describes Polymer, a NUMA-aware graph-analytics system on multicore with two key design decisions. First, Polymer differentially allocates and places topology data, application-defined data and mutable runtime states of a graph system according to their access patterns to minimize remote accesses. Second, for some remaining random accesses, Polymer carefully converts random remote accesses into sequential remote accesses, by using lightweight replication of vertices across NUMA nodes. To improve load balance and vertex convergence, Polymer is further built with a hierarchical barrier to boost parallelism and locality, an edge-oriented balanced partitioning for skewed graphs, and adaptive data structures according to the proportion of active vertices. A detailed evaluation on an 80-core machine shows that Polymer often outperforms the state-of-the-art single-machine graph-analytics systems, including Ligra, X-Stream and Galois, for a set of popular real-world and synthetic graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Xie:2015:SAT, author = "Chenning Xie and Rong Chen and Haibing Guan and Binyu Zang and Haibo Chen", title = "{SYNC} or {ASYNC}: time to fuse for distributed graph-parallel computation", journal = j-SIGPLAN, volume = "50", number = "8", pages = "194--204", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale graph-structured computation usually exhibits an iterative and convergence-oriented computing nature, where input data is computed iteratively until a convergence condition is reached. Such features have led to the development of two different computation modes for graph-structured programs, namely synchronous (Sync) and asynchronous (Async) modes. Unfortunately, there is currently no in-depth study on their execution properties and thus programmers have to manually choose a mode, either requiring a deep understanding of underlying graph engines, or suffering from suboptimal performance. This paper makes the first comprehensive characterization of the performance of the two modes on a set of typical graph-parallel applications. Our study shows that the performance of the two modes varies significantly with different graph algorithms, partitioning methods, execution stages, input graphs and cluster scales, and no single mode consistently outperforms the other. To this end, this paper proposes Hsync, a hybrid graph computation mode that adaptively switches a graph-parallel program between the two modes for optimal performance. Hsync constantly collects execution statistics on-the-fly and leverages a set of heuristics to predict future performance and determine when a mode switch could be profitable. We have built online sampling and offline profiling approaches combined with a set of heuristics to accurately predict future performance in the two modes.
A prototype called PowerSwitch has been built based on PowerGraph, a state-of-the-art distributed graph-parallel system, to support adaptive execution of graph algorithms. On a 48-node EC2-like cluster, PowerSwitch consistently outperforms the best of both modes, with a speedup ranging from 9\% to 73\% due to timely switch between two modes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Tang:2015:COW, author = "Yuan Tang and Ronghui You and Haibin Kan and Jesmin Jahan Tithi and Pramod Ganapathi and Rezaul A. Chowdhury", title = "Cache-oblivious wavefront: improving parallelism of recursive dynamic programming algorithms without losing cache-efficiency", journal = j-SIGPLAN, volume = "50", number = "8", pages = "205--214", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State-of-the-art cache-oblivious parallel algorithms for dynamic programming (DP) problems usually guarantee asymptotically optimal cache performance without any tuning of cache parameters, but they often fail to exploit the theoretically best parallelism at the same time. While these algorithms achieve cache-optimality through the use of a recursive divide-and-conquer (DAC) strategy, scheduling tasks at the granularity of task dependency introduces artificial dependencies in addition to those arising from the defining recurrence equations. We removed the artificial dependencies by scheduling tasks ready for execution as soon as all their real dependency constraints are satisfied, while preserving the cache-optimality by inheriting the DAC strategy. We applied our approach to a set of widely known dynamic programming problems, such as Floyd-Warshall's All-Pairs Shortest Paths, Stencil, and LCS. Theoretical analyses show that our techniques improve the span of 2-way DAC-based Floyd-Warshall's algorithm on an $n$ node graph from $\Theta(n \log^2 n)$ to $\Theta(n)$, stencil computations on a $d$-dimensional hypercubic grid of width $w$ for $h$ time steps from $\Theta((d^2 h) w^{\log (d + 2) - 1})$ to $\Theta(h)$, and LCS on two sequences of length $n$ each from $\Theta(n^{\log_2 3})$ to $\Theta(n)$. In each case, the total work and cache complexity remain asymptotically optimal. Experimental measurements exhibit a $3$--$5$ times improvement in absolute running time, $10$--$20$ times improvement in burdened span by Cilkview, and approximately the same L1/L2 cache misses by PAPI.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Chabbi:2015:HPL, author = "Milind Chabbi and Michael Fagan and John Mellor-Crummey", title = "High performance locks for multi-level {NUMA} systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "215--226", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688503", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient locking mechanisms are critically important for high performance computers.
On highly-threaded systems with a deep memory hierarchy, the throughput of traditional queueing locks, e.g., MCS locks, falls off due to NUMA effects. Two-level cohort locks perform better on NUMA systems, but fail to deliver top performance for deep NUMA hierarchies. In this paper, we describe a hierarchical variant of the MCS lock that adapts the principles of cohort locking for architectures with deep NUMA hierarchies. We describe analytical models for throughput and fairness of Cohort-MCS (C-MCS) and Hierarchical MCS (HMCS) locks that enable us to tailor these locks for high performance on any target platform without empirical tuning. Using these models, one can select parameters such that an HMCS lock will deliver better fairness than a C-MCS lock for a given throughput, or deliver better throughput for a given fairness. Our experiments show that, under high contention, a three-level HMCS lock delivers up to 7.6x higher lock throughput than a C-MCS lock on a 128-thread IBM Power 755 and a five-level HMCS lock delivers up to 72x higher lock throughput on a 4096-thread SGI UV 1000. On the K-means clustering code from the MineBench suite, a three-level HMCS lock reduces the running time by up to 55\% compared to the C-MCS lock on an IBM Power 755.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Majo:2015:LPC, author = "Zoltan Majo and Thomas R. Gross", title = "A library for portable and composable data locality optimizations for {NUMA} systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "227--238", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many recent multiprocessor systems are realized with a non-uniform memory architecture (NUMA) and accesses to remote memory locations take more time than local memory accesses. Optimizing NUMA memory system performance is difficult and costly for three principal reasons: (1) today's programming languages/libraries have no explicit support for NUMA systems, (2) NUMA optimizations are not~portable, and (3) optimizations are not~composable (i.e., they can become ineffective or worsen performance in environments that support composable parallel software). This paper presents TBB-NUMA, a parallel programming library based on Intel Threading Building Blocks (TBB) that supports portable and composable NUMA-aware programming. TBB-NUMA provides a model of task affinity that captures a programmer's insights on mapping tasks to resources. NUMA-awareness affects all layers of the library (i.e., resource management, task scheduling, and high-level parallel algorithm templates) and requires close coupling between all these layers.
Optimizations implemented with TBB-NUMA (for a set of standard benchmark programs) result in up to 44\% performance improvement over standard TBB, but more important, optimized programs are portable across different NUMA architectures and preserve data locality also when composed with other parallel computations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Amer:2015:MRC, author = "Abdelhalim Amer and Huiwei Lu and Yanjie Wei and Pavan Balaji and Satoshi Matsuoka", title = "{MPI+Threads}: runtime contention and remedies", journal = j-SIGPLAN, volume = "50", number = "8", pages = "239--248", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hybrid MPI+Threads programming has emerged as an alternative model to the ``MPI everywhere'' model to better handle the increasing core density in cluster nodes. While the MPI standard allows multithreaded concurrent communication, such flexibility comes with the cost of maintaining thread safety within the MPI implementation, typically implemented using critical sections. In contrast to previous works that studied the importance of critical-section granularity in MPI implementations, in this paper we investigate the implication of critical-section arbitration on communication performance. We first analyze the MPI runtime when multithreaded concurrent communication takes place on hierarchical memory systems. Our results indicate that the mutex-based approach that most MPI implementations use today can incur performance penalties due to unfair arbitration. We then present methods to mitigate these penalties with a first-come, first-served arbitration and a priority locking scheme that favors threads doing useful work. Through evaluations using several benchmarks and applications, we demonstrate up to 5-fold improvement in performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{McPherson:2015:FPL, author = "Andrew J. McPherson and Vijay Nagarajan and Susmit Sarkar and Marcelo Cintra", title = "Fence placement for legacy data-race-free programs via synchronization read detection", journal = j-SIGPLAN, volume = "50", number = "8", pages = "249--250", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fence placement is required to ensure legacy parallel programs operate correctly on relaxed architectures. The challenge is to place as few fences as possible without compromising correctness. 
By identifying necessary conditions for a read to be an acquire, we improve upon the state of the art for legacy DRF programs by up to 2.64x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Piao:2015:JJF, author = "Xianglan Piao and Channoh Kim and Younghwan Oh and Huiying Li and Jincheon Kim and Hanjun Kim and Jae W. Lee", title = "{JAWS}: a {JavaScript} framework for adaptive {CPU--GPU} work sharing", journal = j-SIGPLAN, volume = "50", number = "8", pages = "251--252", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces jAWS, a JavaScript framework for adaptive work sharing between CPU and GPU for data-parallel workloads. Unlike conventional heterogeneous parallel programming environments for JavaScript, which use only one compute device when executing a single kernel, jAWS accelerates kernel execution by exploiting both devices to realize the full performance potential of heterogeneous multicores. jAWS employs an efficient work partitioning algorithm that finds an optimal work distribution between the two devices without requiring offline profiling. The jAWS runtime provides shared arrays for multiple parallel contexts, hence eliminating extra copy overhead for input and output data. Our preliminary evaluation with both CPU-friendly and GPU-friendly benchmarks demonstrates that jAWS provides good load balancing and efficient data communication between parallel contexts, to significantly outperform the best single-device execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Seo:2015:GGS, author = "Hyunseok Seo and Jinwook Kim and Min-Soo Kim", title = "{GStream}: a graph streaming processing method for large-scale graphs on {GPUs}", journal = j-SIGPLAN, volume = "50", number = "8", pages = "253--254", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fast processing of graph algorithms for large-scale graphs is becoming increasingly important. Moreover, there have been many attempts to process graph applications by exploiting the massive amount of parallelism of GPUs. However, most of the existing methods fail to process large-scale graphs that do not fit in GPU device memory. We propose a fast and scalable parallel processing method GStream that fully exploits the computational power of GPUs for processing large-scale graphs (e.g., billions of vertices) very efficiently. It exploits the concept of nested-loop theta-join and multiple asynchronous GPU streams.
Extensive experimental results show that GStream consistently and significantly outperforms the state-of-the art method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Al-Saber:2015:SSA, author = "Nabeel Al-Saber and Milind Kulkarni", title = "{SemCache++}: semantics-aware caching for efficient multi-{GPU} offloading", journal = j-SIGPLAN, volume = "50", number = "8", pages = "255--256", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688527", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Offloading computations to multiple GPUs is not an easy task. It requires decomposing data, distributing computations and handling communication manually. Drop-in GPU libraries have made it easy to offload computations to multiple GPUs by hiding this complexity inside library calls. Such encapsulation prevents the reuse of the data between successive kernel invocations resulting in redundant communication. This limitation exists in multi-GPU libraries like CUBLASXT. In this paper, we introduce SemCache++, a semantics-aware GPU cache that automatically manages communication between the CPU and multiple GPUs in addition to optimizing communication by eliminating redundant transfers using caching. SemCache++ is used to build the first multi-GPU drop-in replacement library that (a) uses the virtual memory to automatically manage and optimize multi-GPU communication and (b) requires no program rewriting or annotations. Our caching technique is efficient; it uses a two level caching directory to track matrices and sub-matrices. Experimental results show that our system can eliminate redundant communication and deliver significant performance improvements over multi-GPU libraries like CUBLASXT.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Kim:2015:OBU, author = "Jungwon Kim and Seyong Lee and Jeffrey S. Vetter", title = "An {OpenACC}-based unified programming model for multi-accelerator systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "257--258", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes a novel SPMD programming model of OpenACC. Our model integrates the different granularities of parallelism from vector-level parallelism to node-level parallelism into a single, unified model based on OpenACC. It allows programmers to write programs for multiple accelerators using a uniform programming model whether they are in shared or distributed memory systems. We implement a prototype of our model and evaluate its performance with a GPU-based supercomputer using three benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Thomson:2015:LHB, author = "Paul Thomson and Alastair F. 
Donaldson", title = "The lazy happens-before relation: better partial-order reduction for systematic concurrency testing", journal = j-SIGPLAN, volume = "50", number = "8", pages = "259--260", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the lazy happens-before relation (lazy HBR), which ignores mutex-induced edges to provide a more precise notion of state equivalence compared with the traditional happens-before relation. We demonstrate experimentally that the lazy HBR has the potential to provide greater schedule reduction during systematic concurrency testing with respect to a set of 79 Java benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Haidar:2015:TBL, author = "Azzam Haidar and Tingxing Dong and Piotr Luszczek and Stanimire Tomov and Jack Dongarra", title = "Towards batched linear solvers on accelerated hardware platforms", journal = j-SIGPLAN, volume = "50", number = "8", pages = "261--262", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As hardware evolves, an increasingly effective approach to developing energy-efficient, high-performance solvers is to design them to work on many small and independent problems. Indeed, many applications already need this functionality, especially for GPUs, which are known to be currently about four to five times more energy efficient than multicore CPUs for every floating-point operation. In this paper, we describe the development of the main one-sided factorizations (LU, QR, and Cholesky) that are needed for a set of small dense matrices to work in parallel. We refer to such algorithms as batched factorizations. Our approach is based on representing the algorithms as a sequence of batched BLAS routines for GPU-contained execution. Note that this is similar in functionality to the LAPACK and the hybrid MAGMA algorithms for large-matrix factorizations. But it is different from a straightforward approach, whereby each of the GPU's symmetric multiprocessors factorizes a single problem at a time. We illustrate how our performance analysis together with the profiling and tracing tools guided the development of batched factorizations to achieve up to 2-fold speedup and 3-fold better energy efficiency compared to our highly optimized batched CPU implementations based on the MKL library on a two-socket Intel Sandy Bridge server.
Compared to a batched LU factorization featured in NVIDIA's CUBLAS library for GPUs, we achieve up to a 2.5-fold speedup on the K40 GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Muralidharan:2015:COP, author = "Saurav Muralidharan and Michael Garland and Bryan Catanzaro and Albert Sidelnik and Mary Hall", title = "A collection-oriented programming model for performance portability", journal = j-SIGPLAN, volume = "50", number = "8", pages = "263--264", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes Surge, a collection-oriented programming model that enables programmers to compose parallel computations using nested high-level data collections and operators. Surge exposes a code generation interface, decoupled from the core computation, that enables programmers and autotuners to easily generate multiple implementations of the same computation on various parallel architectures such as multi-core CPUs and GPUs. By decoupling computations from architecture-specific implementation, programmers can target multiple architectures more easily, and generate a search space that facilitates optimization and customization for specific architectures. We express four real-world benchmarks from domains such as sparse linear algebra and machine learning in Surge and, from the same performance-portable specification, generate OpenMP and CUDA C++ implementations. Surge generates efficient, scalable code that achieves up to a 1.32x speedup over handcrafted, well-optimized CUDA code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Wang:2015:GHP, author = "Yangzihao Wang and Andrew Davidson and Yuechao Pan and Yuduo Wu and Andy Riffel and John D. Owens", title = "{Gunrock}: a high-performance graph processing library on the {GPU}", journal = j-SIGPLAN, volume = "50", number = "8", pages = "265--266", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688538", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For large-scale graph analytics on the GPU, the irregularity of data access and control flow and the complexity of programming GPUs have been two significant challenges for developing a programmable high-performance graph library. ``Gunrock'', our graph-processing system, uses a high-level bulk-synchronous abstraction with traversal and computation steps, designed specifically for the GPU. Gunrock couples high performance with a high-level programming model that allows programmers to quickly develop new graph primitives with less than 300 lines of code.
We evaluate Gunrock on five graph primitives and show that Gunrock has at least an order of magnitude speedup over Boost and PowerGraph, comparable performance to the fastest GPU hardwired primitives, and better performance than any other GPU high-level graph library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Pearce:2015:DLB, author = "Olga Pearce and Todd Gamblin and Bronis R. de Supinski and Martin Schulz and Nancy M. Amato", title = "Decoupled load balancing", journal = j-SIGPLAN, volume = "50", number = "8", pages = "267--268", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern scientific simulations divide work between parallel processors by decomposing a spatial domain of mesh cells, particles, or other elements. A balanced assignment of the computational load is critical for parallel performance. If the computation per element changes over the simulation time, simulations can use dynamic load balance algorithms to evenly redistribute work to processes. Graph partitioners are widely used and balance very effectively, but they do not strong scale well. Typical SPMD simulations wait while a load balance algorithm runs on all processors, so a poorly scaling algorithm can itself become a bottleneck. We observe that the load balance algorithm is separate from the main application computation and has its own scaling properties. We propose to decouple the load balance algorithm from the application, and to offload the load balance computation so that it runs concurrently with the application on a smaller number of processors. We demonstrate the costs of decoupling and offloading the load balancing algorithm from a Barnes--Hut application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Jin:2015:CPI, author = "Ye Jin and Mingliang Liu and Xiaosong Ma and Qing Liu and Jeremy Logan and Norbert Podhorszki and Jong Youl Choi and Scott Klasky", title = "Combining phase identification and statistic modeling for automated parallel benchmark generation", journal = j-SIGPLAN, volume = "50", number = "8", pages = "269--270", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688541", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallel application benchmarks are indispensable for evaluating/optimizing HPC software and hardware. However, it is very challenging and costly to obtain high-fidelity benchmarks reflecting the scale and complexity of state-of-the-art parallel applications. Hand-extracted synthetic benchmarks are time- and labor-intensive to create. Real applications themselves, while offering most accurate performance evaluation, are expensive to compile, port, reconfigure, and often plainly inaccessible due to security or ownership concerns. 
This work contributes APPRIME, a novel tool for trace-based automatic parallel benchmark generation. Taking as input standard communication-I/O traces of an application's execution, it couples accurate automatic phase identification with statistical regeneration of event parameters to create compact, portable, and to some degree reconfigurable parallel application benchmarks. Experiments with four NAS Parallel Benchmarks (NPB) and three real scientific simulation codes confirm the fidelity of APPRIME benchmarks. They retain the original applications' performance characteristics, in particular the relative performance across platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Shi:2015:OAG, author = "Xuanhua Shi and Junling Liang and Sheng Di and Bingsheng He and Hai Jin and Lu Lu and Zhixiang Wang and Xuan Luo and Jianlong Zhong", title = "Optimization of asynchronous graph processing on {GPU} with hybrid coloring model", journal = j-SIGPLAN, volume = "50", number = "8", pages = "271--272", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688542", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern GPUs have been widely used to accelerate the graph processing for complicated computational problems regarding graph theory. Many parallel graph algorithms adopt the asynchronous computing model to accelerate the iterative convergence. Unfortunately, the consistent asynchronous computing requires locking or the atomic operations, leading to significant penalties/overheads when implemented on GPUs. To this end, coloring algorithm is adopted to separate the vertices with potential updating conflicts, guaranteeing the consistency/correctness of the parallel processing. We propose a light-weight asynchronous processing framework called Frog with a hybrid coloring model. We find that majority of vertices (about 80\%) are colored with only a few colors, such that they can be read and updated in a very high degree of parallelism without violating the sequential consistency. Accordingly, our solution will separate the processing of the vertices based on the distribution of colors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{West:2015:ERO, author = "Scott West and Sebastian Nanz and Bertrand Meyer", title = "Efficient and reasonable object-oriented concurrency", journal = j-SIGPLAN, volume = "50", number = "8", pages = "273--274", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688545", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Making threaded programs safe and easy to reason about is one of the chief difficulties in modern programming. This work provides an efficient execution model and implementation for SCOOP, a concurrency approach that provides not only data-race freedom but also pre/postcondition reasoning guarantees between threads. 
The extensions we propose influence the underlying semantics to increase the amount of concurrent execution that is possible, exclude certain classes of deadlocks, and enable greater performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Vassiliadis:2015:PMR, author = "Vassilis Vassiliadis and Konstantinos Parasyris and Charalambos Chalios and Christos D. Antonopoulos and Spyros Lalis and Nikolaos Bellas and Hans Vandierendonck and Dimitrios S. Nikolopoulos", title = "A programming model and runtime system for significance-aware energy-efficient computing", journal = j-SIGPLAN, volume = "50", number = "8", pages = "275--276", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688546", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a task-based programming model and runtime system that exploit the observation that not all parts of a program are equally significant for the accuracy of the end-result, in order to trade off the quality of program outputs for increased energy-efficiency. This is done in a structured and flexible way, allowing for easy exploitation of different points in the quality/energy space, without adversely affecting application performance. The runtime system can apply a number of different policies to decide whether it will execute less-significant tasks accurately or approximately. The experimental evaluation indicates that our system can achieve an energy reduction of up to 83\% compared with a fully accurate execution and up to 35\% compared with an approximate version employing loop perforation. At the same time, our approach always results in graceful quality degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Wimmer:2015:LFK, author = "Martin Wimmer and Jakob Gruber and Jesper Larsson Tr{\"a}ff and Philippas Tsigas", title = "The lock-free {$k$-LSM} relaxed priority queue", journal = j-SIGPLAN, volume = "50", number = "8", pages = "277--278", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688547", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new, concurrent, lock-free priority queue that relaxes the delete-min operation to allow deletion of any of the $\rho$ smallest keys instead of only a minimal one, where $\rho$ is a parameter that can be configured at runtime. It is built from a logarithmic number of sorted arrays, similar to log-structured merge-trees (LSM). For keys added and removed by the same thread, the behavior is identical to a non-relaxed priority queue.
We compare to state-of-the-art lock-free priority queues with both relaxed and non-relaxed semantics, showing high performance and good scalability of our approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Saillard:2015:SDV, author = "Emmanuelle Saillard and Patrick Carribault and Denis Barthou", title = "Static\slash dynamic validation of {MPI} collective communications in multi-threaded context", journal = j-SIGPLAN, volume = "50", number = "8", pages = "279--280", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688548", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scientific applications mainly rely on the MPI parallel programming model to reach high performance on supercomputers. The advent of manycore architectures (larger number of cores and lower amount of memory per core) leads to mixing MPI with a thread-based model like OpenMP. But integrating two different programming models inside the same application can be tricky and generate complex bugs. Thus, the correctness of hybrid programs requires special care regarding the location of MPI calls. For example, identical MPI collective operations cannot be performed by multiple non-synchronized threads. To tackle this issue, this paper proposes a static analysis and a reduced dynamic instrumentation to detect bugs related to misuse of MPI collective operations inside or outside threaded regions. This work extends PARCOACH, designed for MPI-only applications, and keeps compatibility with its algorithms. We validated our method on multiple hybrid benchmarks and applications with low overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Ramachandran:2015:CFC, author = "Arunmoezhi Ramachandran and Neeraj Mittal", title = "{CASTLE}: fast concurrent internal binary search tree using edge-based locking", journal = j-SIGPLAN, volume = "50", number = "8", pages = "281--282", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688551", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new lock-based algorithm for concurrent manipulation of a binary search tree in an asynchronous shared memory system that supports search, insert and delete operations. Some of the desirable characteristics of our algorithm are: (i) a search operation uses only read and write instructions, (ii) an insert operation does not acquire any locks, and (iii) a delete operation only needs to lock up to four edges in the absence of contention. Our algorithm is based on an internal representation of a search tree and it operates at edge-level (locks edges) rather than at node-level (locks nodes); this minimizes the contention window of a write operation and improves the system throughput.
Our experiments indicate that our lock-based algorithm outperforms existing algorithms for a concurrent binary search tree for medium-sized and larger trees, achieving up to 59\% higher throughput than the next best algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Das:2015:SBP, author = "Madan Das and Gabriel Southern and Jose Renau", title = "Section based program analysis to reduce overhead of detecting unsynchronized thread communication", journal = j-SIGPLAN, volume = "50", number = "8", pages = "283--284", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688552", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose Section Based Program Analysis (SBPA), a novel way to decompose programs into disjoint sections to identify non-communicating loads and stores during program compilation. We implemented SBPA for a deterministic execution runtime environment and reduced 63\% of dynamic memory access instrumentations. We also integrated SBPA with ThreadSanitizer, and achieved a speed-up of 2.74 on a geometric mean basis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Harshvardhan:2015:HAR, author = "Harshvardhan and Nancy M. Amato and Lawrence Rauchwerger", title = "A hierarchical approach to reducing communication in parallel graph algorithms", journal = j-SIGPLAN, volume = "50", number = "8", pages = "285--286", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2700994", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale graph computing has become critical due to the ever-increasing size of data. However, distributed graph computations are limited in their scalability and performance due to the heavy communication inherent in such computations. This is exacerbated in scale-free networks, such as social and web graphs, which contain hub vertices that have large degrees and therefore send a large number of messages over the network. Furthermore, many graph algorithms and computations send the same data to each of the neighbors of a vertex. Our proposed approach recognizes this, and reduces communication performed by the algorithm without change to user-code, through a hierarchical machine model imposed upon the input graph. The hierarchical model takes advantage of locale information of the neighboring vertices to reduce communication, both in message volume and total number of bytes sent. It is also able to better exploit the machine hierarchy to further reduce the communication costs, by aggregating traffic between different levels of the machine hierarchy. 
Results of an implementation in the STAPL GL show improved scalability and performance over the traditional level-synchronous approach, with a 2.5$\times$--8$\times$ improvement for a variety of graph algorithms at 12,000+ cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Chen:2015:TNL, author = "Yifeng Chen and Xiang Cui and Hong Mei", title = "{Tiles}: a new language mechanism for heterogeneous parallelism", journal = j-SIGPLAN, volume = "50", number = "8", pages = "287--288", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688555", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper studies the essence of heterogeneity from the perspective of language mechanism design. The proposed mechanism, called tiles, is a program construct that bridges two relative levels of computation: an outer level of source data in larger, slower or more distributed memory and an inner level of data blocks in smaller, faster or more localized memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Radoi:2015:WAR, author = "Cosmin Radoi and Stephan Herhut and Jaswanth Sreeram and Danny Dig", title = "Are web applications ready for parallelism?", journal = j-SIGPLAN, volume = "50", number = "8", pages = "289--290", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2700995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years, web applications have become pervasive. Their backbone is JavaScript, the only programming language supported by all major web browsers. Most browsers run on desktop or mobile devices with parallel hardware. However, JavaScript is by design sequential, and current web applications make little use of hardware parallelism. Are web applications ready to exploit parallel hardware? We answer the question in two steps: First, we survey 174 web developers about the potential and challenges of using parallelism. Then, we study the performance and computation shape of a set of web applications that are representative of the emerging web.
Our findings indicate that emerging web applications do have latent data parallelism, and JavaScript developers' programming style is not a significant impediment to exploiting this parallelism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '15 conference proceedings.", } @Article{Bodik:2015:PSO, author = "Rastislav Bodik", title = "Program synthesis: opportunities for the next decade", journal = j-SIGPLAN, volume = "50", number = "9", pages = "1--1", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2789052", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program synthesis is the contemporary answer to automatic programming. It innovates in two ways: First, it replaces batch automation with interactivity, assisting the programmer in refining the understanding of the programming problem. Second, it produces programs using search in a candidate space rather than by derivation from a specification. Searching for an acceptable program means that we can accommodate incomplete specifications, such as examples. Additionally, search makes synthesis applicable to domains that lack correct-by-construction derivation rules, such as hardware design, education, end-user programming, and systems biology. The future of synthesis rests on four challenges, each presenting an opportunity to develop novel abstractions for ``programming with search.'' Larger scope: today, we synthesize small, flat programs; synthesis of large software will need constructs for modularity and stepwise refinement. New interaction modes: to solicit the specification without simply asking for more examples, we need to impose a structure on the candidate space and explore it in a dialogue. Construction: how to compile a synthesis problem to a search algorithm without building a compiler? Everything is a program: whatever can be phrased as a program can be in principle synthesized. Indeed, we will see synthesis advance from synthesis of plain programs to synthesis of compilers and languages. The latter may include DSLs, type systems, and modeling languages for biology. As such, synthesis could help mechanize the crown jewel of programming languages research --- the design of abstractions --- which has so far been done manually and only by experts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Rompf:2015:FPS, author = "Tiark Rompf and Nada Amin", title = "Functional pearl: a {SQL} to {C} compiler in 500 lines of code", journal = j-SIGPLAN, volume = "50", number = "9", pages = "2--9", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784760", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the design and implementation of a SQL query processor that outperforms existing database systems and is written in just about 500 lines of Scala code --- a convincing case study that high-level functional programming can handily beat C for systems-level programming where the last drop of performance matters. 
The key enabler is a shift in perspective towards generative programming. The core of the query engine is an interpreter for relational algebra operations, written in Scala. Using the open-source LMS Framework (Lightweight Modular Staging), we turn this interpreter into a query compiler with very low effort. To do so, we capitalize on an old and widely known result from partial evaluation known as Futamura projections, which state that a program that can specialize an interpreter to any given input program is equivalent to a compiler. In this pearl, we discuss LMS programming patterns such as mixed-stage data structures (e.g. data records with static schema and dynamic field components) and techniques to generate low-level C code, including specialized data structures and data loading primitives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Chlipala:2015:OCP, author = "Adam Chlipala", title = "An optimizing compiler for a purely functional web-application language", journal = j-SIGPLAN, volume = "50", number = "9", pages = "10--21", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784741", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-level scripting languages have become tremendously popular for development of dynamic Web applications. Many programmers appreciate the productivity benefits of automatic storage management, freedom from verbose type annotations, and so on. While it is often possible to improve performance substantially by rewriting an application in C or a similar language, very few programmers bother to do so, because of the consequences for human development effort. This paper describes a compiler that makes it possible to have most of the best of both worlds, coding Web applications in a high-level language but compiling to native code with performance comparable to handwritten C code. The source language is Ur/Web, a domain-specific, purely functional, statically typed language for the Web. Through a coordinated suite of relatively straightforward program analyses and algebraic optimizations, we transform Ur/Web programs into almost-idiomatic C code, with no garbage collection, little unnecessary memory allocation for intermediate values, etc. Our compiler is in production use for commercial Web sites supporting thousands of users, and microbenchmarks demonstrate very competitive performance versus mainstream tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Bauman:2015:PTJ, author = "Spenser Bauman and Carl Friedrich Bolz and Robert Hirschfeld and Vasily Kirilichev and Tobias Pape and Jeremy G. 
Siek and Sam Tobin-Hochstadt", title = "{Pycket}: a tracing {JIT} for a functional language", journal = j-SIGPLAN, volume = "50", number = "9", pages = "22--34", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784740", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Pycket, a high-performance tracing JIT compiler for Racket. Pycket supports a wide variety of the sophisticated features in Racket such as contracts, continuations, classes, structures, dynamic binding, and more. On average, over a standard suite of benchmarks, Pycket outperforms existing compilers, both Racket's JIT and other highly-optimizing Scheme compilers. Further, Pycket provides much better performance for Racket proxies than existing systems, dramatically reducing the overhead of contracts and gradual typing. We validate this claim with performance evaluation on multiple existing benchmark suites. The Pycket implementation is of independent interest as an application of the RPython meta-tracing framework (originally created for PyPy), which automatically generates tracing JIT compilers from interpreters. Prior work on meta-tracing focuses on bytecode interpreters, whereas Pycket is a high-level interpreter based on the CEK abstract machine and operates directly on abstract syntax trees. Pycket supports proper tail calls and first-class continuations. In the setting of a functional language, where recursion and higher-order functions are more prevalent than explicit loops, the most significant performance challenge for a tracing JIT is identifying which control flows constitute a loop---we discuss two strategies for identifying loops and measure their impact.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Rossberg:2015:CMU, author = "Andreas Rossberg", title = "{1ML} --- core and modules united ({$F$}-ing first-class modules)", journal = j-SIGPLAN, volume = "50", number = "9", pages = "35--47", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784738", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "ML is two languages in one: there is the core, with types and expressions, and there are modules, with signatures, structures and functors. Modules form a separate, higher-order functional language on top of the core. There are both practical and technical reasons for this stratification; yet, it creates substantial duplication in syntax and semantics, and it reduces expressiveness. For example, selecting a module cannot be made a dynamic decision. Language extensions allowing modules to be packaged up as first-class values have been proposed and implemented in different variations. However, they remedy expressiveness only to some extent, are syntactically cumbersome, and do not alleviate redundancy. We propose a redesign of ML in which modules are truly first-class values, and core and module layer are unified into one language. 
In this ``1ML'', functions, functors, and even type constructors are one and the same construct; likewise, no distinction is made between structures, records, or tuples. Or viewed the other way round, everything is just (``a mode of use of'') modules. Yet, 1ML does not require dependent types, and its type structure is expressible in terms of plain System F$_\omega$, in a minor variation of our F-ing modules approach. We introduce both an explicitly typed version of 1ML, and an extension with Damas/Milner-style implicit quantification. Type inference for this language is not complete, but, we argue, not substantially worse than for Standard ML. An alternative view is that 1ML is a user-friendly surface syntax for System F$_\omega$ that allows combining term and type abstraction in a more compositional manner than the bare calculus.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Vazou:2015:BRT, author = "Niki Vazou and Alexander Bakst and Ranjit Jhala", title = "Bounded refinement types", journal = j-SIGPLAN, volume = "50", number = "9", pages = "48--61", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784745", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a notion of bounded quantification for refinement types and show how it expands the expressiveness of refinement typing by using it to develop typed combinators for: (1) relational algebra and safe database access, (2) Floyd-Hoare logic within a state transformer monad equipped with combinators for branching and looping, and (3) using the above to implement a refined IO monad that tracks capabilities and resource usage. This leap in expressiveness comes via a translation to ``ghost'' functions, which lets us retain the automated and decidable SMT based checking and inference that makes refinement typing effective in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Matsuda:2015:ABP, author = "Kazutaka Matsuda and Meng Wang", title = "Applicative bidirectional programming with lenses", journal = j-SIGPLAN, volume = "50", number = "9", pages = "62--74", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784750", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A bidirectional transformation is a pair of mappings between source and view data objects, one in each direction. When the view is modified, the source is updated accordingly with respect to some laws. One way to reduce the development and maintenance effort of bidirectional transformations is to have specialized languages in which the resulting programs are bidirectional by construction---giving rise to the paradigm of bidirectional programming.
In this paper, we develop a framework for applicative-style and higher-order bidirectional programming, in which we can write bidirectional transformations as unidirectional programs in standard functional languages, opening up access to the bundle of language features previously only available to conventional unidirectional languages. Our framework essentially bridges two very different approaches of bidirectional programming, namely the lens framework and Voigtl{\"a}nder's semantic bidirectionalization, creating a new programming style that is able to bag benefits from both.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Pombrio:2015:HRC, author = "Justin Pombrio and Shriram Krishnamurthi", title = "Hygienic resugaring of compositional desugaring", journal = j-SIGPLAN, volume = "50", number = "9", pages = "75--87", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784755", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Syntactic sugar is widely used in language implementation. Its benefits are, however, offset by the comprehension problems it presents to programmers once their program has been transformed. In particular, after a transformed program has begun to evaluate (or otherwise be altered by a black-box process), it can become unrecognizable. We present a new approach to ``resugaring'' programs, which is the act of reflecting evaluation steps in the core language in terms of the syntactic sugar that the programmer used. Relative to prior work, our approach has two important advances: it handles hygiene, and it allows almost arbitrary rewriting rules (as opposed to restricted patterns). We do this in the context of a DAG representation of programs, rather than more traditional trees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Geneves:2015:XST, author = "Pierre Genev{\`e}s and Nils Gesbert", title = "{XQuery} and static typing: tackling the problem of backward axes", journal = j-SIGPLAN, volume = "50", number = "9", pages = "88--100", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784746", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "XQuery is a functional language dedicated to XML data querying and manipulation. As opposed to other W3C-standardized languages for XML (e.g. XSLT), it has been intended to feature strong static typing. Currently, however, some expressions of the language cannot be statically typed with any precision. We argue that this is due to a discrepancy between the semantics of the language and its type algebra: namely, the values of the language are (possibly inner) tree nodes, which may have siblings and ancestors in the data. The types on the other hand are regular tree types, as usual in the XML world: they describe sets of trees. The type associated to a node then corresponds to the subtree whose root is that node and contains no information about the rest of the data.
This makes navigation expressions using `backward axes,' which return e.g. the siblings of a node, impossible to type. We discuss how to handle this discrepancy by improving the type system. We describe a logic-based language of extended types able to represent inner tree nodes and show how it can dramatically increase the precision of typing for navigation expressions. We describe how inclusion between these extended types and the classical regular tree types can be decided, allowing a hybrid system combining both type languages. The result is a net increase in precision of typing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Bowman:2015:NF, author = "William J. Bowman and Amal Ahmed", title = "Noninterference for free", journal = j-SIGPLAN, volume = "50", number = "9", pages = "101--113", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784733", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The dependency core calculus (DCC) is a framework for studying a variety of dependency analyses (e.g., secure information flow). The key property provided by DCC is noninterference, which guarantees that a low-level observer (attacker) cannot distinguish high-level (protected) computations. The proof of noninterference for DCC suggests a connection to parametricity in System F, which suggests that it should be possible to implement dependency analyses in languages with parametric polymorphism. We present a translation from DCC into F$_\omega$ and prove that the translation preserves noninterference. To express noninterference in F$_\omega$, we define a notion of observer-sensitive equivalence that makes essential use of both first-order and higher-order polymorphism. Our translation provides insights into DCC's type system and shows how DCC can be implemented in a polymorphic language without loss of the noninterference (security) guarantees available in DCC. Our contributions include proof techniques that should be valuable when proving other secure compilation or full abstraction results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Gaboardi:2015:ACL, author = "Marco Gaboardi and Romain P{\'e}choux", title = "Algebras and coalgebras in the light affine {Lambda} calculus", journal = j-SIGPLAN, volume = "50", number = "9", pages = "114--126", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784759", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Algebra and coalgebra are widely used to model data types in functional programming languages and proof assistants. Their use permits to better structure the computations and also to enhance the expressivity of a language or of a proof system. Interestingly, parametric polymorphism {\`a} la System F provides a way to encode algebras and coalgebras in strongly normalizing languages without losing the good logical properties of the calculus.
Even if these encodings are sometimes unsatisfying because they provide only limited forms of algebras and coalgebras, they give insights on the expressivity of System F in terms of functions that we can program in it. With the goal of contributing to a better understanding of the expressivity of Implicit Computational Complexity systems, we study the problem of defining algebras and coalgebras in the Light Affine Lambda Calculus, a system characterizing the complexity class FPTIME. This system limits the computational complexity of programs but it also limits the ways we can use parametric polymorphism, and in general the way we can write our programs. We show here that while the restrictions imposed by the Light Affine Lambda Calculus pose some issues to the standard System F encodings, they still permit to encode some form of algebra and coalgebra. Using the algebra encoding one can define in the Light Affine Lambda Calculus the traditional inductive types. Unfortunately, the corresponding coalgebra encoding permits only a very limited form of coinductive data types. To extend this class we study an extension of the Light Affine Lambda Calculus by distributive laws for the modality \S . This extension has been discussed but not studied before.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Downen:2015:SSR, author = "Paul Downen and Philip Johnson-Freyd and Zena M. Ariola", title = "Structures for structural recursion", journal = j-SIGPLAN, volume = "50", number = "9", pages = "127--139", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784762", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Our goal is to develop co-induction from our understanding of induction, putting them on level ground as equal partners for reasoning about programs. We investigate several structures which represent well-founded forms of recursion in programs. These simple structures encapsulate reasoning by primitive and noetherian induction principles, and can be composed together to form complex recursion schemes for programs operating over a wide class of data and co-data types. At its heart, this study is guided by duality: each structure for recursion has a dual form, giving perfectly symmetric pairs of equal and opposite data and co-data types for representing recursion in programs. Duality is brought out through a framework presented in sequent style, which inherently includes control effects that are interpreted logically as classical reasoning principles. To accommodate the presence of effects, we give a calculus parameterized by a notion of strategy, which is strongly normalizing for a wide range of strategies. We also present a more traditional calculus for representing effect-free functional programs, but at the cost of losing some of the founding dualities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Danner:2015:DCS, author = "Norman Danner and Daniel R. 
Licata and Ramyaa Ramyaa", title = "Denotational cost semantics for functional languages with inductive types", journal = j-SIGPLAN, volume = "50", number = "9", pages = "140--151", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784749", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A central method for analyzing the asymptotic complexity of a functional program is to extract and then solve a recurrence that expresses evaluation cost in terms of input size. The relevant notion of input size is often specific to a datatype, with measures including the length of a list, the maximum element in a list, and the height of a tree. In this work, we give a formal account of the extraction of cost and size recurrences from higher-order functional programs over inductive datatypes. Our approach allows a wide range of programmer-specified notions of size, and ensures that the extracted recurrences correctly predict evaluation cost. To extract a recurrence from a program, we first make costs explicit by applying a monadic translation from the source language to a complexity language, and then abstract datatype values as sizes. Size abstraction can be done semantically, working in models of the complexity language, or syntactically, by adding rules to a preorder judgement. We give several different models of the complexity language, which support different notions of size. Additionally, we prove by a logical relations argument that recurrences extracted by this process are upper bounds for evaluation cost; the proof is entirely syntactic and therefore applies to all of the models we consider.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Avanzini:2015:ACF, author = "Martin Avanzini and Ugo {Dal Lago} and Georg Moser", title = "Analysing the complexity of functional programs: higher-order meets first-order", journal = j-SIGPLAN, volume = "50", number = "9", pages = "152--164", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show how the complexity of higher-order functional programs can be analysed automatically by applying program transformations to defunctionalised versions of them, and feeding the result to existing tools for the complexity analysis of first-order term rewrite systems. This is done while carefully analysing complexity preservation and reflection of the employed transformations such that the complexity of the obtained term rewrite system reflects on the complexity of the initial program.
Further, we describe suitable strategies for the application of the studied transformations and provide ample experimental data for assessing the viability of our method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Sheeran:2015:FPH, author = "Mary Sheeran", title = "Functional programming and hardware design: still interesting after all these years", journal = j-SIGPLAN, volume = "50", number = "9", pages = "165--165", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2789053", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Higher order functions provide an elegant way to express algorithms designed for implementation in hardware. By showing examples of both classic and new algorithms, I will explain why higher order functions deserve to be studied. Next, I will consider the extent to which ideas from functional programming, and associated formal verification methods, have influenced hardware design in practice. What can we learn from looking back? You might ask ``Why are methods of hardware design still important to our community?''. Maybe we should just give up? One reason for not giving up is that hardware design is really a form of parallel programming. And here there is still a lot to do! Inspired by Blelloch's wonderful invited talk at ICFP 2010, I still believe that functional programming has much to offer in the central question of how to program the parallel machines of today, and, more particularly, of the future. I will briefly present some of the areas where I think that we are poised to make great contributions. But maybe we need to work harder on getting our act together?", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Neis:2015:PCV, author = "Georg Neis and Chung-Kil Hur and Jan-Oliver Kaiser and Craig McLaughlin and Derek Dreyer and Viktor Vafeiadis", title = "{Pilsner}: a compositionally verified compiler for a higher-order imperative language", journal = j-SIGPLAN, volume = "50", number = "9", pages = "166--178", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784764", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler verification is essential for the construction of fully verified software, but most prior work (such as CompCert) has focused on verifying whole-program compilers. To support separate compilation and to enable linking of results from different verified compilers, it is important to develop a compositional notion of compiler correctness that is modular (preserved under linking), transitive (supports multi-pass compilation), and flexible (applicable to compilers that use different intermediate languages or employ non-standard program transformations). In this paper, building on prior work of Hur et al., we develop a novel approach to compositional compiler verification based on parametric inter-language simulations (PILS). 
PILS are modular: they enable compiler verification in a manner that supports separate compilation. PILS are transitive: we use them to verify Pilsner, a simple (but non-trivial) multi-pass optimizing compiler (programmed in Coq) from an ML-like source language S to an assembly-like target language T, going through a CPS-based intermediate language. Pilsner is the first multi-pass compiler for a higher-order imperative language to be compositionally verified. Lastly, PILS are flexible: we use them to additionally verify (1) Zwickel, a direct non-optimizing compiler for S, and (2) a hand-coded self-modifying T module, proven correct w.r.t. an S-level specification. The output of Zwickel and the self-modifying T module can then be safely linked together with the output of Pilsner. All together, this has been a significant undertaking, involving several person-years of work and over 55,000 lines of Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Ziliani:2015:UAC, author = "Beta Ziliani and Matthieu Sozeau", title = "A unification algorithm for {Coq} featuring universe polymorphism and overloading", journal = j-SIGPLAN, volume = "50", number = "9", pages = "179--191", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784751", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unification is a core component of every proof assistant or programming language featuring dependent types. In many cases, it must deal with higher-order problems up to conversion. Since unification in such conditions is undecidable, unification algorithms may include several heuristics to solve common problems. However, when the stack of heuristics grows large, the result and complexity of the algorithm can become unpredictable. Our contributions are twofold: (1) We present a full description of a new unification algorithm for the Calculus of Inductive Constructions (the base logic of Coq), including universe polymorphism, canonical structures (the overloading mechanism baked into Coq's unification), and a small set of useful heuristics. (2) We implemented our algorithm, and tested it on several libraries, providing evidence that the selected set of heuristics suffices for large developments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Blanchette:2015:FEC, author = "Jasmin Christian Blanchette and Andrei Popescu and Dmitriy Traytel", title = "Foundational extensible corecursion: a proof assistant perspective", journal = j-SIGPLAN, volume = "50", number = "9", pages = "192--204", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784732", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a formalized framework for defining corecursive functions safely in a total setting, based on corecursion up-to and relational parametricity. 
The end product is a general corecursor that allows corecursive (and even recursive) calls under ``friendly'' operations, including constructors. Friendly corecursive functions can be registered as such, thereby increasing the corecursor's expressiveness. The metatheory is formalized in the Isabelle proof assistant and forms the core of a prototype tool. The corecursor is derived from first principles, without requiring new axioms or extensions of the logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Steuwer:2015:GPP, author = "Michel Steuwer and Christian Fensch and Sam Lindley and Christophe Dubach", title = "Generating performance portable code using rewrite rules: from high-level functional expressions to high-performance {OpenCL} code", journal = j-SIGPLAN, volume = "50", number = "9", pages = "205--217", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computers have become increasingly complex with the emergence of heterogeneous hardware combining multicore CPUs and GPUs. These parallel systems exhibit tremendous computational power at the cost of increased programming effort resulting in a tension between performance and code portability. Typically, code is either tuned in a low-level imperative language using hardware-specific optimizations to achieve maximum performance or is written in a high-level, possibly functional, language to achieve portability at the expense of performance. We propose a novel approach aiming to combine high-level programming, code portability, and high-performance. Starting from a high-level functional expression we apply a simple set of rewrite rules to transform it into a low-level functional representation, close to the OpenCL programming model, from which OpenCL code is generated. Our rewrite rules define a space of possible implementations which we automatically explore to generate hardware-specific OpenCL implementations. We formalize our system with a core dependently-typed lambda-calculus along with a denotational semantics which we use to prove the correctness of the rewrite rules. We test our design in practice by implementing a compiler which generates high performance imperative OpenCL code. Our experiments show that we can automatically derive hardware-specific implementations from simple functional high-level algorithmic expressions offering performance on a par with highly tuned code for multicore CPUs and GPUs written by experts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Newton:2015:ALF, author = "Ryan R. Newton and Peter P. 
Fogg and Ali Varamesh", title = "Adaptive lock-free maps: purely-functional to scalable", journal = j-SIGPLAN, volume = "50", number = "9", pages = "218--229", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784734", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Purely functional data structures stored inside a mutable variable provide an excellent concurrent data structure: obviously correct, cheap to create, and supporting snapshots. They are not, however, scalable. We provide a way to retain the benefits of these pure-in-a-box data structures while dynamically converting to a more scalable lock-free data structure under contention. Our solution scales to any pair of pure and lock-free container types with key/value set semantics, while retaining lock-freedom. We demonstrate the principle in action on two very different platforms: first in the Glasgow Haskell Compiler and second in Java. To this end we extend GHC to support lock-free data structures and introduce a new approach for safe CAS in a lazy language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Le:2015:PAT, author = "Matthew Le and Matthew Fluet", title = "Partial aborts for transactions via first-class continuations", journal = j-SIGPLAN, volume = "50", number = "9", pages = "230--242", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784736", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software transactional memory (STM) has proven to be a useful abstraction for developing concurrent applications, where programmers denote transactions with an atomic construct that delimits a collection of reads and writes to shared mutable references. The runtime system then guarantees that all transactions are observed to execute atomically with respect to each other. Traditionally, when the runtime system detects that one transaction conflicts with another, it aborts one of the transactions and restarts its execution from the beginning. This can lead to problems with both execution time and throughput. In this paper, we present a novel approach that uses first-class continuations to restart a conflicting transaction at the point of a conflict, avoiding the re-execution of any work from the beginning of the transaction that has not been compromised. In practice, this allows transactions to complete more quickly, decreasing execution time and increasing throughput. We have implemented this idea in the context of the Manticore project, an ML-family language with support for parallelism and concurrency. Crucially, we rely on constant-time continuation capturing via a continuation-passing-style (CPS) transformation and heap-allocated continuations.
When comparing our STM that performs partial aborts against one that performs full aborts, we achieve a decrease in execution time of up to 31\% and an increase in throughput of up to 351\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Scherer:2015:WST, author = "Gabriel Scherer and Didier R{\'e}my", title = "Which simple types have a unique inhabitant?", journal = j-SIGPLAN, volume = "50", number = "9", pages = "243--255", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784757", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the question of whether a given type has a unique inhabitant modulo program equivalence. In the setting of simply-typed lambda-calculus with sums, equipped with the strong $\beta\eta$-equivalence, we show that uniqueness is decidable. We present a saturating focused logic that introduces irreducible cuts on positive types ``as soon as possible''. Backward search in this logic gives an effective algorithm that returns either zero, one or two distinct inhabitants for any given type. Preliminary application studies show that such a feature can be useful in strongly-typed programs, inferring the code of highly-polymorphic library functions, or ``glue code'' inside more complex terms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Dunfield:2015:EEO, author = "Joshua Dunfield", title = "Elaborating evaluation-order polymorphism", journal = j-SIGPLAN, volume = "50", number = "9", pages = "256--268", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784744", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We classify programming languages according to evaluation order: each language fixes one evaluation order as the default, making it transparent to program in that evaluation order, and troublesome to program in the other. This paper develops a type system that is impartial with respect to evaluation order. Evaluation order is implicit in terms, and explicit in types, with by-value and by-name versions of type connectives. A form of intersection type quantifies over evaluation orders, describing code that is agnostic over (that is, polymorphic in) evaluation order. By allowing such generic code, programs can express the by-value and by-name versions of a computation without code duplication. We also formulate a type system that only has by-value connectives, plus a type that generalizes the difference between by-value and by-name connectives: it is either a suspension (by name) or a ``no-op'' (by value). We show a straightforward encoding of the impartial type system into the more economical one. Then we define an elaboration from the economical language to a call-by-value semantics, and prove that elaborating a well-typed source program, where evaluation order is implicit, produces a well-typed target program where evaluation order is explicit.
We also prove a simulation between evaluation of the target program and reductions (either by-value or by-name) in the source program. Finally, we prove that typing, elaboration, and evaluation are faithful to the type annotations given in the source program: if the programmer only writes by-value types, no by-name reductions can occur at run time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Rendel:2015:ARL, author = "Tillmann Rendel and Julia Trieflinger and Klaus Ostermann", title = "Automatic refunctionalization to a language with copattern matching: with applications to the expression problem", journal = j-SIGPLAN, volume = "50", number = "9", pages = "269--279", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784763", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Defunctionalization and refunctionalization establish a correspondence between first-class functions and pattern matching, but the correspondence is not symmetric: Not all uses of pattern matching can be automatically refunctionalized to uses of higher-order functions. To remedy this asymmetry, we generalize from first-class functions to arbitrary codata. This leads us to full defunctionalization and refunctionalization between a codata language based on copattern matching and a data language based on pattern matching. We observe how programs can be written as matrices so that they are modularly extensible in one dimension but not the other. In this representation, defunctionalization and refunctionalization correspond to matrix transposition which effectively changes the dimension of extensibility a program supports. This suggests applications to the expression problem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Russo:2015:FPT, author = "Alejandro Russo", title = "Functional pearl: two can keep a secret, if one of them uses {Haskell}", journal = j-SIGPLAN, volume = "50", number = "9", pages = "280--288", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784756", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For several decades, researchers from different communities have independently focused on protecting confidentiality of data. Two distinct technologies have emerged for such purposes: Mandatory Access Control (MAC) and Information-Flow Control (IFC)-the former belonging to operating systems (OS) research, while the latter to the programming languages community. These approaches restrict how data gets propagated within a system in order to avoid information leaks. In this scenario, Haskell plays a unique privileged role: it is able to protect confidentiality via libraries. This pearl presents a monadic API which statically protects confidentiality even in the presence of advanced features like exceptions, concurrency, and mutable data structures. 
Additionally, we present a mechanism to safely extend the library with new primitives, where library designers only need to indicate the read and write effects of new operations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Buiras:2015:HMS, author = "Pablo Buiras and Dimitrios Vytiniotis and Alejandro Russo", title = "{HLIO}: mixing static and dynamic typing for information-flow control in {Haskell}", journal = j-SIGPLAN, volume = "50", number = "9", pages = "289--301", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784758", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Information-Flow Control (IFC) is a well-established approach for allowing untrusted code to manipulate sensitive data without disclosing it. IFC is typically enforced via type systems and static analyses or via dynamic execution monitors. The LIO Haskell library, originating in operating systems research, implements a purely dynamic monitor of the sensitivity level of a computation, particularly suitable when data sensitivity levels are only known at runtime. In this paper, we show how to give programmers the flexibility of deferring IFC checks to runtime (as in LIO), while also providing static guarantees---and the absence of runtime checks---for parts of their programs that can be statically verified (unlike LIO). We present the design and implementation of our approach, HLIO (Hybrid LIO), as an embedding in Haskell that uses a novel technique for deferring IFC checks based on singleton types and constraint polymorphism. We formalize HLIO, prove non-interference, and show how interesting IFC examples can be programmed. Although our motivation is IFC, our technique for deferring constraints goes well beyond and offers a methodology for programmer-controlled hybrid type checking in Haskell.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{vanderPloeg:2015:PPF, author = "Atze van der Ploeg and Koen Claessen", title = "Practical principled {FRP}: forget the past, change the future, {FRPNow}!", journal = j-SIGPLAN, volume = "50", number = "9", pages = "302--314", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784752", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new interface for practical Functional Reactive Programming (FRP) that (1) is close in spirit to the original FRP ideas, (2) does not have the original space-leak problems, without using arrows or advanced types, and (3) provides a simple and expressive way for performing IO actions from FRP code. We also provide a denotational semantics for this new interface, and a technique (using Kripke logical relations) for reasoning about which FRP functions may ``forget their past'', i.e. which functions do not have an inherent space-leak. 
Finally, we show how we have implemented this interface as a Haskell library called FRPNow.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Bahr:2015:CSM, author = "Patrick Bahr and Jost Berthold and Martin Elsman", title = "Certified symbolic management of financial multi-party contracts", journal = j-SIGPLAN, volume = "50", number = "9", pages = "315--327", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784747", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Domain-specific languages (DSLs) for complex financial contracts are in practical use in many banks and financial institutions today. Given the level of automation and pervasiveness of software in the sector, the financial domain is immensely sensitive to software bugs. At the same time, there is an increasing need to analyse (and report on) the interaction between multiple parties. In this paper, we present a multi-party contract language that rigorously relegates any artefacts of simulation and computation from its core, which leads to favourable algebraic properties, and therefore allows for formalising domain-specific analyses and transformations using a proof assistant. At the centre of our formalisation is a simple denotational semantics independent of any stochastic aspects. Based on this semantics, we devise certified contract analyses and transformations. In particular, we give a type system, with an accompanying type inference procedure, that statically ensures that contracts follow the principle of causality. Moreover, we devise a reduction semantics that allows us to evolve contracts over time, in accordance with the denotational semantics. From the verified Coq definitions, we automatically extract a Haskell implementation of an embedded contract DSL along with the formally verified contract management functionality. This approach opens a road map towards more reliable contract management software, including the possibility of analysing contracts based on symbolic instead of numeric methods.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Smolka:2015:FCN, author = "Steffen Smolka and Spiridon Eliopoulos and Nate Foster and Arjun Guha", title = "A fast compiler for {NetKAT}", journal = j-SIGPLAN, volume = "50", number = "9", pages = "328--341", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784761", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-level programming languages play a key role in a growing number of networking platforms, streamlining application development and enabling precise formal reasoning about network behavior. Unfortunately, current compilers only handle ``local'' programs that specify behavior in terms of hop-by-hop forwarding behavior, or modest extensions such as simple paths. To encode richer ``global'' behaviors, programmers must add extra state --- something that is tricky to get right and makes programs harder to write and maintain. 
Making matters worse, existing compilers can take tens of minutes to generate the forwarding state for the network, even on relatively small inputs. This forces programmers to waste time working around performance issues or even revert to using hardware-level APIs. This paper presents a new compiler for the NetKAT language that handles rich features including regular paths and virtual networks, and yet is several orders of magnitude faster than previous compilers. The compiler uses symbolic automata to calculate the extra state needed to implement ``global'' programs, and an intermediate representation based on binary decision diagrams to dramatically improve performance. We describe the design and implementation of three essential compiler stages: from virtual programs (which specify behavior in terms of virtual topologies) to global programs (which specify network-wide behavior in terms of physical topologies), from global programs to local programs (which specify behavior in terms of single-switch behavior), and from local programs to hardware-level forwarding tables. We present results from experiments on real-world benchmarks that quantify performance in terms of compilation time and forwarding table size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Stucki:2015:RVP, author = "Nicolas Stucki and Tiark Rompf and Vlad Ureche and Phil Bagwell", title = "{RRB} vector: a practical general purpose immutable sequence", journal = j-SIGPLAN, volume = "50", number = "9", pages = "342--354", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784739", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State-of-the-art immutable collections have wildly differing performance characteristics across their operations, often forcing programmers to choose different collection implementations for each task. Thus, changes to the program can invalidate the choice of collections, making code evolution costly. It would be desirable to have a collection that performs well for a broad range of operations. To this end, we present the RRB-Vector, an immutable sequence collection that offers good performance across a large number of sequential and parallel operations. The underlying innovations are: (1) the Relaxed-Radix-Balanced (RRB) tree structure, which allows efficient structural reorganization, and (2) an optimization that exploits spatio-temporal locality on the RRB data structure in order to offset the cost of traversing the tree. In our benchmarks, the RRB-Vector speedup for parallel operations is lower bounded by 7x when executing on 4 CPUs of 8 cores each. The performance for discrete operations, such as appending on either end, or updating and removing elements, is consistently good and compares favorably to the most important immutable sequence collections in the literature and in use today. 
The memory footprint of RRB-Vector is on par with arrays and an order of magnitude less than competing collections.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Jaskelioff:2015:FPS, author = "Mauro Jaskelioff and Exequiel Rivas", title = "Functional pearl: a smart view on datatypes", journal = j-SIGPLAN, volume = "50", number = "9", pages = "355--361", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784743", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Left-nested list concatenations, left-nested binds on the free monad, and left-nested choices in many non-determinism monads have an algorithmically bad performance. Can we solve this problem without losing the ability to pattern-match on the computation? Surprisingly, there is a deceptively simple solution: use a smart view to pattern-match on the datatype. We introduce the notion of smart view and show how it solves the problem of slow left-nested operations. In particular, we use the technique to obtain fast and simple implementations of lists, of free monads, and of two non-determinism monads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Yang:2015:ECC, author = "Edward Z. Yang and Giovanni Campagna and {\"O}mer S. Agacan and Ahmed El-Hassany and Abhishek Kulkarni and Ryan R. Newton", title = "Efficient communication and collection with compact normal forms", journal = j-SIGPLAN, volume = "50", number = "9", pages = "362--374", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784735", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In distributed applications, the transmission of non-contiguous data structures is greatly slowed down by the need to serialize them into a buffer before sending. We describe Compact Normal Forms, an API that allows programmers to explicitly place immutable heap objects into regions, which can both be accessed like ordinary data as well as efficiently transmitted over the network. The process of placing objects into compact regions (essentially a copy) is faster than any serializer and can be amortized over a series of functional updates to the data structure in question. 
We implement this scheme in the Glasgow Haskell Compiler and show that even with the space expansion attendant with memory-oriented data structure representations, we achieve between $2\times$ and $4\times$ speedups on fast local networks with sufficiently large data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Keil:2015:BAH, author = "Matthias Keil and Peter Thiemann", title = "Blame assignment for higher-order contracts with intersection and union", journal = j-SIGPLAN, volume = "50", number = "9", pages = "375--386", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784737", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an untyped calculus of blame assignment for a higher-order contract system with two new operators: intersection and union. The specification of these operators is based on the corresponding type theoretic constructions. This connection makes intersection and union contracts their inevitable dynamic counterparts with a range of desirable properties and makes them suitable for subsequent integration in a gradual type system. A denotational specification provides the semantics of a contract in terms of two sets: a set of terms satisfying the contract and a set of contexts respecting the contract. This kind of specification for contracts is novel and interesting in its own right. A nondeterministic operational semantics serves as the specification for contract monitoring and for proving its correctness. It is complemented by a deterministic semantics that is closer to an implementation and that is connected to the nondeterministic semantics by simulation. The calculus is the formal basis of TJS, a language embedded, higher-order contract system implemented for JavaScript.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Swords:2015:ECM, author = "Cameron Swords and Amr Sabry and Sam Tobin-Hochstadt", title = "Expressing contract monitors as patterns of communication", journal = j-SIGPLAN, volume = "50", number = "9", pages = "387--399", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784742", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new approach to contract semantics which expresses myriad monitoring strategies using a small core of foundational communication primitives. This approach allows multiple existing contract monitoring approaches, ranging from Findler and Felleisen's original model of higher-order contracts to semi-eager, parallel, or asynchronous monitors, to be expressed in a single language built on well-understood constructs. We prove that this approach accurately simulates the original semantics of higher-order contracts.
A straightforward implementation in Racket demonstrates the practicality of our approach, which not only enriches existing Racket monitoring strategies, but also supports a new style of monitoring in which collections of contracts collaborate to establish a global invariant.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Zhu:2015:LRT, author = "He Zhu and Aditya V. Nori and Suresh Jagannathan", title = "Learning refinement types", journal = j-SIGPLAN, volume = "50", number = "9", pages = "400--411", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784766", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose the integration of a random test generation system (capable of discovering program bugs) and a refinement type system (capable of expressing and verifying program invariants), for higher-order functional programs, using a novel lightweight learning algorithm as an effective intermediary between the two. Our approach is based on the well-understood intuition that useful, but difficult to infer, program properties can often be observed from concrete program states generated by tests; these properties act as likely invariants, which if used to refine simple types, can have their validity checked by a refinement type checker. We describe an implementation of our technique for a variety of benchmarks written in ML, and demonstrate its effectiveness in inferring and proving useful invariants for programs that express complex higher-order control and dataflow.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Pavlinovic:2015:PSB, author = "Zvonimir Pavlinovic and Tim King and Thomas Wies", title = "Practical {SMT}-based type error localization", journal = j-SIGPLAN, volume = "50", number = "9", pages = "412--423", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784765", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compilers for statically typed functional programming languages are notorious for generating confusing type error messages. When the compiler detects a type error, it typically reports the program location where the type checking failed as the source of the error. Since other error sources are not even considered, the actual root cause is often missed. A more adequate approach is to consider all possible error sources and report the most useful one subject to some usefulness criterion. In our previous work, we showed that this approach can be formulated as an optimization problem related to satisfiability modulo theories (SMT). This formulation cleanly separates the heuristic nature of usefulness criteria from the underlying search problem. Unfortunately, algorithms that search for an optimal error source cannot directly use principal types which are crucial for dealing with the exponential-time complexity of the decision problem of polymorphic type checking.
In this paper, we present a new algorithm that efficiently finds an optimal error source in a given ill-typed program. Our algorithm uses an improved SMT encoding to cope with the high complexity of polymorphic typing by iteratively expanding the typing constraints from which principal types are derived. The algorithm preserves the clean separation between the heuristics and the actual search. We have implemented our algorithm for OCaml. In our experimental evaluation, we found that the algorithm reduces the running times for optimal type error localization from minutes to seconds and scales better than previous localization algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Karachalias:2015:GMT, author = "Georgios Karachalias and Tom Schrijvers and Dimitrios Vytiniotis and Simon Peyton Jones", title = "{GADTs} meet their match: pattern-matching warnings that account for {GADTs}, guards, and laziness", journal = j-SIGPLAN, volume = "50", number = "9", pages = "424--436", month = sep, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858949.2784748", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "For ML and Haskell, accurate warnings when a function definition has redundant or missing patterns are mission critical. But today's compilers generate bogus warnings when the programmer uses guards (even simple ones), GADTs, pattern guards, or view patterns. We give the first algorithm that handles all these cases in a single, uniform framework, together with an implementation in GHC, and evidence of its utility in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '15 conference proceedings.", } @Article{Hague:2015:DRC, author = "Matthew Hague and Anthony W. Lin and C.-H. Luke Ong", title = "Detecting redundant {CSS} rules in {HTML5} applications: a tree rewriting approach", journal = j-SIGPLAN, volume = "50", number = "10", pages = "1--19", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814288", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "HTML5 applications normally have a large set of CSS (Cascading Style Sheets) rules for data display. Each CSS rule consists of a node selector and a declaration block (which assigns values to selected nodes' display attributes). As web applications evolve, maintaining CSS files can easily become problematic. Some CSS rules will be replaced by new ones, but these obsolete (hence redundant) CSS rules often remain in the applications. Not only does this ``bloat'' the applications --- increasing the bandwidth requirement --- but it also significantly increases web browsers' processing time. 
Most works on detecting redundant CSS rules in HTML5 applications do not consider the dynamic behaviours of HTML5 (specified in JavaScript); in fact, the only proposed method that takes these into account is dynamic analysis, which cannot soundly prove redundancy of CSS rules. In this paper, we introduce an abstraction of HTML5 applications based on monotonic tree-rewriting and study its ``redundancy problem''. We establish the precise complexity of the problem and various subproblems of practical importance (ranging from P to EXP). In particular, our algorithm relies on an efficient reduction to an analysis of symbolic pushdown systems (for which highly optimised solvers are available), which yields a fast method for checking redundancy in practice. We implemented our algorithm and demonstrated its efficacy in detecting redundant CSS rules in HTML5 applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Demsky:2015:SSD, author = "Brian Demsky and Patrick Lam", title = "{SATCheck}: {SAT}-directed stateless model checking for {SC} and {TSO}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "20--36", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814297", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing low-level concurrent code is well known to be challenging and error prone. The widespread deployment of multi-core hardware and the shift towards using low-level concurrent data structures has moved the problem into the mainstream. Finding bugs in such code may require finding a specific bug-revealing thread interleaving out of a huge space of parallel executions. Model-checking is a powerful technique for exhaustively testing code. However, scaling model checking presents a significant challenge. In this paper we present a new and more scalable technique for model checking concurrent code, based on concrete execution. Our technique observes concrete behaviors, builds a model of these behaviors, encodes the model in SAT, and leverages SAT solver technology to find executions that reveal new behaviors. It then runs the new execution, incorporates the newly observed behavior, and repeats the process until it has explored all reachable behaviors. We have implemented a prototype of our approach in the SATCheck tool. Our tool supports both the Total Store Ordering (TSO) and Sequentially Consistent (SC) memory models. We evaluate SATCheck by testing several concurrent data structure implementations and comparing its performance to the original DPOR stateless model checking algorithm implemented in CDSChecker, the source DPOR algorithm implemented in Nidhugg, and CheckFence. 
Our experiments show that SATCheck scales better than previous approaches while at the same time operating on concrete executions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Kuraj:2015:PES, author = "Ivan Kuraj and Viktor Kuncak and Daniel Jackson", title = "Programming with enumerable sets of structures", journal = j-SIGPLAN, volume = "50", number = "10", pages = "37--56", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814323", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an efficient, modular, and feature-rich framework for automated generation and validation of complex structures, suitable for tasks that explore a large space of structured values. Our framework is capable of exhaustive, incremental, parallel, and memoized enumeration from not only finite but also infinite domains, while providing fine-grained control over the process. Furthermore, the framework efficiently supports the inverse of enumeration (checking whether a structure can be generated and fast-forwarding to this structure to continue the enumeration) and lazy enumeration (achieving exhaustive testing without generating all structures). The foundation of efficient enumeration lies in both direct access to encoded structures, achieved with well-known and new pairing functions, and dependent enumeration, which embeds constraints into the enumeration to avoid backtracking. Our framework defines an algebra of enumerators, with combinators for their composition that preserve exhaustiveness and efficiency. We have implemented our framework as a domain-specific language in Scala. Our experiments demonstrate better performance and shorter specifications by up to a few orders of magnitude compared to existing approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Jensen:2015:SMC, author = "Casper S. Jensen and Anders M{\o}ller and Veselin Raychev and Dimitar Dimitrov and Martin Vechev", title = "Stateless model checking of event-driven applications", journal = j-SIGPLAN, volume = "50", number = "10", pages = "57--73", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814282", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern event-driven applications, such as web pages and mobile apps, rely on asynchrony to ensure smooth end-user experience. Unfortunately, even though these applications are executed by a single event-loop thread, they can still exhibit nondeterministic behaviors depending on the execution order of interfering asynchronous events. As in classic shared-memory concurrency, this nondeterminism makes it challenging to discover errors that manifest only in specific schedules of events. In this work we propose the first stateless model checker for event-driven applications, called R4. Our algorithm systematically explores the nondeterminism in the application and concisely exposes its overall effect, which is useful for bug discovery.
The algorithm builds on a combination of three key insights: (i) a dynamic partial order reduction (DPOR) technique for reducing the search space, tailored to the domain of event-driven applications, (ii) conflict-reversal bounding based on a hypothesis that most errors occur with a small number of event reorderings, and (iii) approximate replay of event sequences, which is critical for separating harmless from harmful nondeterminism. We instantiate R4 for the domain of client-side web applications and use it to analyze event interference in a number of real-world programs. The experimental results indicate that the precision and overall exploration capabilities of our system significantly exceed those of existing techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Hottelier:2015:SLE, author = "Thibaud Hottelier and Rastislav Bodik", title = "Synthesis of layout engines from relational constraints", journal = j-SIGPLAN, volume = "50", number = "10", pages = "74--88", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814291", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an algorithm for synthesizing efficient document layout engines from compact relational specifications. These specifications are compact in that a single specification can produce multiple engines, each for a distinct layout situation, i.e., a different combination of known vs. unknown attributes. Technically, our specifications are relational attribute grammars, while our engines are functional attribute grammars. By synthesizing functions from relational constraints, we obviate the need for constraint solving at runtime, because functional attribute grammars can be easily evaluated according to a fixed schedule, sidestepping the backtracking search performed by constraint solvers. Our experiments show that we can generate layout engines for non-trivial data visualizations, and that our synthesized engines are between 39- and 200-times faster than general-purpose constraint solvers. Relational specifications of layout give rise to synthesis problems that have previously proved intractable. Our algorithm exploits the hierarchical, grammar-based structure of the specification, decomposing the specification into smaller subproblems, which can be tackled with off-the-shelf synthesis procedures. The new synthesis problem then becomes the composition of the functions thus generated into a correct attribute grammar, which might be recursive.
We show how to solve this problem by efficient reduction to an SMT problem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Erdweg:2015:SOI, author = "Sebastian Erdweg and Moritz Lichter and Manuel Weiel", title = "A sound and optimal incremental build system with dynamic dependencies", journal = j-SIGPLAN, volume = "50", number = "10", pages = "89--106", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814316", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Build systems are used in all but the smallest software projects to invoke the right build tools on the right files in the right order. A build system must be sound (after a build, generated files consistently reflect the latest source files) and efficient (recheck and rebuild as few build units as possible). Contemporary build systems provide limited efficiency because they lack support for expressing fine-grained file dependencies. We present a build system called pluto that supports the definition of reusable, parameterized, interconnected builders. When run, a builder notifies the build system about dynamically required and produced files as well as about other builders whose results are needed. To support fine-grained file dependencies, we generalize the traditional notion of time stamps to allow builders to declare their actual requirements on a file's content. pluto collects the requirements and products of a builder with their stamps in a build summary. This enables pluto to provide provably sound and optimal incremental rebuilding. To support dynamic dependencies, our rebuild algorithm interleaves dependency analysis and builder execution and enforces invariants on the dependency graph through a dynamic analysis. We have developed pluto as a Java API and used it to implement more than 25 builders. We describe our experience with migrating a larger Ant build script to pluto and compare the respective build times.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Polozov:2015:FFI, author = "Oleksandr Polozov and Sumit Gulwani", title = "{FlashMeta}: a framework for inductive program synthesis", journal = j-SIGPLAN, volume = "50", number = "10", pages = "107--126", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814310", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inductive synthesis, or programming-by-examples (PBE), is gaining prominence with disruptive applications for automating repetitive tasks in end-user programming. However, designing, developing, and maintaining an effective industrial-quality inductive synthesizer is an intellectual and engineering challenge, requiring 1-2 man-years of effort. Our novel observation is that many PBE algorithms are a natural fall-out of one generic meta-algorithm and the domain-specific properties of the operators in the underlying domain-specific language (DSL).
The meta-algorithm propagates example-based constraints on an expression to its subexpressions by leveraging associated witness functions, which essentially capture the inverse semantics of the underlying operator. This observation enables a novel program synthesis methodology called data-driven domain-specific deduction (D4), where domain-specific insight, provided by the DSL designer, is separated from the synthesis algorithm. Our FlashMeta framework implements this methodology, allowing synthesizer developers to generate an efficient synthesizer from the mere DSL definition (if properties of the DSL operators have been modeled). In our case studies, we found that 10+ existing industrial-quality mass-market applications based on PBE can be cast as instances of D4. Our evaluation includes reimplementation of some prior works, which in FlashMeta become more efficient, maintainable, and extensible. As a result, FlashMeta-based PBE tools are deployed in several industrial products, including Microsoft PowerShell 3.0 for Windows 10, Azure Operational Management Suite, and Microsoft Cortana digital assistant.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Zhang:2015:SYB, author = "Haoyuan Zhang and Zewei Chu and Bruno C. d. S. Oliveira and Tijs van der Storm", title = "Scrap your boilerplate with object algebras", journal = j-SIGPLAN, volume = "50", number = "10", pages = "127--146", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814279", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traversing complex Abstract Syntax Trees (ASTs) typically requires large amounts of tedious boilerplate code. For many operations most of the code simply walks the structure, and only a small portion of the code implements the functionality that motivated the traversal in the first place. This paper presents a type-safe Java framework called Shy that removes much of this boilerplate code. In Shy object algebras are used to describe complex and extensible AST structures. Using Java annotations Shy generates generic boilerplate code for various types of traversals. For a concrete traversal, users of Shy can then inherit from the generated code and override only the interesting cases. Consequently, the amount of code that users need to write is significantly smaller. Moreover, traversals using the Shy framework are also much more structure shy, becoming more adaptive to future changes or extensions to the AST structure. To prove the effectiveness of the approach, we applied Shy in the implementation of a domain-specific questionnaire language. 
Our results show that for a large number of traversals there was a significant reduction in the amount of user-defined code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Sharma:2015:CCS, author = "Rahul Sharma and Eric Schkufza and Berkeley Churchill and Alex Aiken", title = "Conditionally correct superoptimization", journal = j-SIGPLAN, volume = "50", number = "10", pages = "147--162", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814278", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The aggressive optimization of heavily used kernels is an important problem in high-performance computing. However, both general purpose compilers and highly specialized tools such as superoptimizers often do not have sufficient static knowledge of restrictions on program inputs that could be exploited to produce the very best code. For many applications, the best possible code is conditionally correct: the optimized kernel is equal to the code that it replaces only under certain preconditions on the kernel's inputs. The main technical challenge in producing conditionally correct optimizations is in obtaining non-trivial and useful conditions and proving conditional equivalence formally in the presence of loops. We combine abstract interpretation, decision procedures, and testing to yield a verification strategy that can address both of these problems. This approach yields a superoptimizer for x86 that in our experiments produces binaries that are often multiple times faster than those produced by production compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Blackshear:2015:SCF, author = "Sam Blackshear and Bor-Yuh Evan Chang and Manu Sridharan", title = "Selective control-flow abstraction via jumping", journal = j-SIGPLAN, volume = "50", number = "10", pages = "163--182", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814293", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present jumping, a form of selective control-flow abstraction useful for improving the scalability of goal-directed static analyses. Jumping is useful for analyzing programs with complex control-flow such as event-driven systems. In such systems, accounting for orderings between certain events is important for precision, yet analyzing the product graph of all possible event orderings is intractable. Jumping solves this problem by allowing the analysis to selectively abstract away control-flow between events irrelevant to a goal query while preserving information about the ordering of relevant events. We present a framework for designing sound jumping analyses and create an instantiation of the framework for performing precise inter-event analysis of Android applications. 
Our experimental evaluation showed that using jumping to augment a precise goal-directed analysis with inter-event reasoning enabled our analysis to prove 90-97\% of dereferences safe across our benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Madhavan:2015:AGC, author = "Ravichandhran Madhavan and Mika{\"e}l Mayer and Sumit Gulwani and Viktor Kuncak", title = "Automating grammar comparison", journal = j-SIGPLAN, volume = "50", number = "10", pages = "183--200", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814304", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider from a practical perspective the problem of checking equivalence of context-free grammars. We present techniques for proving equivalence, as well as techniques for finding counter-examples that establish non-equivalence. Among the key building blocks of our approach is a novel algorithm for efficiently enumerating and sampling words and parse trees from arbitrary context-free grammars; the algorithm supports polynomial time random access to words belonging to the grammar. Furthermore, we propose an algorithm for proving equivalence of context-free grammars that is complete for LL grammars, yet can be invoked on any context-free grammar, including ambiguous grammars. Our techniques successfully find discrepancies between different syntax specifications of several real-world languages, and are capable of detecting fine-grained incremental modifications performed on grammars. Our evaluation shows that our tool improves significantly on existing state-of-the-art tools. In addition, we used these algorithms to develop an online tutoring system for grammars that we then used in an undergraduate course on computer language processing. On questions involving grammar constructions, our system was able to automatically evaluate the correctness of 95\% of the solutions submitted by students: it disproved 74\% of cases and proved 21\% of them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Ntzik:2015:RAP, author = "Gian Ntzik and Philippa Gardner", title = "Reasoning about the {POSIX} file system: local update and global pathnames", journal = j-SIGPLAN, volume = "50", number = "10", pages = "201--220", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814306", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a program logic for specifying a core sequential subset of the POSIX file system and for reasoning abstractly about client programs working with the file system. The challenge is to reason about the combination of local directory update and global pathname traversal (including '..' and symbolic links) which may overlap the directories being updated. Existing reasoning techniques are either based on first-order logic and do not scale, or on separation logic and can only handle linear pathnames (no '..' or symbolic links).
We introduce fusion logic for reasoning about local update and global pathname traversal, introducing a novel effect frame rule to propagate the effect of a local update on overlapping pathnames. We apply our reasoning to the standard recursive remove utility ({\tt rm -r}), discovering bugs in well-known implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Ou:2015:AAI, author = "Peizhao Ou and Brian Demsky", title = "{AutoMO}: automatic inference of memory order parameters for {C\slash C++11}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "221--240", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814286", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many concurrent data structures are initially designed for the sequential consistency (SC) memory model. Developers often implement these data structures on real-world systems with weaker memory models by adding sufficient fences to ensure that their implementation on the weak memory model exhibits the same executions as the SC memory model. Recently, the C11 and C++11 standards have added a weak memory model to the C and C++ languages. Developing and debugging code for weak memory models can be extremely challenging. We present AutoMO, a framework to support porting data structures designed for the SC memory model to the C/C++11 memory model. AutoMO provides support across the porting process: (1) it automatically infers initial settings for the memory order parameters, (2) it detects whether a C/C++11 execution is equivalent to some SC execution, and (3) it simplifies traces to make them easier to understand. We have used AutoMO to successfully infer memory order parameters for a range of data structures and to check whether executions of several concurrent data structure implementations are SC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Biswas:2015:VES, author = "Swarnendu Biswas and Minjia Zhang and Michael D. Bond and Brandon Lucia", title = "{Valor}: efficient, software-only region conflict exceptions", journal = j-SIGPLAN, volume = "50", number = "10", pages = "241--259", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814292", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data races complicate programming language semantics, and a data race is often a bug. Existing techniques detect data races and define their semantics by detecting conflicts between synchronization-free regions (SFRs). However, such techniques either modify hardware or slow programs dramatically, preventing always-on use today. This paper describes Valor, a sound, precise, software-only region conflict detection analysis that achieves high performance by eliminating the costly analysis on each read operation that prior approaches require. Valor instead logs a region's reads and lazily detects conflicts for logged reads when the region ends. 
As a comparison, we have also developed FastRCD, a conflict detector that leverages the epoch optimization strategy of the FastTrack data race detector. We evaluate Valor, FastRCD, and FastTrack, showing that Valor dramatically outperforms FastRCD and FastTrack. Valor is the first region conflict detector to provide strong semantic guarantees for racy program executions with under 2X slowdown. Overall, Valor advances the state of the art in always-on support for strong behavioral guarantees for data races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Cohen:2015:AMR, author = "Nachshon Cohen and Erez Petrank", title = "Automatic memory reclamation for lock-free data structures", journal = j-SIGPLAN, volume = "50", number = "10", pages = "260--279", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814298", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lock-free data-structures are widely employed in practice, yet designing lock-free memory reclamation for them is notoriously difficult. In particular, all known lock-free reclamation schemes are ``manual'' in the sense that the developer has to specify when nodes have retired and may be reclaimed. Retiring nodes adequately is non-trivial and often requires the modification of the original lock-free algorithm. In this paper we present an automatic lock-free reclamation scheme for lock-free data-structures in the spirit of a mark-sweep garbage collection. The proposed algorithm works with any normalized lock-free algorithm and with no need for the programmer to retire nodes or make changes to the algorithm. Evaluation of the proposed scheme on a linked-list and a hash table shows that it performs similarly to the best manual (lock-free) memory reclamation scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Lopez:2015:PBV, author = "Hugo A. L{\'o}pez and Eduardo R. B. Marques and Francisco Martins and Nicholas Ng and C{\'e}sar Santos and Vasco Thudichum Vasconcelos and Nobuko Yoshida", title = "Protocol-based verification of message-passing parallel programs", journal = j-SIGPLAN, volume = "50", number = "10", pages = "280--298", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814302", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present ParTypes, a type-based methodology for the verification of Message Passing Interface (MPI) programs written in the C programming language. The aim is to statically verify programs against protocol specifications, enforcing properties such as fidelity and absence of deadlocks. We develop a protocol language based on a dependent type system for message-passing parallel programs, which includes various communication operators, such as point-to-point messages, broadcast, reduce, array scatter and gather. For the verification of a program against a given protocol, the protocol is first translated into a representation read by VCC, a software verifier for C. 
We successfully verified several MPI programs in a running time that is independent of the number of processes or other input parameters. This contrasts with alternative techniques, notably model checking and runtime verification, that suffer from the state-explosion problem or that otherwise depend on parameters to the program itself. We experimentally evaluated our approach against state-of-the-art tools for MPI to conclude that our approach offers a scalable solution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Bastani:2015:IVA, author = "Osbert Bastani and Saswat Anand and Alex Aiken", title = "Interactively verifying absence of explicit information flows in {Android} apps", journal = j-SIGPLAN, volume = "50", number = "10", pages = "299--315", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814274", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "App stores are increasingly the preferred mechanism for distributing software, including mobile apps (Google Play), desktop apps (Mac App Store and Ubuntu Software Center), computer games (the Steam Store), and browser extensions (Chrome Web Store). The centralized nature of these stores has important implications for security. While app stores have unprecedented ability to audit apps, users now trust hosted apps, making them more vulnerable to malware that evades detection and finds its way onto the app store. Sound static explicit information flow analysis has the potential to significantly aid human auditors, but it is handicapped by high false positive rates. Instead, auditors currently rely on a combination of dynamic analysis (which is unsound) and lightweight static analysis (which cannot identify information flows) to help detect malicious behaviors. We propose a process for producing apps certified to be free of malicious explicit information flows. In practice, imprecision in the reachability analysis is a major source of false positive information flows that are difficult to understand and discharge. In our approach, the developer provides tests that specify what code is reachable, allowing the static analysis to restrict its search to tested code. The app hosted on the store is instrumented to enforce the provided specification (i.e., executing untested code terminates the app). We use abductive inference to minimize the necessary instrumentation, and then interact with the developer to ensure that the instrumentation only cuts unreachable code. 
We demonstrate the effectiveness of our approach in verifying a corpus of 77 Android apps --- our interactive verification process successfully discharges 11 out of the 12 false positives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Brutschy:2015:SGD, author = "Lucas Brutschy and Pietro Ferrara and Omer Tripp and Marco Pistoia", title = "{ShamDroid}: gracefully degrading functionality in the presence of limited resource access", journal = j-SIGPLAN, volume = "50", number = "10", pages = "316--331", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814296", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Given a program whose functionality depends on access to certain external resources, we investigate the question of how to gracefully degrade functionality when a subset of those resources is unavailable. The concrete setting motivating this problem statement is mobile applications, which rely on contextual data (e.g., device identifiers, user location and contacts, etc.) to fulfill their functionality. In particular, we focus on the Android platform, which mediates access to resources via an installation-time permission model. On the one hand, granting an app the permission to access a resource (e.g., the device ID) entails privacy threats (e.g., releasing the device ID to advertising servers). On the other hand, denying access to a resource could render the app useless (e.g., if inability to read the device ID is treated as an error state). Our goal is to specialize an existing Android app in such a way that it is disabled from accessing certain sensitive resources (or contextual data) as specified by the user, while still being able to execute functionality that does not depend on those resources. We present ShamDroid, a program transformation algorithm, based on specialized forms of program slicing, backwards static analysis and constraint solving, that enables the use of Android apps with partial permissions. We rigorously state the guarantees provided by ShamDroid w.r.t. functionality maximization. We provide an evaluation over the top 500 Google Play apps and report on an extensive comparative evaluation of ShamDroid against three other state-of-the-art solutions (APM, XPrivacy, and Google App Ops) that mediate resource access at the system (rather than app) level. ShamDroid performs better than all of these tools by a significant margin, leading to abnormal behavior in only 1 out of 27 apps we manually investigated, compared to the other solutions, which cause crashes and abnormalities in 9 or more of the apps.
This demonstrates the importance of performing app-sensitive mocking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Bielik:2015:SRD, author = "Pavol Bielik and Veselin Raychev and Martin Vechev", title = "Scalable race detection for {Android} applications", journal = j-SIGPLAN, volume = "50", number = "10", pages = "332--348", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814303", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a complete end-to-end dynamic analysis system for finding data races in mobile Android applications. The capabilities of our system significantly exceed the state of the art: our system can analyze real-world application interactions in minutes rather than hours, finds errors inherently beyond the reach of existing approaches, while still (critically) reporting very few false positives. Our system is based on three key concepts: (i) a thorough happens-before model of Android-specific concurrency, (ii) a scalable analysis algorithm for efficiently building and querying the happens-before graph, and (iii) an effective set of domain-specific filters that reduce the number of reported data races by several orders of magnitude. We evaluated the usability and performance of our system on 354 real-world Android applications (e.g., Facebook). Our system analyzes a minute of end-user interaction with the application in about 24 seconds, while current approaches take hours to complete. Inspecting the results for 8 large open-source applications revealed 15 harmful bugs of diverse kinds. Some of the bugs we reported were confirmed and fixed by developers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Hu:2015:VYL, author = "Yongjian Hu and Tanzirul Azim and Iulian Neamtiu", title = "Versatile yet lightweight record-and-replay for {Android}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "349--366", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814320", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recording and replaying the execution of smartphone apps is useful in a variety of contexts, from reproducing bugs to profiling and testing. Achieving effective record-and-replay is a balancing act between accuracy and overhead. On smartphones, the act is particularly complicated, because smartphone apps receive a high-bandwidth stream of input (e.g., network, GPS, camera, microphone, touchscreen) and concurrency events, but the stream has to be recorded and replayed with minimal overhead, to avoid interfering with app execution. Prior record-and-replay approaches have focused on replaying machine instructions or system calls, which is not a good fit on smartphones. We propose a novel, stream-oriented record-and-replay approach which achieves high-accuracy and low-overhead by aiming at a sweet spot: recording and replaying sensor and network input, event schedules, and inter-app communication via intents. 
To demonstrate the versatility of our approach, we have constructed a tool named VALERA that supports record-and-replay on the Android platform. VALERA works with apps running directly on the phone, and does not require access to the app source code. Through an evaluation on 50 popular Android apps, we show that: VALERA's replay fidelity far exceeds current record-and-replay approaches for Android; VALERA's precise timing control and low overhead (about 1\% for either record or replay) allow it to replay high-throughput, timing-sensitive apps such as video/audio capture and recognition; and VALERA's support for event schedule replay enables the construction of useful analyses, such as reproducing event-driven race bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Bender:2015:DFI, author = "John Bender and Mohsen Lesani and Jens Palsberg", title = "Declarative fence insertion", journal = j-SIGPLAN, volume = "50", number = "10", pages = "367--385", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814318", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Previous work has shown how to insert fences that enforce sequential consistency. However, for many concurrent algorithms, sequential consistency is unnecessarily strong and can lead to high execution overhead. The reason is that, often, correctness relies on the execution order of a few specific pairs of instructions. Algorithm designers can declare those execution orders and thereby enable memory-model-independent reasoning about correctness and also ease implementation of algorithms on multiple platforms. The literature has examples of such reasoning, while tool support for enforcing the orders has been lacking until now. In this paper we present a declarative approach to specify and enforce execution orders. Our fence insertion algorithm first identifies the execution orders that a given memory model enforces automatically, and then inserts fences that enforce the rest. Our benchmarks include three off-the-shelf transactional memory algorithms written in C/C++ for which we specify suitable execution orders. For those benchmarks, our experiments with the x86 and ARMv7 memory models show that our tool inserts fences that are competitive with those inserted by the original authors. Our tool is the first to insert fences into transactional memory algorithms and it solves the long-standing problem of how to easily port such algorithms to a novel memory model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Le:2015:FDC, author = "Vu Le and Chengnian Sun and Zhendong Su", title = "Finding deep compiler bugs via guided stochastic program mutation", journal = j-SIGPLAN, volume = "50", number = "10", pages = "386--399", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814319", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler testing is important and challenging.
Equivalence Modulo Inputs (EMI) is a recent promising approach for compiler validation. It is based on mutating the unexecuted statements of an existing program under some inputs to produce new equivalent test programs w.r.t. these inputs. Orion is a simple realization of EMI by only randomly deleting unexecuted statements. Despite its success in finding many bugs in production compilers, Orion's effectiveness is still limited by its simple, blind mutation strategy. To more effectively realize EMI, this paper introduces a guided, advanced mutation strategy based on Bayesian optimization. Our goal is to generate diverse programs to more thoroughly exercise compilers. We achieve this with two techniques: (1) the support of both code deletions and insertions in the unexecuted regions, leading to a much larger test program space; and (2) the use of an objective function that promotes control-flow-diverse programs for guiding Markov Chain Monte Carlo (MCMC) optimization to explore the search space. Our technique helps discover deep bugs that require elaborate mutations. Our realization, Athena, targets C compilers. In 19 months, Athena has found 72 new bugs --- many of which are deep and important bugs --- in GCC and LLVM. Developers have confirmed all 72 bugs and fixed 68 of them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Wang:2015:VAR, author = "Haichuan Wang and David Padua and Peng Wu", title = "Vectorization of {Apply} to reduce interpretation overhead of {R}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "400--415", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814273", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "R is a popular dynamic language designed for statistical computing. Despite R's huge user base, the inefficiency in R's language implementation becomes a major pain-point in everyday use as well as an obstacle to applying R to solve large-scale analytics problems. The two most common approaches to improve the performance of dynamic languages are: implementing more efficient interpretation strategies and extending the interpreter with a Just-In-Time (JIT) compiler. However, both approaches require significant changes to the interpreter, and complicate the adoption by development teams as a result. This paper presents a new approach to improve execution efficiency of R programs by vectorizing the widely used Apply class of operations. Apply accepts two parameters: a function and a collection of input data elements. The standard implementation of Apply iteratively invokes the input function with each element in the data collection. Our approach combines data transformation and function vectorization to convert the looping-over-data execution of the standard Apply into a single invocation of a vectorized function that contains a sequence of vector operations over the input data. This conversion can significantly speed up the execution of Apply operations in R by reducing the number of interpretation steps. We implemented the vectorization transformation as an R package.
To enable the optimization, all that is needed is to invoke the package, and the user can use a normal R interpreter without any changes. The evaluation shows that the proposed method delivers significant performance improvements for a collection of data analysis algorithm benchmarks. This is achieved without any native code generation and using only a single-thread of execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Gvero:2015:SJE, author = "Tihomir Gvero and Viktor Kuncak", title = "Synthesizing {Java} expressions from free-form queries", journal = j-SIGPLAN, volume = "50", number = "10", pages = "416--432", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814295", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new code assistance tool for integrated development environments. Our system accepts as input free-form queries containing a mixture of English and Java, and produces Java code expressions that take the query into account and respect syntax, types, and scoping rules of Java, as well as statistical usage patterns. In contrast to solutions based on code search, the results returned by our tool need not directly correspond to any previously seen code fragment. As part of our system we have constructed a probabilistic context free grammar for Java constructs and library invocations, as well as an algorithm that uses a customized natural language processing tool chain to extract information from free-form text queries. We present the results on a number of examples showing that our technique (1) often produces the expected code fragments, (2) tolerates much of the flexibility of natural language, and (3) can repair incorrect Java expressions that use, for example, the wrong syntax or missing arguments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Zheng:2015:APP, author = "Yudi Zheng and Lubom{\'\i}r Bulej and Walter Binder", title = "Accurate profiling in the presence of dynamic compilation", journal = j-SIGPLAN, volume = "50", number = "10", pages = "433--450", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814281", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many profilers based on bytecode instrumentation yield wrong results in the presence of an optimizing dynamic compiler, either due to not being aware of optimizations such as stack allocation and method inlining, or due to the inserted code disrupting such optimizations. To avoid such perturbations, we present a novel technique to make any profiler implemented at the bytecode level aware of optimizations performed by the dynamic compiler. We implement our approach in a state-of-the-art Java virtual machine and demonstrate its significance with concrete profilers. 
We quantify the impact of escape analysis on allocation profiling and object life-time analysis, and the impact of method inlining on callsite profiling. We illustrate how our approach enables new kinds of profilers, such as a profiler for non-inlined callsites, and a testing framework for locating performance bugs in dynamic compiler implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Aigner:2015:FMS, author = "Martin Aigner and Christoph M. Kirsch and Michael Lippautz and Ana Sokolova", title = "Fast, multicore-scalable, low-fragmentation memory allocation through large virtual memory and global data structures", journal = j-SIGPLAN, volume = "50", number = "10", pages = "451--469", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814294", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We demonstrate that general-purpose memory allocation involving many threads on many cores can be done with high performance, multicore scalability, and low memory consumption. For this purpose, we have designed and implemented scalloc, a concurrent allocator that generally performs and scales in our experiments better than other allocators while using less memory, and is still competitive otherwise. The main ideas behind the design of scalloc are: uniform treatment of small and big objects through so-called virtual spans, efficiently and effectively reclaiming free memory through fast and scalable global data structures, and constant-time (modulo synchronization) allocation and deallocation operations that trade off memory reuse and spatial locality without being subject to false sharing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Boston:2015:PTI, author = "Brett Boston and Adrian Sampson and Dan Grossman and Luis Ceze", title = "Probability type inference for flexible approximate programming", journal = j-SIGPLAN, volume = "50", number = "10", pages = "470--487", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814301", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In approximate computing, programs gain efficiency by allowing occasional errors. Controlling the probabilistic effects of this approximation remains a key challenge. We propose a new approach where programmers use a type system to communicate high-level constraints on the degree of approximation. A combination of type inference, code specialization, and optional dynamic tracking makes the system expressive and convenient. The core type system captures the probability that each operation exhibits an error and bounds the probability that each expression deviates from its correct value. Solver-aided type inference lets the programmer specify the correctness probability on only some variables --- program outputs, for example --- and automatically fills in other types to meet these specifications.
An optional dynamic type helps cope with complex run-time behavior where static approaches are insufficient. Together, these features interact to yield a high degree of programmer control while offering a strong soundness guarantee. We use existing approximate-computing benchmarks to show how our language, DECAF, maintains a low annotation burden. Our constraint-based approach can encode hardware details, such as finite degrees of reliability, so we also use DECAF to examine implications for approximate hardware design. We find that multi-level architectures can offer advantages over simpler two-level machines and that solver-aided optimization improves efficiency.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Jantz:2015:CLM, author = "Michael R. Jantz and Forrest J. Robinson and Prasad A. Kulkarni and Kshitij A. Doshi", title = "Cross-layer memory management for managed language applications", journal = j-SIGPLAN, volume = "50", number = "10", pages = "488--504", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814322", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance and energy efficiency in memory have become critically important for a wide range of computing domains. However, it is difficult to control and optimize memory power and performance because these effects depend upon activity across multiple layers of the vertical execution stack. To address this challenge, we construct a novel and collaborative framework that employs object placement, cross-layer communication, and page-level management to effectively distribute application objects in the DRAM hardware to achieve desired power/performance goals. In this work, we describe the design and implementation of our framework, which is the first to integrate automatic object profiling and analysis at the application layer with fine-grained management of memory hardware resources in the operating system. We demonstrate the utility of our framework by employing it to more effectively control memory power consumption. We design a custom memory-intensive workload to show the potential of our approach. Next, we develop sampling and profiling-based analyses and modify the code generator in the HotSpot VM to understand object usage patterns and automatically determine and control the placement of hot and cold objects in a partitioned VM heap. This information is communicated to the operating system, which uses it to map the logical application pages to the appropriate DRAM ranks according to user-defined provisioning goals. 
We evaluate our framework and find that it achieves our test goal of significant DRAM energy savings across a variety of workloads, without any source code modifications or recompilations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Madsen:2015:SAE, author = "Magnus Madsen and Frank Tip and Ondrej Lhot{\'a}k", title = "Static analysis of event-driven {Node.js JavaScript} applications", journal = j-SIGPLAN, volume = "50", number = "10", pages = "505--519", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814272", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many JavaScript programs are written in an event-driven style. In particular, in server-side Node.js applications, operations involving sockets, streams, and files are typically performed in an asynchronous manner, where the execution of listeners is triggered by events. Several types of programming errors are specific to such event-based programs (e.g., unhandled events, and listeners that are registered too late). We present the event-based call graph, a program representation that can be used to detect bugs related to event handling. We have designed and implemented three analyses for constructing event-based call graphs. Our results show that these analyses are capable of detecting problems reported on StackOverflow. Moreover, we show that the number of false positives reported by the analysis on a suite of small Node.js applications is manageable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Feng:2015:EQD, author = "Yu Feng and Xinyu Wang and Isil Dillig and Calvin Lin", title = "{EXPLORER} : query- and demand-driven exploration of interprocedural control flow properties", journal = j-SIGPLAN, volume = "50", number = "10", pages = "520--534", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814284", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a general framework and its implementation in a tool called EXPLORER for statically answering a class of interprocedural control flow queries about Java programs. EXPLORER allows users to formulate queries about feasible callstack configurations using regular expressions, and it employs a precise, demand-driven algorithm for answering such queries. Specifically, EXPLORER constructs an automaton A that is iteratively refined until either the language accepted by A is empty (meaning that the query has been refuted) or until no further refinement is possible based on a precise, context-sensitive abstraction of the program. We evaluate EXPLORER by applying it to three different program analysis tasks, namely, (1) analysis of the observer design pattern in Java, (2) identification of a class of performance bugs, and (3) analysis of inter-component communication in Android applications. 
Our evaluation shows that EXPLORER is both efficient and precise.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Dietrich:2015:GSE, author = "Jens Dietrich and Nicholas Hollingum and Bernhard Scholz", title = "Giga-scale exhaustive points-to analysis for {Java} in under a minute", journal = j-SIGPLAN, volume = "50", number = "10", pages = "535--551", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814307", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computing a precise points-to analysis for very large Java programs remains challenging despite the large body of research on points-to analysis. Any approach must solve an underlying dynamic graph reachability problem, for which the best algorithms have near-cubic worst-case runtime complexity, and, hence, previous work does not scale to programs with millions of lines of code. In this work, we present a novel approach for solving the field-sensitive points-to problem for Java with the means of (1) a transitive-closure data-structure, and (2) a pre-computed set of potentially matching load/store pairs to accelerate the fix-point calculation. Experimentation on Java benchmarks validates the superior performance of our approach over the standard context-free language reachability implementations. Our approach computes a points-to index for the OpenJDK with over 1.5 billion tuples in under a minute.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Darais:2015:GTM, author = "David Darais and Matthew Might and David {Van Horn}", title = "{Galois} transformers and modular abstract interpreters: reusable metatheory for program analysis", journal = j-SIGPLAN, volume = "50", number = "10", pages = "552--571", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814308", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The design and implementation of static analyzers has become increasingly systematic. Yet for a given language or analysis feature, it often requires tedious and error prone work to implement an analyzer and prove it sound. In short, static analysis features and their proofs of soundness do not compose well, causing a dearth of reuse in both implementation and metatheory. We solve the problem of systematically constructing static analyzers by introducing Galois transformers: monad transformers that transport Galois connection properties. In concert with a monadic interpreter, we define a library of monad transformers that implement building blocks for classic analysis parameters like context, path, and heap (in)sensitivity. Moreover, these can be composed together independent of the language being analyzed. Significantly, a Galois transformer can be proved sound once and for all, making it a reusable analysis component. 
As new analysis features and abstractions are developed and mixed in, soundness proofs need not be reconstructed, as the composition of a monad transformer stack is sound by virtue of its constituents. Galois transformers provide a viable foundation for reusable and composable metatheory for program analysis. Finally, these Galois transformers shift the level of abstraction in analysis design and implementation to a level where non-specialists have the ability to synthesize sound analyzers over a number of parameters.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Oh:2015:LSA, author = "Hakjoo Oh and Hongseok Yang and Kwangkeun Yi", title = "Learning a strategy for adapting a program analysis via {Bayesian} optimisation", journal = j-SIGPLAN, volume = "50", number = "10", pages = "572--588", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814309", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Building a cost-effective static analyser for real-world programs is still regarded as an art. One key contributor to this grim reputation is the difficulty in balancing the cost and the precision of an analyser. An ideal analyser should be adaptive to a given analysis task, and avoid using techniques that unnecessarily improve precision and increase analysis cost. However, achieving this ideal is highly nontrivial, and it requires a large amount of engineering effort. In this paper we present a new approach for building an adaptive static analyser. In our approach, the analyser includes a sophisticated parameterised strategy that decides, for each part of a given program, whether to apply a precision-improving technique to that part or not. We present a method for learning a good parameter for such a strategy from an existing codebase via Bayesian optimisation. The learnt strategy is then used for new, unseen programs. Using our approach, we developed partially flow- and context-sensitive variants of a realistic C static analyser. The experimental results demonstrate that using Bayesian optimisation is crucial for learning from an existing codebase.
Also, they show that among all program queries that require flow- or context-sensitivity, our partially flow- and context-sensitive analysis answers 75\% of them, while increasing the analysis cost only by 3.3x of the baseline flow- and context-insensitive analysis, rather than 40x or more of the fully sensitive version.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Alves:2015:RPD, author = "P{\'e}ricles Alves and Fabian Gruber and Johannes Doerfert and Alexandros Lamprineas and Tobias Grosser and Fabrice Rastello and Fernando Magno Quint{\~a}o Pereira", title = "Runtime pointer disambiguation", journal = j-SIGPLAN, volume = "50", number = "10", pages = "589--606", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814285", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To optimize code effectively, compilers must deal with memory dependencies. However, the state-of-the-art heuristics available in the literature to track memory dependencies are inherently imprecise and computationally expensive. Consequently, the most advanced code transformations that compilers have today are ineffective when applied on real-world programs. The goal of this paper is to solve this conundrum through dynamic disambiguation of pointers. We provide different ways to determine at runtime when two memory locations can overlap. We then produce two versions of a code region: one that is aliasing-free --- hence, easy to optimize --- and another that is not. Our checks let us safely branch to the optimizable region. We have applied these ideas on Polly-LLVM, a loop optimizer built on top of the LLVM compilation infrastructure. Our experiments indicate that our method is precise, effective and useful: we can disambiguate every pair of pointers in the loop-intensive Polybench benchmark suite. The result of this precision is code quality: the binaries we generate are 10\% faster than those that Polly-LLVM produces without our optimization, at the {\tt -O3} optimization level of LLVM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Toffola:2015:PPY, author = "Luca Della Toffola and Michael Pradel and Thomas R. Gross", title = "Performance problems you can fix: a dynamic analysis of memoization opportunities", journal = j-SIGPLAN, volume = "50", number = "10", pages = "607--622", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814290", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance bugs are a prevalent problem and recent research proposes various techniques to identify such bugs. This paper addresses a kind of performance problem that often is easy to address but difficult to identify: redundant computations that may be avoided by reusing already computed results for particular inputs, a technique called memoization.
To help developers find and use memoization opportunities, we present MemoizeIt, a dynamic analysis that identifies methods that repeatedly perform the same computation. The key idea is to compare inputs and outputs of method calls in a scalable yet precise way. To avoid the overhead of comparing objects at all method invocations in detail, MemoizeIt first compares objects without following any references and iteratively increases the depth of exploration while shrinking the set of considered methods. After each iteration, the approach ignores methods that cannot benefit from memoization, allowing it to analyze calls to the remaining methods in more detail. For every memoization opportunity that MemoizeIt detects, it provides hints on how to implement memoization, making it easy for the developer to fix the performance issue. Applying MemoizeIt to eleven real-world Java programs reveals nine profitable memoization opportunities, most of which are missed by traditional CPU time profilers, conservative compiler optimizations, and other existing approaches for finding performance bugs. Adding memoization as proposed by MemoizeIt leads to statistically significant speedups by factors between 1.04x and 12.93x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Lee:2015:RRA, author = "Wen-Chuan Lee and Tao Bao and Yunhui Zheng and Xiangyu Zhang and Keval Vora and Rajiv Gupta", title = "{RAIVE}: runtime assessment of floating-point instability by vectorization", journal = j-SIGPLAN, volume = "50", number = "10", pages = "623--638", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814299", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Floating point representation has limited precision and inputs to floating point programs may also have errors. Consequently, during execution, errors are introduced, propagated, and accumulated, leading to unreliable outputs. We call this the instability problem. We propose RAIVE, a technique that identifies output variations of a floating point execution in the presence of instability. RAIVE transforms every floating point value to a vector of multiple values --- the values added to create the vector are obtained by introducing artificial errors that are upper bounds of actual errors. The propagation of artificial errors models the propagation of actual errors. When values in vectors result in discrete execution differences (e.g., following different paths), the execution is forked to capture the resulting output variations. Our evaluation shows that RAIVE can precisely capture output variations. 
Its overhead (340\%) is 2.43 times lower than the state of the art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Fu:2015:ABE, author = "Zhoulai Fu and Zhaojun Bai and Zhendong Su", title = "Automated backward error analysis for numerical code", journal = j-SIGPLAN, volume = "50", number = "10", pages = "639--654", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814317", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Numerical code uses floating-point arithmetic and necessarily suffers from roundoff and truncation errors. Error analysis is the process to quantify such uncertainty in the solution to a problem. Forward error analysis and backward error analysis are two popular paradigms of error analysis. Forward error analysis is more intuitive and has been explored and automated by the programming languages (PL) community. In contrast, although backward error analysis is more preferred by numerical analysts and the foundation for numerical stability, it is less known and unexplored by the PL community. To fill the gap, this paper presents an automated backward error analysis for numerical code to empower both numerical analysts and application developers. In addition, we use the computed backward error results to also compute the condition number, an important quantity recognized by numerical analysts for measuring how sensitive a function is to changes or errors in the input. Experimental results on Intel X87 FPU functions and widely-used GNU C Library functions demonstrate that our analysis is effective at analyzing the accuracy of floating-point programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Voelter:2015:UCL, author = "Markus Voelter and Arie van Deursen and Bernd Kolb and Stephan Eberle", title = "Using {C} language extensions for developing embedded software: a case study", journal = j-SIGPLAN, volume = "50", number = "10", pages = "655--674", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814276", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We report on an industrial case study on developing the embedded software for a smart meter using the C programming language and domain-specific extensions of C such as components, physical units, state machines, registers and interrupts. We find that the extensions help significantly with managing the complexity of the software. They improve testability mainly by supporting hardware-independent testing, as illustrated by low integration efforts. The extensions also do not incur significant overhead regarding memory consumption and performance. Our case study relies on mbeddr, an extensible version of C.
mbeddr, in turn, builds on the MPS language workbench which supports modular extension of languages and IDEs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Lopes:2015:HSA, author = "Cristina V. Lopes and Joel Ossher", title = "How scale affects structure in {Java} programs", journal = j-SIGPLAN, volume = "50", number = "10", pages = "675--694", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814300", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many internal software metrics and external quality attributes of Java programs correlate strongly with program size. This knowledge has been used pervasively in quantitative studies of software through practices such as normalization on size metrics. This paper reports size-related super- and sublinear effects that have not been known before. Findings obtained on a very large collection of Java programs --- 30,911 projects hosted at Google Code as of Summer 2011 --- unveils how certain characteristics of programs vary disproportionately with program size, sometimes even non-monotonically. Many of the specific parameters of nonlinear relations are reported. This result gives further insights for the differences of ``programming in the small'' vs. ``programming in the large.'' The reported findings carry important consequences for OO software metrics, and software research in general: metrics that have been known to correlate with size can now be properly normalized so that all the information that is left in them is size-independent.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Mastrangelo:2015:UYO, author = "Luis Mastrangelo and Luca Ponzanelli and Andrea Mocci and Michele Lanza and Matthias Hauswirth and Nathaniel Nystrom", title = "Use at your own risk: the {Java} unsafe {API} in the wild", journal = j-SIGPLAN, volume = "50", number = "10", pages = "695--710", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814313", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java is a safe language. Its runtime environment provides strong safety guarantees that any Java application can rely on. Or so we think. We show that the runtime actually does not provide these guarantees---for a large fraction of today's Java code. Unbeknownst to many application developers, the Java runtime includes a ``backdoor'' that allows expert library and framework developers to circumvent Java's safety guarantees. This backdoor is there by design, and is well known to experts, as it enables them to write high-performance ``systems-level'' code in Java. For much the same reasons that safe languages are preferred over unsafe languages, these powerful---but unsafe---capabilities in Java should be restricted. They should be made safe by changing the language, the runtime system, or the libraries. 
At the very least, their use should be restricted. This paper is a step in that direction. We analyzed 74 GB of compiled Java code, spread over 86,479 Java archives, to determine how Java's unsafe capabilities are used in real-world libraries and applications. We found that 25\% of Java bytecode archives depend on unsafe third-party Java code, and thus Java's safety guarantees cannot be trusted. We identify 14 different usage patterns of Java's unsafe capabilities, and we provide supporting evidence for why real-world code needs these capabilities. Our long-term goal is to provide a foundation for the design of new language features to regain safety in Java.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Achour:2015:ACO, author = "Sara Achour and Martin C. Rinard", title = "Approximate computation with outlier detection in {Topaz}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "711--730", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814314", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Topaz, a new task-based language for computations that execute on approximate computing platforms that may occasionally produce arbitrarily inaccurate results. Topaz maps tasks onto the approximate hardware and integrates the generated results into the main computation. To prevent unacceptably inaccurate task results from corrupting the main computation, Topaz deploys a novel outlier detection mechanism that recognizes and precisely reexecutes outlier tasks. Outlier detection enables Topaz to work effectively with approximate hardware platforms that have complex fault characteristics, including platforms with bit pattern dependent faults (in which the presence of faults may depend on values stored in adjacent memory cells). Our experimental results show that, for our set of benchmark applications, outlier detection enables Topaz to deliver acceptably accurate results (less than 1\% error) on our target approximate hardware platforms. Depending on the application and the hardware platform, the overall energy savings range from 5 to 13 percent. Without outlier detection, only one of the applications produces acceptably accurate results.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Wickerson:2015:RSP, author = "John Wickerson and Mark Batty and Bradford M. Beckmann and Alastair F. Donaldson", title = "Remote-scope promotion: clarified, rectified, and verified", journal = j-SIGPLAN, volume = "50", number = "10", pages = "731--747", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814283", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern accelerator programming frameworks, such as OpenCL, organise threads into work-groups. 
Remote-scope promotion (RSP) is a language extension recently proposed by AMD researchers that is designed to enable applications, for the first time, both to optimise for the common case of intra-work-group communication (using memory scopes to provide consistency only within a work-group) and to allow occasional inter-work-group communication (as required, for instance, to support the popular load-balancing idiom of work stealing). We present the first formal, axiomatic memory model of OpenCL extended with RSP. We have extended the Herd memory model simulator with support for OpenCL kernels that exploit RSP, and used it to discover bugs in several litmus tests and a work-stealing queue, that have been used previously in the study of RSP. We have also formalised the proposed GPU implementation of RSP. The formalisation process allowed us to identify bugs in the description of RSP that could result in well-synchronised programs experiencing memory inconsistencies. We present and prove sound a new implementation of RSP that incorporates bug fixes and requires less non-standard hardware than the original implementation. This work, a collaboration between academia and industry, clearly demonstrates how, when designing hardware support for a new concurrent language feature, the early application of formal tools and techniques can help to prevent errors, such as those we have found, from making it into silicon.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Hammer:2015:ICN, author = "Matthew A. Hammer and Joshua Dunfield and Kyle Headley and Nicholas Labich and Jeffrey S. Foster and Michael Hicks and David {Van Horn}", title = "Incremental computation with names", journal = j-SIGPLAN, volume = "50", number = "10", pages = "748--766", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814305", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past thirty years, there has been significant progress in developing general-purpose, language-based approaches to incremental computation, which aims to efficiently update the result of a computation when an input is changed. A key design challenge in such approaches is how to provide efficient incremental support for a broad range of programs. In this paper, we argue that first-class names are a critical linguistic feature for efficient incremental computation. Names identify computations to be reused across differing runs of a program, and making them first class gives programmers a high level of control over reuse. We demonstrate the benefits of names by presenting Nominal Adapton, an ML-like language for incremental computation with names. We describe how to use Nominal Adapton to efficiently incrementalize several standard programming patterns---including maps, folds, and unfolds---and show how to build efficient, incremental probabilistic trees and tries. Since Nominal Adapton's implementation is subtle, we formalize it as a core calculus and prove it is from-scratch consistent, meaning it always produces the same answer as simply re-running the computation. 
Finally, we demonstrate that Nominal Adapton can provide large speedups over both from-scratch computation and Adapton, a previous state-of-the-art incremental computation system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Felgentreff:2015:CBC, author = "Tim Felgentreff and Todd Millstein and Alan Borning and Robert Hirschfeld", title = "Checks and balances: constraint solving without surprises in object-constraint programming languages", journal = j-SIGPLAN, volume = "50", number = "10", pages = "767--782", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814311", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Object-constraint programming systems integrate declarative constraint solving with imperative, object-oriented languages, seamlessly providing the power of both paradigms. However, experience with object-constraint systems has shown that giving too much power to the constraint solver opens up the potential for solutions that are surprising and unintended as well as for complex interactions between constraints and imperative code. On the other hand, systems that overly limit the power of the solver, for example by disallowing constraints involving mutable objects, object identity, or polymorphic message sends, run the risk of excluding the core object-oriented features of the language from the constraint part, and consequently not being able to express declaratively a large set of interesting problem solutions. In this paper we present design principles that tame the power of the constraint solver in object-constraint languages to avoid difficult corner cases and surprising solutions while retaining the key features of the approach, including constraints over mutable objects, constraints involving object identity, and constraints on the results of message sends. We present our solution concretely in the context of the Babelsberg object-constraint language framework, providing both an informal description of the resulting language and a formal semantics for a core subset of it. We validate the utility of this semantics with an executable version that allows us to run test programs and to verify that they provide the same results as existing implementations of Babelsberg in JavaScript, Ruby, and Smalltalk.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Steindorfer:2015:OHA, author = "Michael J. Steindorfer and Jurgen J. Vinju", title = "Optimizing hash-array mapped tries for fast and lean immutable {JVM} collections", journal = j-SIGPLAN, volume = "50", number = "10", pages = "783--800", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814312", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The data structures under-pinning collection API (e.g. 
lists, sets, maps) in the standard libraries of programming languages are used intensively in many applications. The standard libraries of recent Java Virtual Machine languages, such as Clojure or Scala, contain scalable and well-performing immutable collection data structures that are implemented as Hash-Array Mapped Tries (HAMTs). HAMTs already feature efficient lookup, insert, and delete operations, however due to their tree-based nature their memory footprints and the runtime performance of iteration and equality checking lag behind array-based counterparts. This particularly prohibits their application in programs which process larger data sets. In this paper, we propose changes to the HAMT design that increase the overall performance of immutable sets and maps. The resulting general purpose design increases cache locality and features a canonical representation. It outperforms Scala's and Clojure's data structure implementations in terms of memory footprint and runtime efficiency of iteration (1.3-6.7x) and equality checking (3-25.4x).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Ureche:2015:AAH, author = "Vlad Ureche and Aggelos Biboudis and Yannis Smaragdakis and Martin Odersky", title = "Automating ad hoc data representation transformations", journal = j-SIGPLAN, volume = "50", number = "10", pages = "801--820", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814271", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To maximize run-time performance, programmers often specialize their code by hand, replacing library collections and containers by custom objects in which data is restructured for efficient access. However, changing the data representation is a tedious and error-prone process that makes it hard to test, maintain and evolve the source code. We present an automated and composable mechanism that allows programmers to safely change the data representation in delimited scopes containing anything from expressions to entire class definitions. To achieve this, programmers define a transformation and our mechanism automatically and transparently applies it during compilation, eliminating the need to manually change the source code. Our technique leverages the type system in order to offer correctness guarantees on the transformation and its interaction with object-oriented language features, such as dynamic dispatch, inheritance and generics. We have embedded this technique in a Scala compiler plugin and used it in four very different transformations, ranging from improving the data layout and encoding, to retrofitting specialization and value class status, and all the way to collection deforestation. On our benchmarks, the technique obtained speedups between 1.8x and 24.5x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Marr:2015:TVP, author = "Stefan Marr and St{\'e}phane Ducasse", title = "Tracing vs. 
partial evaluation: comparing meta-compilation approaches for self-optimizing interpreters", journal = j-SIGPLAN, volume = "50", number = "10", pages = "821--839", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814275", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Tracing and partial evaluation have been proposed as meta-compilation techniques for interpreters to make just-in-time compilation language-independent. They promise that programs executing on simple interpreters can reach performance of the same order of magnitude as if they would be executed on state-of-the-art virtual machines with highly optimizing just-in-time compilers built for a specific language. Tracing and partial evaluation approach this meta-compilation from two ends of a spectrum, resulting in different sets of tradeoffs. This study investigates both approaches in the context of self-optimizing interpreters, a technique for building fast abstract-syntax-tree interpreters. Based on RPython for tracing and Truffle for partial evaluation, we assess the two approaches by comparing the impact of various optimizations on the performance of an interpreter for SOM, an object-oriented dynamically-typed language. The goal is to determine whether either approach yields clear performance or engineering benefits. We find that tracing and partial evaluation both reach roughly the same level of performance. SOM based on meta-tracing is on average 3x slower than Java, while SOM based on partial evaluation is on average 2.3x slower than Java. With respect to the engineering, tracing has however significant benefits, because it requires language implementers to apply fewer optimizations to reach the same level of performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Upadhyaya:2015:EML, author = "Ganesha Upadhyaya and Hridesh Rajan", title = "Effectively mapping linguistic abstractions for message-passing concurrency to threads on the {Java Virtual Machine}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "840--859", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814289", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Efficient mapping of message passing concurrency (MPC) abstractions to Java Virtual Machine (JVM) threads is critical for performance, scalability, and CPU utilization; but tedious and time consuming to perform manually. In general, this mapping cannot be found in polynomial time, but we show that by exploiting the local characteristics of MPC abstractions and their communication patterns this mapping can be determined effectively. We describe our MPC abstraction to thread mapping technique, its realization in two frameworks (Panini and Akka), and its rigorous evaluation using several benchmarks from representative MPC frameworks. 
We also compare our technique against four default mapping techniques: thread-all, round-robin-task-all, random-task-all and work-stealing. Our evaluation shows that our mapping technique can improve the performance by 30\%-60\% over default mapping techniques. These improvements are due to a number of challenges addressed by our technique namely: (i) balancing the computations across JVM threads, (ii) reducing the communication overheads, (iii) utilizing information about cache locality, and (iv) mapping MPC abstractions to threads in a way that reduces the contention between JVM threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Srinivasan:2015:PEM, author = "Venkatesh Srinivasan and Thomas Reps", title = "Partial evaluation of machine code", journal = j-SIGPLAN, volume = "50", number = "10", pages = "860--879", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814321", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an algorithm for off-line partial evaluation of machine code. The algorithm follows the classical two-phase approach of binding-time analysis (BTA) followed by specialization. However, machine-code partial evaluation presents a number of new challenges, and it was necessary to devise new techniques for use in each phase. --- Our BTA algorithm makes use of an instruction-rewriting method that ``decouples'' multiple updates performed by a single instruction. This method counters the cascading imprecision that would otherwise occur with a more naive approach to BTA. --- Our specializer specializes an explicit representation of the semantics of an instruction, and emits residual code via machine-code synthesis. Moreover, to create code that allows the stack and heap to be at different positions at run-time than at specialization-time, the specializer represents specialization-time addresses using symbolic constants, and uses a symbolic state for specialization. Our experiments show that our algorithm can be used to specialize binaries with respect to commonly used inputs to produce faster binaries, as well as to extract an executable component from a bloated binary.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Erdweg:2015:CCF, author = "Sebastian Erdweg and Oliver Bracevac and Edlira Kuci and Matthias Krebs and Mira Mezini", title = "A co-contextual formulation of type rules and its application to incremental type checking", journal = j-SIGPLAN, volume = "50", number = "10", pages = "880--897", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814277", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type rules associate types to expressions given a typing context. As the type checker traverses the expression tree top-down, it extends the typing context with additional context information that becomes available. 
This way, the typing context coordinates type checking in otherwise independent subexpressions, which inhibits parallelization and incrementalization of type checking. We propose a co-contextual formulation of type rules that only take an expression as input and produce a type and a set of context requirements. Co-contextual type checkers traverse an expression tree bottom-up and merge context requirements of independently checked subexpressions. We describe a method for systematically constructing a co-contextual formulation of type rules from a regular context-based formulation and we show how co-contextual type rules give rise to incremental type checking. Using our method, we derive incremental type checkers for PCF and for extensions that introduce records, parametric polymorphism, and subtyping. Our performance evaluation shows that co-contextual type checking has performance comparable to standard context-based type checking, and incrementalization can improve performance significantly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Brandauer:2015:DDF, author = "Stephan Brandauer and Dave Clarke and Tobias Wrigstad", title = "Disjointness domains for fine-grained aliasing", journal = j-SIGPLAN, volume = "50", number = "10", pages = "898--916", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814280", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aliasing is crucial for supporting useful implementation patterns, but it makes reasoning about programs difficult. To deal with this problem, numerous type-based aliasing control mechanisms have been proposed, expressing properties such as uniqueness. Uniqueness, however, is black-and-white: either a reference is unique or it can be arbitrarily aliased; and global: excluding aliases throughout the entire system, making code brittle to changing requirements. Disjointness domains, a new approach to alias control, address this problem by enabling more graduations between uniqueness and arbitrary reference sharing. They allow expressing aliasing constraints local to a certain set of variables (either stack variables or fields) for instance that no aliasing occurs between variables within some set of variables but between such sets or the opposite, that aliasing occurs within that set but not between different sets. A hierarchy of disjointness domains controls the flow of references through a program, helping the programmer reason about disjointness and enforce local alias invariants. The resulting system supports fine-grained control of aliasing between both variables and objects, making aliasing explicit to programmers, compilers, and tooling. This paper presents a formal account of disjointness domains along with examples. 
Disjointness domains provide novel means of expressing may-alias kinds of constraints, which may prove useful in compiler optimisation and verification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Crafa:2015:CAT, author = "Silvia Crafa and Luca Padovani", title = "The chemical approach to typestate-oriented programming", journal = j-SIGPLAN, volume = "50", number = "10", pages = "917--934", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814287", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study a novel approach to typestate-oriented programming based on the chemical metaphor: state and operations on objects are molecules of messages and state transformations are chemical reactions. This approach allows us to investigate typestate in an inherently concurrent setting, whereby objects can be accessed and modified concurrently by several processes, each potentially changing only part of their state. We introduce a simple behavioral type theory to express in a uniform way both the private and the public interfaces of objects, to describe and enforce structured object protocols consisting of possibilities, prohibitions, and obligations, and to control object sharing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Toro:2015:CGP, author = "Mat{\'\i}as Toro and {\'E}ric Tanter", title = "Customizable gradual polymorphic effects for {Scala}", journal = j-SIGPLAN, volume = "50", number = "10", pages = "935--953", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814315", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite their obvious advantages in terms of static reasoning, the adoption of effect systems is still rather limited in practice. Recent advances such as generic effect systems, lightweight effect polymorphism, and gradual effect checking, all represent promising steps towards making effect systems suitable for widespread use. However, no existing system combines these approaches: the theory of gradual polymorphic effects has not been developed, and there are no implementations of gradual effect checking. In addition, a limiting factor in the adoption of effect systems is their unsuitability for localized and customized effect disciplines. This paper addresses these issues by presenting the first implementation of gradual effect checking, for Scala, which supports both effect polymorphism and a domain-specific language called Effscript to declaratively define and customize effect disciplines. 
We report on the theory, implementation, and practical application of the system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '15 conference proceedings.", } @Article{Kim:2015:CPM, author = "Sang-Hoon Kim and Sejun Kwon and Jin-Soo Kim and Jinkyu Jeong", title = "Controlling physical memory fragmentation in mobile systems", journal = j-SIGPLAN, volume = "50", number = "11", pages = "1--14", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754179", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Since the adoption of hardware-accelerated features (e.g., hardware codec) improves the performance and quality of mobile devices, it revives the need for contiguous memory allocation. However, physical memory in mobile systems is highly fragmented due to the frequent spawn and exit of processes and the lack of proactive anti-fragmentation scheme. As a result, the memory allocation for large and contiguous I/O buffers suffer from the highly fragmented memory, thereby incurring high CPU usage and power consumption. This paper presents a proactive anti-fragmentation approach that groups pages with the same lifetime, and stores them contiguously in fixed-size contiguous regions. When a process is killed to secure free memory, a set of contiguous regions are freed and subsequent contiguous memory allocations can be easily satisfied without incurring additional overhead. Our prototype implementation on a Nexus 10 tablet with the Android kernel shows that the proposed scheme greatly alleviates fragmentation, thereby reducing the I/O buffer allocation time, associated CPU usage, and energy consumption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Hussein:2015:DRM, author = "Ahmed Hussein and Antony L. Hosking and Mathias Payer and Christopher A. Vick", title = "Don't race the memory bus: taming the {GC} leadfoot", journal = j-SIGPLAN, volume = "50", number = "11", pages = "15--27", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754182", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic voltage and frequency scaling (DVFS) is ubiquitous on mobile devices as a mechanism for saving energy. Reducing the clock frequency of a processor allows a corresponding reduction in power consumption, as does turning off idle cores. Garbage collection is a canonical example of the sort of memory-bound workload that best responds to such scaling. Here, we explore the impact of frequency scaling for garbage collection in a real mobile device running Android's Dalvik virtual machine, which uses a concurrent collector. By controlling the frequency of the core on which the concurrent collector thread runs we can reduce power significantly. 
Running established multi-threaded benchmarks shows that total processor energy can be reduced up to 30\%, with end-to-end performance loss of at most 10\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Cohen:2015:DSA, author = "Nachshon Cohen and Erez Petrank", title = "Data structure aware garbage collector", journal = j-SIGPLAN, volume = "50", number = "11", pages = "28--40", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Garbage collection may benefit greatly from knowledge about program behavior, but most managed languages do not provide means for the programmer to deliver such knowledge. In this work we propose a very simple interface that requires minor programmer effort and achieves substantial performance and scalability improvements. In particular, we focus on the common use of data structures or collections for organizing data on the heap. We let the program notify the collector which classes represent nodes of data structures and also when such nodes are being removed from their data structures. The data-structure aware (DSA) garbage collector uses this information to improve performance, locality, and load balancing. Experience shows that this interface requires a minor modification of the application. Measurements show that for some significant benchmarks this interface can dramatically reduce the time spent on garbage collection and also improve the overall program performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Kuszmaul:2015:SSF, author = "Bradley C. Kuszmaul", title = "{SuperMalloc}: a super fast multithreaded {\tt malloc} for 64-bit machines", journal = j-SIGPLAN, volume = "50", number = "11", pages = "41--55", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754178", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "SuperMalloc is an implementation of malloc(3) originally designed for X86 Hardware Transactional Memory (HTM). It turns out that the same design decisions also make it fast even without HTM. For the malloc-test benchmark, which is one of the most difficult workloads for an allocator, with one thread SuperMalloc is about 2.1 times faster than the best of DLmalloc, JEmalloc, Hoard, and TBBmalloc; with 8 threads and HTM, SuperMalloc is 2.75 times faster; and on 32 threads without HTM SuperMalloc is 3.4 times faster. SuperMalloc generally compares favorably with the other allocators on speed, scalability, speed variance, memory footprint, and code size. SuperMalloc achieves these performance advantages using less than half as much code as the alternatives. SuperMalloc exploits the fact that although physical memory is always precious, virtual address space on a 64-bit machine is relatively cheap. It allocates 2 chunks which contain objects all the same size.
To translate chunk numbers to chunk metadata, SuperMalloc uses a simple array (most of which is uncommitted to physical memory). SuperMalloc takes care to avoid associativity conflicts in the cache: most of the size classes are a prime number of cache lines, and nonaligned huge accesses are randomly aligned within a page. Objects are allocated from the fullest non-full page in the appropriate size class. For each size class, SuperMalloc employs a 10-object per-thread cache, a per-CPU cache that holds about a level-2-cache worth of objects per size class, and a global cache that is organized to allow the movement of many objects between a per-CPU cache and the global cache using $ O(1) $ instructions. SuperMalloc prefetches everything it can before starting a critical section, which makes the critical sections run fast, and for HTM improves the odds that the transaction will commit.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Osterlund:2015:CCU, author = "Erik {\"O}sterlund and Welf L{\"o}we", title = "Concurrent compaction using a field pinning protocol", journal = j-SIGPLAN, volume = "50", number = "11", pages = "56--69", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754177", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compaction of memory in long running systems has always been important. The latency of compaction increases in today's systems with high memory demands and large heaps. To deal with this problem, we present a lock-free protocol allowing for copying concurrent with the application running, which reduces the latencies of compaction radically. It provides theoretical progress guarantees for copying and application threads without making it practically infeasible, with performance overheads of 15\% on average. The algorithm paves the way for a future lock-free Garbage Collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Lin:2015:SGU, author = "Yi Lin and Kunshan Wang and Stephen M. Blackburn and Antony L. Hosking and Michael Norrish", title = "Stop and go: understanding yieldpoint behavior", journal = j-SIGPLAN, volume = "50", number = "11", pages = "70--80", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754187", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Yieldpoints are critical to the implementation of high performance garbage collected languages, yet the design space is not well understood. Yieldpoints allow a running program to be interrupted at well-defined points in its execution, facilitating exact garbage collection, biased locking, on-stack replacement, profiling, and other important virtual machine behaviors. In this paper we identify and evaluate yieldpoint design choices, including previously undocumented designs and optimizations. 
One of the designs we identify opens new opportunities for very low overhead profiling. We measure the frequency with which yieldpoints are executed and establish a methodology for evaluating the common case execution time overhead. We also measure the median and worst case time-to-yield. We find that Java benchmarks execute about 100M yieldpoints per second, of which about 1/20000 are taken. The average execution time overhead for untaken yieldpoints on the VM we use ranges from 2.5\% to close to zero on modern hardware, depending on the design, and we find that the designs trade off total overhead with worst case time-to-yield. This analysis gives new insight into a critical but overlooked aspect of garbage collector implementation, and identifies a new optimization and new opportunities for very low overhead profiling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Stancu:2015:SEH, author = "Codrut Stancu and Christian Wimmer and Stefan Brunthaler and Per Larsen and Michael Franz", title = "Safe and efficient hybrid memory management for {Java}", journal = j-SIGPLAN, volume = "50", number = "11", pages = "81--92", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754185", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java uses automatic memory management, usually implemented as a garbage-collected heap. That lifts the burden of manually allocating and deallocating memory, but it can incur significant runtime overhead and increase the memory footprint of applications. We propose a hybrid memory management scheme that utilizes region-based memory management to deallocate objects automatically on region exits. Static program analysis detects allocation sites that are safe for region allocation, i.e., the static analysis proves that the objects allocated at such a site are not reachable after the region exit. A regular garbage-collected heap is used for objects that are not region allocatable. The region allocation exploits the temporal locality of object allocation. Our analysis uses coarse-grain source code annotations to disambiguate objects with non-overlapping lifetimes, and maps them to different memory scopes. Region-allocated memory does not require garbage collection as the regions are simply deallocated when they go out of scope. The region allocation technique is backed by a garbage collector that manages memory that is not region allocated. 
We provide a detailed description of the analysis, provide experimental results showing that as much as 78\% of the memory is region allocatable and discuss how our hybrid memory management system can be implemented efficiently with respect to both space and time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Miranda:2015:PRB, author = "Eliot Miranda and Cl{\'e}ment B{\'e}ra", title = "A partial read barrier for efficient support of live object-oriented programming", journal = j-SIGPLAN, volume = "50", number = "11", pages = "93--104", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754186", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Live programming, originally introduced by Smalltalk and Lisp, and now gaining popularity in contemporary systems such as Swift, requires on-the-fly support for object schema migration, such that the layout of objects may be changed while the program is at one and the same time being run and developed. In Smalltalk schema migration is supported by two primitives, one that answers a collection of all instances of a class, and one that exchanges the identities of pairs of objects, called the become primitive. Existing instances are collected, copies using the new schema created, state copied from old to new, and the two exchanged with become, effecting the schema migration. Historically the implementation of become has either required an extra level of indirection between an object's address and its body, slowing down slot access, or has required a sweep of all objects, a very slow operation on large heaps. Spur, a new object representation and memory manager for Smalltalk-like languages, has neither of these deficiencies. It uses direct pointers but still provides a fast become operation in large heaps, thanks to forwarding objects that when read conceptually answer another object and a partial read barrier that avoids the cost of explicitly checking for forwarding objects on the vast majority of object accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Clifford:2015:MMD, author = "Daniel Clifford and Hannes Payer and Michael Stanton and Ben L. Titzer", title = "Memento mori: dynamic allocation-site-based optimizations", journal = j-SIGPLAN, volume = "50", number = "11", pages = "105--117", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754181", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Languages that lack static typing are ubiquitous in the world of mobile and web applications. The rapid rise of larger applications like interactive web GUIs, games, and cryptography presents a new range of implementation challenges for modern virtual machines to close the performance gap between typed and untyped languages. 
While all languages can benefit from efficient automatic memory management, languages like JavaScript present extra thrill with innocent-looking but difficult features like dynamically-sized arrays, deletable properties, and prototypes. Optimizing such languages requires complex dynamic techniques with more radical object layout strategies such as dynamically evolving representations for arrays. This paper presents a general approach for gathering temporal allocation site feedback that tackles both the general problem of object lifetime estimation and improves optimization of these problematic language features. We introduce a new implementation technique where allocation mementos processed by the garbage collector and runtime system efficiently tie objects back to allocation sites in the program and dynamically estimate object lifetime, representation, and size to inform three optimizations: pretenuring, pretransitioning, and presizing. Unlike previous work on pretenuring, our system utilizes allocation mementos to achieve fully dynamic allocation-site-based pretenuring in a production system. We implement all of our techniques in V8, a high performance virtual machine for JavaScript, and demonstrate solid performance improvements across a range of benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Shidal:2015:RTC, author = "Jonathan Shidal and Ari J. Spilo and Paul T. Scheid and Ron K. Cytron and Krishna M. Kavi", title = "Recycling trash in cache", journal = j-SIGPLAN, volume = "50", number = "11", pages = "118--130", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754183", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The disparity between processing and storage speeds can be bridged in part by reducing the traffic into and out of the slower memory components. Some recent studies reduce such traffic by determining dead data in cache, showing that a significant fraction of writes can be squashed before they make the trip toward slower memory. In this paper, we examine a technique for eliminating traffic in the other direction, specifically the traffic induced by dynamic storage allocation. We consider recycling dead storage in cache to satisfy a program's storage-allocation requests. We first evaluate the potential for recycling under favorable circumstances, where the associated logic can run at full speed with no impact on the cache's normal behavior. We then consider a more practical implementation, in which the associated logic executes independently from the cache's critical path. Here, the cache's performance is unfettered by recycling, but the operations necessary to determine dead storage and recycle such storage execute as time is available. 
Finally, we present the design and analysis of a hardware implementation that scales well with cache size without sacrificing too much performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Cutler:2015:RPT, author = "Cody Cutler and Robert Morris", title = "Reducing pause times with clustered collection", journal = j-SIGPLAN, volume = "50", number = "11", pages = "131--142", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754184", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Each full garbage collection in a program with millions of objects can pause the program for multiple seconds. Much of this work is typically repeated, as the collector re-traces parts of the object graph that have not changed since the last collection. Clustered Collection reduces full collection pause times by eliminating much of this repeated work. Clustered Collection identifies clusters: regions of the object graph that are reachable from a single ``head'' object, so that reachability of the head implies reachability of the whole cluster. As long as it is not written, a cluster need not be re-traced by successive full collections. The main design challenge is coping with program writes to clusters while ensuring safe, complete, and fast collections. In some cases program writes require clusters to be dissolved, but in most cases Clustered Collection can handle writes without having to re-trace the affected cluster. Clustered Collection chooses clusters likely to suffer few writes and to yield high savings from re-trace avoidance. Clustered Collection is implemented as modifications to the Racket collector. Measurements of the code and data from the Hacker News web site (which suffers from significant garbage collection pauses) and a Twitter-like application show that Clustered Collection decreases full collection pause times by a factor of three and six respectively. This improvement is possible because both applications have gigabytes of live data, modify only a small fraction of it, and usually write in ways that do not result in cluster dissolution. Identifying clusters takes more time than a full collection, but happens much less frequently than full collection.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Cameron:2015:JFE, author = "Callum Cameron and Jeremy Singer and David Vengerov", title = "The judgment of {FORSETI}: economic utility for dynamic heap sizing of multiple runtimes", journal = j-SIGPLAN, volume = "50", number = "11", pages = "143--156", month = nov, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887746.2754180", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We introduce the FORSETI system, which is a principled approach for holistic memory management. 
It permits a sysadmin to specify the total physical memory resource that may be shared between all concurrent virtual machines on a physical node. FORSETI models the heap size versus application throughput for each virtual machine, and seeks to maximize the combined throughput of the set of VMs based on concepts from economic utility theory. We evaluate the FORSETI system using a standard Java managed runtime, i.e. OpenJDK. Our results demonstrate that FORSETI enables dramatic reductions (up to 5x) in heap footprint without compromising application execution times.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '15 conference proceedings.", } @Article{Diatchki:2015:IHT, author = "Iavor S. Diatchki", title = "Improving {Haskell} types with {SMT}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "1--10", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804307", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a technique for integrating GHC's type-checker with an SMT solver. The technique was developed to add support for reasoning about type-level functions on natural numbers, and so our implementation uses the theory of linear arithmetic. However, the approach is not limited to this theory, and makes it possible to experiment with other external decision procedures, such as reasoning about type-level booleans, bit-vectors, or any other theory supported by SMT solvers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Gundry:2015:TPU, author = "Adam Gundry", title = "A typechecker plugin for units of measure: domain-specific constraint solving in {GHC Haskell}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "11--22", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804305", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Typed functional programming and units of measure are a natural combination, as F\# ably demonstrates. However, encoding statically-checked units in Haskell's type system leads to inevitable disappointment with the usability of the resulting system. Extending the language itself would produce a much better result, but it would be a lot of work! In this paper, I demonstrate how typechecker plugins in the Glasgow Haskell Compiler allow users to define domain-specific constraint solving behaviour, making it possible to implement units of measure as a type system extension without rebuilding the compiler. 
This paves the way for a more modular treatment of constraint solving in GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Farmer:2015:RHT, author = "Andrew Farmer and Neil Sculthorpe and Andy Gill", title = "Reasoning with the {HERMIT}: tool support for equational reasoning on {GHC} core programs", journal = j-SIGPLAN, volume = "50", number = "12", pages = "23--34", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804303", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A benefit of pure functional programming is that it encourages equational reasoning. However, the Haskell language has lacked direct tool support for such reasoning. Consequently, reasoning about Haskell programs is either performed manually, or in another language that does provide tool support (e.g. Agda or Coq). HERMIT is a Haskell-specific toolkit designed to support equational reasoning and user-guided program transformation, and to do so as part of the GHC compilation pipeline. This paper describes HERMIT's recently developed support for equational reasoning, and presents two case studies of HERMIT usage: checking that type-class laws hold for specific instance declarations, and mechanising textbook equational reasoning.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Breitner:2015:FPC, author = "Joachim Breitner", title = "Formally proving a compiler transformation safe", journal = j-SIGPLAN, volume = "50", number = "12", pages = "35--46", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804312", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We prove that the Call Arity analysis and transformation, as implemented in the Haskell compiler GHC, is safe, i.e. does not impede the performance of the program. We formalized syntax, semantics, the analysis and the transformation in the interactive theorem prover Isabelle to obtain a machine-checked proof and hence a level of rigor rarely obtained for compiler optimization safety theorems. The proof is modular and introduces trace trees as a suitable abstraction in abstract cardinality analyses. We discuss the breadth of the formalization gap.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Perez:2015:BGG, author = "Ivan Perez and Henrik Nilsson", title = "Bridging the {GUI} gap with reactive values and relations", journal = j-SIGPLAN, volume = "50", number = "12", pages = "47--58", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804316", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There are at present two ways to write GUIs for functional code. 
One is to use standard GUI toolkits, with all the benefits they bring in terms of feature completeness, choice of platform, conformance to platform-specific look-and-feel, long-term viability, etc. However, such GUI APIs mandate an imperative programming style for the GUI and related parts of the application. Alternatively, we can use a functional GUI toolkit. The GUI can then be written in a functional style, but at the cost of foregoing many advantages of standard toolkits that often will be of critical importance. This paper introduces a light-weight framework structured around the notions of reactive values and reactive relations. It allows standard toolkits to be used from functional code written in a functional style. We thus bridge the gap between the two worlds, bringing the advantages of both to the developer. Our framework is available on Hackage and has been validated through the development of non-trivial applications in a commercial context, and with different standard GUI toolkits.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Gill:2015:RMD, author = "Andy Gill and Neil Sculthorpe and Justin Dawson and Aleksander Eskilson and Andrew Farmer and Mark Grebe and Jeffrey Rosenbluth and Ryan Scott and James Stanton", title = "The remote monad design pattern", journal = j-SIGPLAN, volume = "50", number = "12", pages = "59--70", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804311", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Remote Procedure Calls are expensive. This paper demonstrates how to reduce the cost of calling remote procedures from Haskell by using the remote monad design pattern, which amortizes the cost of remote calls. This gives the Haskell community access to remote capabilities that are not directly supported, at a surprisingly inexpensive cost. We explore the remote monad design pattern through six models of remote execution patterns, using a simulated Internet of Things toaster as a running example. We consider the expressiveness and optimizations enabled by each remote execution model, and assess the feasibility of our approach. We then present a full-scale case study: a Haskell library that provides a Foreign Function Interface to the JavaScript Canvas API. Finally, we discuss existing instances of the remote monad design pattern found in Haskell libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Morris:2015:VV, author = "J.
Garrett Morris", title = "Variations on variants", journal = j-SIGPLAN, volume = "50", number = "12", pages = "71--81", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804320", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Extensible variants improve the modularity and expressiveness of programming languages: they allow program functionality to be decomposed into independent blocks, and allow seamless extension of existing code with both new cases of existing data types and new operations over those data types. This paper considers three approaches to providing extensible variants in Haskell. Row typing is a long understood mechanism for typing extensible records and variants, but its adoption would require extension of Haskell's core type system. Alternatively, we might hope to encode extensible variants in terms of existing mechanisms, such as type classes. We describe an encoding of extensible variants using instance chains, a proposed extension of the class system. Unlike many previous encodings of extensible variants, ours does not require the definition of a new type class for each function that consumes variants. Finally, we translate our encoding to use closed type families, an existing feature of GHC. Doing so demonstrates the interpretation of instances chains and functional dependencies in closed type families. One concern with encodings like ours is how completely they match the encoded system. We compare the expressiveness of our encodings with each other and with systems based on row types. We find that, while equivalent terms are typable in each system, both encodings require explicit type annotations to resolve ambiguities in typing not present in row type systems, and the type family implementation retains more constraints in principal types than does the instance chain implementation. We propose a general mechanism to guide the instantiation of ambiguous type variables, show that it eliminates the need for type annotations in our encodings, and discuss conditions under which it preserves coherence.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Oliveira:2015:MRM, author = "Bruno C. d. S. Oliveira and Shin-Cheng Mu and Shu-Hung You", title = "Modular reifiable matching: a list-of-functors approach to two-level types", journal = j-SIGPLAN, volume = "50", number = "12", pages = "82--93", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804315", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "This paper presents Modular Reifiable Matching (MRM): a new approach to two level types using a fixpoint of list-of-functors representation. MRM allows the modular definition of datatypes and functions by pattern matching, using a style similar to the widely popular Datatypes a la Carte (DTC) approach. However, unlike DTC, MRM uses a fixpoint of list-of-functors approach to two-level types. This approach has advantages that help with various aspects of extensibility, modularity and reuse. 
Firstly, modular pattern matching definitions are collected using a list of matches that is fully reifiable. This allows for extensible pattern matching definitions to be easily reused/inherited, and particular matches to be overridden. Such flexibility is used, among other things, to implement extensible generic traversals. Secondly, the subtyping relation between lists of functors is quite simple, does not require backtracking, and is easy to model in languages like Haskell. MRM is implemented as a Haskell library, and its use and applicability are illustrated through various examples in the paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Kiselyov:2015:FMM, author = "Oleg Kiselyov and Hiromi Ishii", title = "Freer monads, more extensible effects", journal = j-SIGPLAN, volume = "50", number = "12", pages = "94--105", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804319", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a rational reconstruction of extensible effects, the recently proposed alternative to monad transformers, as the confluence of efforts to make effectful computations compose. Free monads and then extensible effects emerge from the straightforward term representation of an effectful computation, as more and more boilerplate is abstracted away. The generalization process further leads to freer monads, constructed without the Functor constraint. The continuation exposed in freer monads can then be represented as an efficient type-aligned data structure. The end result is the algorithmically efficient extensible effects library, which is not only more comprehensible but also faster than earlier implementations. As an illustration of the new library, we show three surprisingly simple applications: non-determinism with committed choice (LogicT), catching IO exceptions in the presence of other effects, and the semi-automatic management of file handles and other resources through monadic regions. We extensively use and promote the new sort of `laziness', which underlies the left Kan extension: instead of performing an operation, keep its operands and pretend it is done.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Foner:2015:FPG, author = "Kenneth Foner", title = "Functional pearl: getting a quick fix on comonads", journal = j-SIGPLAN, volume = "50", number = "12", pages = "106--117", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804310", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A piece of functional programming folklore due to Piponi provides L{\"o}b's theorem from modal provability logic with a computational interpretation as an unusual fixed point. Interpreting modal necessity as an arbitrary Functor in Haskell, the ``type'' of L{\"o}b's theorem is inhabited by a fixed point function allowing each part of a structure to refer to the whole. 
However, Functor's logical interpretation may be used to prove L{\"o}b's theorem only by relying on its implicit functorial strength, an axiom not available in the provability modality. As a result, the well known Loeb fixed point ``cheats'' by using functorial strength to implement its recursion. Rather than Functor, a closer Curry analogue to modal logic's Howard inspiration is a closed (semi-)comonad, of which Haskell's ComonadApply typeclass provides analogous structure. Its computational interpretation permits the definition of a novel fixed point function allowing each part of a structure to refer to its own context within the whole. This construction further guarantees maximal sharing and asymptotic efficiency superior to Loeb for locally contextual computations upon a large class of structures. With the addition of a distributive law, closed comonads may be composed into spaces of arbitrary dimensionality while preserving the performance guarantees of this new fixed point. From these elements, we construct a small embedded domain-specific language to elegantly express and evaluate multidimensional ``spreadsheet-like'' recurrences for a variety of cellular automata.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Stolarek:2015:ITF, author = "Jan Stolarek and Simon Peyton Jones and Richard A. Eisenberg", title = "Injective type families for {Haskell}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "118--128", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804314", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Haskell, as implemented by the Glasgow Haskell Compiler (GHC), allows expressive type-level programming. The most popular type-level programming extension is TypeFamilies, which allows users to write functions on types. Yet, using type functions can cripple type inference in certain situations. In particular, lack of injectivity in type functions means that GHC can never infer an instantiation of a type variable appearing only under type functions. In this paper, we describe a small modification to GHC that allows type functions to be annotated as injective. GHC naturally must check validity of the injectivity annotations. The algorithm to do so is surprisingly subtle. We prove soundness for a simplification of our algorithm, and state and prove a completeness property, though the algorithm is not fully complete. As much of our reasoning surrounds functions defined by a simple pattern-matching structure, we believe our results extend beyond just Haskell. 
We have implemented our solution on a branch of GHC and plan to make it available to regular users with the next stable release of the compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Serrano:2015:TFC, author = "Alejandro Serrano and Jurriaan Hage and Patrick Bahr", title = "Type families with class, type classes with family", journal = j-SIGPLAN, volume = "50", number = "12", pages = "129--140", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804304", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type classes and type families are key ingredients in Haskell programming. Type classes were introduced to deal with ad-hoc polymorphism, although with the introduction of functional dependencies, their use expanded to type-level programming. Type families also allow encoding type-level functions, but more directly in the form of rewrite rules. In this paper we show that type families are powerful enough to simulate type classes (without overlapping instances), and we provide a formal proof of the soundness and completeness of this simulation. Encoding instance constraints as type families eases the path to proposed extensions to type classes, like closed sets of instances, instance chains, and control over the search procedure. The only feature which type families cannot simulate is elaboration, that is, generating code from the derivation of a rewriting. We look at ways to solve this problem in current Haskell, and propose an extension to allow elaboration during the rewriting phase.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Walker:2015:DFC, author = "Michael Walker and Colin Runciman", title = "{D{\'e}j{\`a} Fu}: a concurrency testing library for {Haskell}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "141--152", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804306", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Systematic concurrency testing (SCT) is an approach to testing potentially nondeterministic concurrent programs. SCT avoids potentially unrepeatable results that may arise from unit testing concurrent programs. It seems to have received little attention from Haskell programmers. This paper introduces a generalisation of Haskell's concurrency abstraction in the form of typeclasses, and a library for testing concurrent programs. 
A number of examples are provided, some of which come from pre-existing packages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Trilla:2015:IIP, author = "Jos{\'e} Manuel Calder{\'o}n Trilla and Colin Runciman", title = "Improving implicit parallelism", journal = j-SIGPLAN, volume = "50", number = "12", pages = "153--164", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804308", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Using static analysis techniques compilers for lazy functional languages can be used to identify parts of a program that can be legitimately evaluated in parallel and ensure that those expressions are executed concurrently with the main thread of execution. These techniques can produce improvements in the runtime performance of a program, but are limited by the static analyses' poor prediction of runtime performance. This paper outlines the development of a system that uses iterative profile-directed improvement in addition to well-studied static analysis techniques. This allows us to achieve higher performance gains than through static analysis alone.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Scibior:2015:PPP, author = "Adam {\'S}cibior and Zoubin Ghahramani and Andrew D. Gordon", title = "Practical probabilistic programming with monads", journal = j-SIGPLAN, volume = "50", number = "12", pages = "165--176", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804317", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The machine learning community has recently shown a lot of interest in practical probabilistic programming systems that target the problem of Bayesian inference. Such systems come in different forms, but they all express probabilistic models as computational processes using syntax resembling programming languages. In the functional programming community monads are known to offer a convenient and elegant abstraction for programming with probability distributions, but their use is often limited to very simple inference problems. We show that it is possible to use the monad abstraction to construct probabilistic models for machine learning, while still offering good performance of inference in challenging models. We use a GADT as an underlying representation of a probability distribution and apply Sequential Monte Carlo-based methods to achieve efficient inference. We define a formal semantics via measure theory. 
We demonstrate a clean and elegant implementation that achieves performance comparable with Anglican, a state-of-the-art probabilistic programming system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Polakow:2015:EFL, author = "Jeff Polakow", title = "Embedding a full linear lambda calculus in {Haskell}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "177--188", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804309", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an encoding of full linear lambda calculus in Haskell using higher order abstract syntax. By making use of promoted data kinds, multi-parameter type classes and functional dependencies, the encoding allows Haskell to do both linear type checking and linear type inference.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{Elliott:2015:GFI, author = "Trevor Elliott and Lee Pike and Simon Winwood and Pat Hickey and James Bielman and Jamey Sharp and Eric Seidel and John Launchbury", title = "Guilt free {Ivory}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "189--200", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804318", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Ivory is a language that enforces memory safety and avoids most undefined behaviors while providing low-level control of memory-manipulation. Ivory is embedded in a modern variant of Haskell, as implemented by the GHC compiler. The main contributions of the paper are two-fold. First, we demonstrate how to embed the type-system of a safe-C language into the type extensions of GHC. Second, Ivory is of interest in its own right, as a powerful language for writing high-assurance embedded programs. Beyond invariants enforced by its type-system, Ivory has direct support for model-checking, theorem-proving, and property-based testing. Ivory's semantics have been formalized and proved to guarantee memory safety.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{McDonell:2015:TSR, author = "Trevor L. McDonell and Manuel M. T. Chakravarty and Vinod Grover and Ryan R. Newton", title = "Type-safe runtime code generation: accelerate to {LLVM}", journal = j-SIGPLAN, volume = "50", number = "12", pages = "201--212", month = dec, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2887747.2804313", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:44 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded languages are often compiled at application runtime; thus, embedded compile-time errors become application runtime errors. 
We argue that advanced type system features, such as GADTs and type families, play a crucial role in minimising such runtime errors. Specifically, a rigorous type discipline reduces runtime errors due to bugs in both embedded language applications and the implementation of the embedded language compiler itself. In this paper, we focus on the safety guarantees achieved by type preserving compilation. We discuss the compilation pipeline of Accelerate, a high-performance array language targeting both multicore CPUs and GPUs, where we are able to preserve types from the source language down to a low-level register language in SSA form. Specifically, we demonstrate the practicability of our approach by creating a new type-safe interface to the industrial-strength LLVM compiler infrastructure, which we used to build two new Accelerate backends that show competitive runtimes on a set of benchmarks across both CPUs and GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '15 conference proceedings.", } @Article{McKinley:2016:PWU, author = "Kathryn S. McKinley", title = "Programming the world of uncertain things (keynote)", journal = j-SIGPLAN, volume = "51", number = "1", pages = "1--2", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2843895", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computing has entered the era of uncertain data, in which hardware and software generate and reason about estimates. Applications use estimates from sensors, machine learning, big data, humans, and approximate hardware and software. Unfortunately, developers face pervasive correctness, programmability, and optimization problems due to estimates. Most programming languages unfortunately make these problems worse. We propose a new programming abstraction called {Uncertain$<$T$>$} embedded into languages, such as C\#, C++, Java, Python, and JavaScript. Applications that consume estimates use familiar discrete operations for their estimates; overloaded conditional operators specify hypothesis tests and applications use them to control false positives and negatives; and new compositional operators express domain knowledge. By carefully restricting the expressiveness, the runtime automatically implements correct statistical reasoning at conditionals, relieving developers of the need to implement or deeply understand statistics. We demonstrate substantial programmability, correctness, and efficiency benefits of this programming model for GPS sensor navigation, approximate computing, machine learning, and xBox.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Murray:2016:SRC, author = "Richard M.
Murray", title = "Synthesis of reactive controllers for hybrid systems (keynote)", journal = j-SIGPLAN, volume = "51", number = "1", pages = "3--3", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2843894", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Decision-making logic in hybrid systems is responsible for selecting modes of operation for the underlying (continuous) control system, reacting to external events and failures in the system, and insuring that the overall control system is satisfying safety and performance specifications. Tools from computer science, such as model-checking and logic synthesis, combined with design patterns from feedback control theory provide new approaches to solving these problems. A major shift is the move from ``design then verify'' to ``specify then synthesize'' approaches to controller design that allow simultaneous synthesis of high-performance, robust control laws and correct-by-construction decision-making logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Walker:2016:CPL, author = "David Walker", title = "Confluences in programming languages research (keynote)", journal = j-SIGPLAN, volume = "51", number = "1", pages = "4--4", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2843896", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A confluence occurs when two rivers flow together; downstream the combined forces gather strength and propel their waters forward with increased vigor. In academic research, according to Varghese, a confluence occurs after some trigger, perhaps a discovery or a change in technology, and brings two previously separate branches of research together. In this talk, I will discuss confluences in programming languages research. Here, confluences often occur when basic research finds application in some important new domain. Two prime examples from my own career involve the confluence of research in type theory and systems security, triggered by new theoretical tools for reasoning about programming language safety, and the confluence of formal methods and networking, triggered by the rise of data centers. 
These experiences may shed light on what to teach our students and what is next for programming languages research.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Brown:2016:BTN, author = "Matt Brown and Jens Palsberg", title = "Breaking through the normalization barrier: a self-interpreter for {F$_{\rm omega}$}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "5--17", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837623", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "According to conventional wisdom, a self-interpreter for a strongly normalizing lambda-calculus is impossible. We call this the normalization barrier. The normalization barrier stems from a theorem in computability theory that says that a total universal function for the total computable functions is impossible. In this paper we break through the normalization barrier and define a self-interpreter for System F$_{\rm omega}$, a strongly normalizing lambda-calculus. After a careful analysis of the classical theorem, we show that static type checking in F$_{\rm omega}$ can exclude the proof's diagonalization gadget, leaving open the possibility for a self-interpreter. Along with the self-interpreter, we program four other operations in F$_{\rm omega}$, including a continuation-passing style transformation. Our operations rely on a new approach to program representation that may be useful in theorem provers and compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Altenkirch:2016:TTT, author = "Thorsten Altenkirch and Ambrus Kaposi", title = "Type theory in type theory using quotient inductive types", journal = j-SIGPLAN, volume = "51", number = "1", pages = "18--29", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837638", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an internal formalisation of a type theory with dependent types in Type Theory using a special case of higher inductive types from Homotopy Type Theory which we call quotient inductive types (QITs). Our formalisation of type theory avoids referring to preterms or a typability relation but defines directly well typed objects by an inductive definition. We use the elimination principle to define the set-theoretic and logical predicate interpretation. The work has been formalized using the Agda system extended with QITs using postulates.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Cai:2016:SFE, author = "Yufei Cai and Paolo G.
Giarrusso and Klaus Ostermann", title = "System {F$_{\rm omega}$} with equirecursive types for datatype-generic programming", journal = j-SIGPLAN, volume = "51", number = "1", pages = "30--43", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837660", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traversing an algebraic datatype by hand requires boilerplate code which duplicates the structure of the datatype. Datatype-generic programming (DGP) aims to eliminate such boilerplate code by decomposing algebraic datatypes into type constructor applications from which generic traversals can be synthesized. However, different traversals require different decompositions, which yield isomorphic but unequal types. This hinders the interoperability of different DGP techniques. In this paper, we propose F$_{\omega\mu}$, an extension of the higher-order polymorphic lambda calculus F$_{\omega}$ with records, variants, and equirecursive types. We prove the soundness of the type system, and show that type checking for first-order recursive types is decidable with a practical type checking algorithm. In our soundness proof we define type equality by interpreting types as infinitary $\lambda$-terms (in particular, Berarducci-trees). To decide type equality we $\beta$-normalize types, and then use an extension of equivalence checking for usual equirecursive types. Thanks to equirecursive types, new decompositions for a datatype can be added modularly and still interoperate with each other, allowing multiple DGP techniques to work together. We sketch how generic traversals can be synthesized, and apply these components to some examples. Since the set of datatype decomposition becomes extensible, System F$_{\omega\mu}$ enables using DGP techniques incrementally, instead of planning for them upfront or doing invasive refactoring.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Curien:2016:TER, author = "Pierre-Louis Curien and Marcelo Fiore and Guillaume Munch-Maccagnoni", title = "A theory of effects and resources: adjunction models and polarised calculi", journal = j-SIGPLAN, volume = "51", number = "1", pages = "44--56", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837652", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the Curry--Howard-Lambek correspondence for effectful computation and resource management, specifically proposing polarised calculi together with presheaf-enriched adjunction models as the starting point for a comprehensive semantic theory relating logical systems, typed calculi, and categorical models in this context. Our thesis is that the combination of effects and resources should be considered orthogonally. Model theoretically, this leads to an understanding of our categorical models from two complementary perspectives: (i) as a linearisation of CBPV (Call-by-Push-Value) adjunction models, and (ii) as an extension of linear/non-linear adjunction models with an adjoint resolution of computational effects.
When the linear structure is cartesian and the resource structure is trivial we recover Levy's notion of CBPV adjunction model, while when the effect structure is trivial we have Benton's linear/non-linear adjunction models. Further instances of our model theory include the dialogue categories with a resource modality of Melli{\`e}s and Tabareau, and the [E]EC ([Enriched] Effect Calculus) models of Egger, M{\o}gelberg and Simpson. Our development substantiates the approach by providing a lifting theorem of linear models into cartesian ones. To each of our categorical models we systematically associate a typed term calculus, each of which corresponds to a variant of the sequent calculi LJ (Intuitionistic Logic) or ILL (Intuitionistic Linear Logic). The adjoint resolution of effects corresponds to polarisation whereby, syntactically, types locally determine a strict or lazy evaluation order and, semantically, the associativity of cuts is relaxed. In particular, our results show that polarisation provides a computational interpretation of CBPV in direct style. Further, we characterise depolarised models: those where the cut is associative, and where the evaluation order is unimportant. We explain possible advantages of this style of calculi for the operational semantics of effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Murase:2016:TVH, author = "Akihiro Murase and Tachio Terauchi and Naoki Kobayashi and Ryosuke Sato and Hiroshi Unno", title = "Temporal verification of higher-order functional programs", journal = j-SIGPLAN, volume = "51", number = "1", pages = "57--68", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837667", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an automated approach to verifying arbitrary omega-regular properties of higher-order functional programs. Previous automated methods proposed for this class of programs could only handle safety properties or termination, and our approach is the first to be able to verify arbitrary omega-regular liveness properties. Our approach is automata-theoretic, and extends our recent work on binary-reachability-based approach to automated termination verification of higher-order functional programs to fair termination published in ESOP 2014. In that work, we have shown that checking disjunctive well-foundedness of (the transitive closure of) the ``calling relation'' is sound and complete for termination. The extension to fair termination is tricky, however, because the straightforward extension that checks disjunctive well-foundedness of the fair calling relation turns out to be unsound, as we shall show in the paper. Roughly, our solution is to check fairness on the transition relation instead of the calling relation, and propagate the information to determine when it is necessary and sufficient to check for disjunctive well-foundedness on the calling relation. We prove that our approach is sound and complete. 
We have implemented a prototype of our approach, and confirmed that it is able to automatically verify liveness properties of some non-trivial higher-order programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Plotkin:2016:SNV, author = "Gordon D. Plotkin and Nikolaj Bj{\o}rner and Nuno P. Lopes and Andrey Rybalchenko and George Varghese", title = "Scaling network verification using symmetry and surgery", journal = j-SIGPLAN, volume = "51", number = "1", pages = "69--83", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837657", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "On the surface, large data centers with about 100,000 stations and nearly a million routing rules are complex and hard to verify. However, these networks are highly regular by design; for example they employ fat tree topologies with backup routers interconnected by redundant patterns. To exploit these regularities, we introduce network transformations: given a reachability formula and a network, we transform the network into a simpler to verify network and a corresponding transformed formula, such that the original formula is valid in the network if and only if the transformed formula is valid in the transformed network. Our network transformations exploit network surgery (in which irrelevant or redundant sets of nodes, headers, ports, or rules are ``sliced'' away) and network symmetry (say between backup routers). The validity of these transformations is established using a formal theory of networks. In particular, using Van Benthem--Hennessy--Milner style bisimulation, we show that one can generally associate bisimulations to transformations connecting networks and formulas with their transforms. Our work is a development in an area of current wide interest: applying programming language techniques (in our case bisimulation and modal logic) to problems in switching networks. We provide experimental evidence that our network transformations can speed up by 65x the task of verifying the communication between all pairs of Virtual Machines in a large datacenter network with about 100,000 VMs. 
An all-pair reachability calculation, which formerly took 5.5 days, can be done in 2 hours, and can be easily parallelized to complete in minutes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Brotherston:2016:MCS, author = "James Brotherston and Nikos Gorogiannis and Max Kanovich and Reuben Rowe", title = "Model checking for symbolic-heap separation logic with inductive predicates", journal = j-SIGPLAN, volume = "51", number = "1", pages = "84--96", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837621", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We investigate the *model checking* problem for symbolic-heap separation logic with user-defined inductive predicates, i.e., the problem of checking that a given stack-heap memory state satisfies a given formula in this language, as arises e.g. in software testing or runtime verification. First, we show that the problem is *decidable*; specifically, we present a bottom-up fixed point algorithm that decides the problem and runs in exponential time in the size of the problem instance. Second, we show that, while model checking for the full language is EXPTIME-complete, the problem becomes NP-complete or PTIME-solvable when we impose natural syntactic restrictions on the schemata defining the inductive predicates. We additionally present NP and PTIME algorithms for these restricted fragments. Finally, we report on the experimental performance of our procedures on a variety of specifications extracted from programs, exercising multiple combinations of syntactic restrictions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Koskinen:2016:RCR, author = "Eric Koskinen and Junfeng Yang", title = "Reducing crash recoverability to reachability", journal = j-SIGPLAN, volume = "51", number = "1", pages = "97--108", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837648", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software applications run on a variety of platforms (filesystems, virtual slices, mobile hardware, etc.) that do not provide 100\% uptime. As such, these applications may crash at any unfortunate moment losing volatile data and, when re-launched, they must be able to correctly recover from potentially inconsistent states left on persistent storage. From a verification perspective, crash recovery bugs can be particularly frustrating because, even when it has been formally proved for a program that it satisfies a property, the proof is foiled by these external events that crash and restart the program. In this paper we first provide a hierarchical formal model of what it means for a program to be crash recoverable. Our model captures the recoverability of many real world programs, including those in our evaluation which use sophisticated recovery algorithms such as shadow paging and write-ahead logging. 
Next, we introduce a novel technique capable of automatically proving that a program correctly recovers from a crash via a reduction to reachability. Our technique takes an input control-flow automaton and transforms it into an encoding that blends the capture of snapshots of pre-crash states into a symbolic search for a proof that recovery terminates and every recovered execution simulates some crash-free execution. Our encoding is designed to enable one to apply existing abstraction techniques in order to do the work that is necessary to prove recoverability. We have implemented our technique in a tool called Eleven82, capable of analyzing C programs to detect recoverability bugs or prove their absence. We have applied our tool to benchmark examples drawn from industrial file systems and databases, including GDBM, LevelDB, LMDB, PostgreSQL, SQLite, VMware and ZooKeeper. Within minutes, our tool is able to discover bugs or prove that these fragments are crash recoverable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Zhang:2016:QGM, author = "Xin Zhang and Ravi Mangal and Aditya V. Nori and Mayur Naik", title = "Query-guided maximum satisfiability", journal = j-SIGPLAN, volume = "51", number = "1", pages = "109--122", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837658", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new optimization problem ``Q-MaxSAT'', an extension of the well-known Maximum Satisfiability or MaxSAT problem. In contrast to MaxSAT, which aims to find an assignment to all variables in the formula, Q-MaxSAT computes an assignment to a desired subset of variables (or queries) in the formula. Indeed, many problems in diverse domains such as program reasoning, information retrieval, and mathematical optimization can be naturally encoded as Q-MaxSAT instances. We describe an iterative algorithm for solving Q-MaxSAT. In each iteration, the algorithm solves a subproblem that is relevant to the queries, and applies a novel technique to check whether the partial assignment found is a solution to the Q-MaxSAT problem. If the check fails, the algorithm grows the subproblem with a new set of clauses identified as relevant to the queries. Our empirical evaluation shows that our Q-MaxSAT solver Pilot achieves significant improvements in runtime and memory consumption over conventional MaxSAT solvers on several Q-MaxSAT instances generated from real-world problems in program analysis and information retrieval.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Lin:2016:SSW, author = "Anthony W. 
Lin and Pablo Barcel{\'o}", title = "String solving with word equations and transducers: towards a logic for analysing mutation {XSS}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "123--136", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837641", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the fundamental issue of decidability of satisfiability over string logics with concatenations and finite-state transducers as atomic operations. Although restricting to one type of operations yields decidability, little is known about the decidability of their combined theory, which is especially relevant when analysing security vulnerabilities of dynamic web pages in a more realistic browser model. On the one hand, word equations (string logic with concatenations) cannot precisely capture sanitisation functions (e.g. htmlescape) and implicit browser transductions (e.g. innerHTML mutations). On the other hand, transducers suffer from the reverse problem of being able to model sanitisation functions and browser transductions, but not string concatenations. Naively combining word equations and transducers easily leads to an undecidable logic. Our main contribution is to show that the ``straight-line fragment'' of the logic is decidable (complexity ranges from PSPACE to EXPSPACE). The fragment can express the program logics of straight-line string-manipulating programs with concatenations and transductions as atomic operations, which arise when performing bounded model checking or dynamic symbolic executions. We demonstrate that the logic can naturally express constraints required for analysing mutation XSS in web applications. Finally, the logic remains decidable in the presence of length, letter-counting, regular, indexOf, and disequality constraints.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Cardelli:2016:SCD, author = "Luca Cardelli and Mirco Tribastone and Max Tschaikowski and Andrea Vandin", title = "Symbolic computation of differential equivalences", journal = j-SIGPLAN, volume = "51", number = "1", pages = "137--150", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837649", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Ordinary differential equations (ODEs) are widespread in many natural sciences including chemistry, ecology, and systems biology, and in disciplines such as control theory and electrical engineering. Building on the celebrated molecules-as-processes paradigm, they have become increasingly popular in computer science, with high-level languages and formal methods such as Petri nets, process algebra, and rule-based systems that are interpreted as ODEs. We consider the problem of comparing and minimizing ODEs automatically. Influenced by traditional approaches in the theory of programming, we propose differential equivalence relations. We study them for a basic intermediate language, for which we have decidability results, that can be targeted by a class of high-level specifications. 
An ODE implicitly represents an uncountable state space, hence reasoning techniques cannot be borrowed from established domains such as probabilistic programs with finite-state Markov chain semantics. We provide novel symbolic procedures to check an equivalence and compute the largest one via partition refinement algorithms that use satisfiability modulo theories. We illustrate the generality of our framework by showing that differential equivalences include (i) well-known notions for the minimization of continuous-time Markov chains (lumpability), (ii)~bisimulations for chemical reaction networks recently proposed by Cardelli et al., and (iii) behavioral relations for process algebra with ODE semantics. With a prototype implementation we are able to detect equivalences in biochemical models from the literature that cannot be reduced using competing automatic techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Hague:2016:UDC, author = "Matthew Hague and Jonathan Kochems and C.-H. Luke Ong", title = "Unboundedness and downward closures of higher-order pushdown automata", journal = j-SIGPLAN, volume = "51", number = "1", pages = "151--163", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837627", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show the diagonal problem for higher-order pushdown automata (HOPDA), and hence the simultaneous unboundedness problem, is decidable. From recent work by Zetzsche this means that we can construct the downward closure of the set of words accepted by a given HOPDA. This also means we can construct the downward closure of the Parikh image of a HOPDA. Both of these consequences play an important role in verifying concurrent higher-order programs expressed as HOPDA or safe higher-order recursion schemes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Devriese:2016:FAC, author = "Dominique Devriese and Marco Patrignani and Frank Piessens", title = "Fully-abstract compilation by approximate back-translation", journal = j-SIGPLAN, volume = "51", number = "1", pages = "164--177", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837618", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A compiler is fully-abstract if the compilation from source language programs to target language programs reflects and preserves behavioural equivalence. Such compilers have important security benefits, as they limit the power of an attacker interacting with the program in the target language to that of an attacker interacting with the program in the source language. Proving compiler full-abstraction is, however, rather complicated. A common proof technique is based on the back-translation of target-level program contexts to behaviourally-equivalent source-level contexts. However, constructing such a back-translation is problematic when the source language is not strong enough to embed an encoding of the target language. 
For instance, when compiling from the simply-typed $ \lambda $-calculus ($ \lambda \tau $) to the untyped $ \lambda $-calculus ($ \lambda u$), the lack of recursive types in \lambda \tau prevents such a back-translation. We propose a general and elegant solution for this problem. The key insight is that it suffices to construct an approximate back-translation. The approximation is only accurate up to a certain number of steps and conservative beyond that, in the sense that the context generated by the back-translation may diverge when the original would not, but not vice versa. Based on this insight, we describe a general technique for proving compiler full-abstraction and demonstrate it on a compiler from $ \lambda \tau $ to $ \lambda u$. The proof uses asymmetric cross-language logical relations and makes innovative use of step-indexing to express the relation between a context and its approximate back-translation. We believe this proof technique can scale to challenging settings and enable simpler, more scalable proofs of compiler full-abstraction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Kang:2016:LVS, author = "Jeehoon Kang and Yoonseung Kim and Chung-Kil Hur and Derek Dreyer and Viktor Vafeiadis", title = "Lightweight verification of separate compilation", journal = j-SIGPLAN, volume = "51", number = "1", pages = "178--190", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837642", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Major compiler verification efforts, such as the CompCert project, have traditionally simplified the verification problem by restricting attention to the correctness of whole-program compilation, leaving open the question of how to verify the correctness of separate compilation. Recently, a number of sophisticated techniques have been proposed for proving more flexible, compositional notions of compiler correctness, but these approaches tend to be quite heavyweight compared to the simple ``closed simulations'' used in verifying whole-program compilation. Applying such techniques to a compiler like CompCert, as Stewart et al. have done, involves major changes and extensions to its original verification. In this paper, we show that if we aim somewhat lower---to prove correctness of separate compilation, but only for a *single* compiler---we can drastically simplify the proof effort. Toward this end, we develop several lightweight techniques that recast the compositional verification problem in terms of whole-program compilation, thereby enabling us to largely reuse the closed-simulation proofs from existing compiler verifications. We demonstrate the effectiveness of these techniques by applying them to CompCert 2.4, converting its verification of whole-program compilation into a verification of separate compilation in less than two person-months. This conversion only required a small number of changes to the original proofs, and uncovered two compiler bugs along the way. 
The result is SepCompCert, the first verification of separate compilation for the full CompCert compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Robbins:2016:MMS, author = "Ed Robbins and Andy King and Tom Schrijvers", title = "From {MinX} to {MinC}: semantics-driven decompilation of recursive datatypes", journal = j-SIGPLAN, volume = "51", number = "1", pages = "191--203", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837633", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reconstructing the meaning of a program from its binary executable is known as reverse engineering; it has a wide range of applications in software security, exposing piracy, legacy systems, etc. Since reversing is ultimately a search for meaning, there is much interest in inferring a type (a meaning) for the elements of a binary in a consistent way. Unfortunately existing approaches do not guarantee any semantic relevance for their reconstructed types. This paper presents a new and semantically-founded approach that provides strong guarantees for the reconstructed types. Key to our approach is the derivation of a witness program in a high-level language alongside the reconstructed types. This witness has the same semantics as the binary, is type correct by construction, and it induces a (justifiable) type assignment on the binary. Moreover, the approach effectively yields a type-directed decompiler. We formalise and implement the approach for reversing MinX, an abstraction of x86, to MinC, a type-safe dialect of C with recursive datatypes. Our evaluation compiles a range of textbook C algorithms to MinX and then recovers the original structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Lorenzen:2016:STD, author = "Florian Lorenzen and Sebastian Erdweg", title = "Sound type-dependent syntactic language extension", journal = j-SIGPLAN, volume = "51", number = "1", pages = "204--216", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837644", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Syntactic language extensions can introduce new facilities into a programming language while requiring little implementation effort and modest changes to the compiler. It is typical to desugar language extensions in a distinguished compiler phase after parsing or type checking, not affecting any of the later compiler phases. If desugaring happens before type checking, the desugaring cannot depend on typing information and type errors are reported in terms of the generated code. If desugaring happens after type checking, the code generated by the desugaring is not type checked and may introduce vulnerabilities. Both options are undesirable. We propose a system for syntactic extensibility where desugaring happens after type checking and desugarings are guaranteed to only generate well-typed code. 
A major novelty of our work is that desugarings operate on typing derivations instead of plain syntax trees. This provides desugarings access to typing information and forms the basis for the soundness guarantee we provide, namely that a desugaring generates a valid typing derivation. We have implemented our system for syntactic extensibility in a language-independent fashion and instantiated it for a substantial subset of Java, including generics and inheritance. We provide a sound Java extension for Scala-like for-comprehensions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Padon:2016:DII, author = "Oded Padon and Neil Immerman and Sharon Shoham and Aleksandr Karbyshev and Mooly Sagiv", title = "Decidability of inferring inductive invariants", journal = j-SIGPLAN, volume = "51", number = "1", pages = "217--231", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837640", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Induction is a successful approach for verification of hardware and software systems. A common practice is to model a system using logical formulas, and then use a decision procedure to verify that some logical formula is an inductive safety invariant for the system. A key ingredient in this approach is coming up with the inductive invariant, which is known as invariant inference. This is a major difficulty, and it is often left for humans or addressed by sound but incomplete abstract interpretation. This paper is motivated by the problem of inductive invariants in shape analysis and in distributed protocols. This paper approaches the general problem of inferring first-order inductive invariants by restricting the language L of candidate invariants. Notice that the problem of invariant inference in a restricted language L differs from the safety problem, since a system may be safe and still not have any inductive invariant in L that proves safety. Clearly, if L is finite (and if testing an inductive invariant is decidable), then inferring invariants in L is decidable. This paper presents some interesting cases when inferring inductive invariants in L is decidable even when L is an infinite language of universal formulas. Decidability is obtained by restricting L and defining a suitable well-quasi-order on the state space. We also present some undecidability results that show that our restrictions are necessary. We further present a framework for systematically constructing infinite languages while keeping the invariant inference problem decidable. 
We illustrate our approach by showing the decidability of inferring invariants for programs manipulating linked-lists, and for distributed protocols.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Lavaee:2016:HDP, author = "Rahman Lavaee", title = "The hardness of data packing", journal = j-SIGPLAN, volume = "51", number = "1", pages = "232--242", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837669", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A program can benefit from improved cache block utilization when contemporaneously accessed data elements are placed in the same memory block. This can reduce the program's memory block working set and thereby, reduce the capacity miss rate. We formally define the problem of data packing for arbitrary number of blocks in the cache and packing factor (the number of data objects fitting in a cache block) and study how well the optimal solution can be approximated for two dual problems. On the one hand, we show that the cache hit maximization problem is approximable within a constant factor, for every fixed number of blocks in the cache. On the other hand, we show that unless P=NP, the cache miss minimization problem cannot be efficiently approximated.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Gimenez:2016:CI, author = "St{\'e}phane Gimenez and Georg Moser", title = "The complexity of interaction", journal = j-SIGPLAN, volume = "51", number = "1", pages = "243--255", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837646", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we analyze the complexity of functional programs written in the interaction-net computation model, an asynchronous, parallel and confluent model that generalizes linear-logic proof nets. Employing user-defined sized and scheduled types, we certify concrete time, space and space-time complexity bounds for both sequential and parallel reductions of interaction-net programs by suitably assigning complexity potentials to typed nodes. The relevance of this approach is illustrated on archetypal programming examples. 
The provided analysis is precise, compositional and is, in theory, not restricted to particular complexity classes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Swamy:2016:DTM, author = "Nikhil Swamy and Catalin Hritcu and Chantal Keller and Aseem Rastogi and Antoine Delignat-Lavaud and Simon Forest and Karthikeyan Bhargavan and C{\'e}dric Fournet and Pierre-Yves Strub and Markulf Kohlweiss and Jean-Karim Zinzindohoue and Santiago Zanella-B{\'e}guelin", title = "Dependent types and multi-monadic effects in {F*}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "256--270", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837655", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new, completely redesigned, version of F*, a language that works both as a proof assistant as well as a general-purpose, verification-oriented, effectful programming language. In support of these complementary roles, F* is a dependently typed, higher-order, call-by-value language with _primitive_ effects including state, exceptions, divergence and IO. Although primitive, programmers choose the granularity at which to specify effects by equipping each effect with a monadic, predicate transformer semantics. F* uses this to efficiently compute weakest preconditions and discharges the resulting proof obligations using a combination of SMT solving and manual proofs. Isolated from the effects, the core of F* is a language of pure functions used to write specifications and proof terms---its consistency is maintained by a semantic termination check based on a well-founded order. We evaluate our design on more than 55,000 lines of F* we have authored in the last year, focusing on three main case studies. Showcasing its use as a general-purpose programming language, F* is programmed (but not verified) in F*, and bootstraps in both OCaml and F\#. Our experience confirms F*'s pay-as-you-go cost model: writing idiomatic ML-like code with no finer specifications imposes no user burden. As a verification-oriented language, our most significant evaluation of F* is in verifying several key modules in an implementation of the TLS-1.2 protocol standard. For the modules we considered, we are able to prove more properties, with fewer annotations using F* than in a prior verified implementation of TLS-1.2. Finally, as a proof assistant, we discuss our use of F* in mechanizing the metatheory of a range of lambda calculi, starting from the simply typed lambda calculus to System F-omega and even micro-F*, a sizeable fragment of F* itself---these proofs make essential use of F*'s flexible combination of SMT automation and constructive proofs, enabling a tactic-free style of programming and proving at a relatively large scale.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Borgstrom:2016:FRF, author = "Johannes Borgstr{\"o}m and Andrew D. 
Gordon and Long Ouyang and Claudio Russo and Adam {\'S}cibior and Marcin Szymczak", title = "{Fabular}: regression formulas as probabilistic programming", journal = j-SIGPLAN, volume = "51", number = "1", pages = "271--283", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837653", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Regression formulas are a domain-specific language adopted by several R packages for describing an important and useful class of statistical models: hierarchical linear regressions. Formulas are succinct, expressive, and clearly popular, so are they a useful addition to probabilistic programming languages? And what do they mean? We propose a core calculus of hierarchical linear regression, in which regression coefficients are themselves defined by nested regressions (unlike in R). We explain how our calculus captures the essence of the formula DSL found in R. We describe the design and implementation of Fabular, a version of the Tabular schema-driven probabilistic programming language, enriched with formulas based on our regression calculus. To the best of our knowledge, this is the first formal description of the core ideas of R's formula notation, the first development of a calculus of regression formulas, and the first demonstration of the benefits of composing regression formulas and latent variables in a probabilistic programming language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Grathwohl:2016:KCN, author = "Bj{\o}rn Bugge Grathwohl and Fritz Henglein and Ulrik Terp Rasmussen and Kristoffer Aalund S{\o}holm and Sebastian Paaske T{\o}rholm", title = "{Kleenex}: compiling nondeterministic transducers to deterministic streaming transducers", journal = j-SIGPLAN, volume = "51", number = "1", pages = "284--297", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837647", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present and illustrate Kleenex, a language for expressing general nondeterministic finite transducers, and its novel compilation to streaming string transducers with essentially optimal streaming behavior, worst-case linear-time performance and sustained high throughput. Its underlying theory is based on transducer decomposition into oracle and action machines: the oracle machine performs streaming greedy disambiguation of the input; the action machine performs the output actions. In use cases Kleenex achieves consistently high throughput rates around the 1 Gbps range on stock hardware. 
It performs well, especially in complex use cases, in comparison to both specialized and related tools such as GNU awk, GNU sed, GNU grep, RE2, Ragel and regular-expression libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Long:2016:APG, author = "Fan Long and Martin Rinard", title = "Automatic patch generation by learning correct code", journal = j-SIGPLAN, volume = "51", number = "1", pages = "298--312", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837617", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Prophet, a novel patch generation system that works with a set of successful human patches obtained from open-source software repositories to learn a probabilistic, application-independent model of correct code. It generates a space of candidate patches, uses the model to rank the candidate patches in order of likely correctness, and validates the ranked patches against a suite of test cases to find correct patches. Experimental results show that, on a benchmark set of 69 real-world defects drawn from eight open-source projects, Prophet significantly outperforms the previous state-of-the-art patch generation system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Katz:2016:ETB, author = "Omer Katz and Ran El-Yaniv and Eran Yahav", title = "Estimating types in binaries using predictive modeling", journal = j-SIGPLAN, volume = "51", number = "1", pages = "313--326", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837674", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reverse engineering is an important tool in mitigating vulnerabilities in binaries. As a lot of software is developed in object-oriented languages, reverse engineering of object-oriented code is of critical importance. One of the major hurdles in reverse engineering binaries compiled from object-oriented code is the use of dynamic dispatch. In the absence of debug information, any dynamic dispatch may seem to jump to many possible targets, posing a significant challenge to a reverse engineer trying to track the program flow. We present a novel technique that allows us to statically determine the likely targets of virtual function calls. Our technique uses object tracelets --- statically constructed sequences of operations performed on an object --- to capture potential runtime behaviors of the object. Our analysis automatically pre-labels some of the object tracelets by relying on instances where the type of an object is known. The resulting type-labeled tracelets are then used to train a statistical language model (SLM) for each type. We then use the resulting ensemble of SLMs over unlabeled tracelets to generate a ranking of their most likely types, from which we deduce the likely targets of dynamic dispatches. We have implemented our technique and evaluated it over real-world C++ binaries.
Our evaluation shows that when there are multiple alternative targets, our approach can drastically reduce the number of targets that have to be considered by a reverse engineer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Chatterjee:2016:AAQ, author = "Krishnendu Chatterjee and Hongfei Fu and Petr Novotn{\'y} and Rouzbeh Hasheminezhad", title = "Algorithmic analysis of qualitative and quantitative termination problems for affine probabilistic programs", journal = j-SIGPLAN, volume = "51", number = "1", pages = "327--342", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837639", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we consider termination of probabilistic programs with real-valued variables. The questions concerned are: 1. qualitative ones that ask (i) whether the program terminates with probability 1 (almost-sure termination) and (ii) whether the expected termination time is finite (finite termination); 2. quantitative ones that ask (i) to approximate the expected termination time (expectation problem) and (ii) to compute a bound B such that the probability to terminate after B steps decreases exponentially (concentration problem). To solve these questions, we utilize the notion of ranking supermartingales which is a powerful approach for proving termination of probabilistic programs. In detail, we focus on algorithmic synthesis of linear ranking-supermartingales over affine probabilistic programs (APP's) with both angelic and demonic non-determinism. An important subclass of APP's is LRAPP which is defined as the class of all APP's over which a linear ranking-supermartingale exists. Our main contributions are as follows. Firstly, we show that the membership problem of LRAPP (i) can be decided in polynomial time for APP's with at most demonic non-determinism, and (ii) is NP-hard and in PSPACE for APP's with angelic non-determinism; moreover, the NP-hardness result holds already for APP's without probability and demonic non-determinism. Secondly, we show that the concentration problem over LRAPP can be solved in the same complexity as for the membership problem of LRAPP. Finally, we show that the expectation problem over LRAPP can be solved in 2EXPTIME and is PSPACE-hard even for APP's without probability and non-determinism (i.e., deterministic programs). 
Our experimental results demonstrate the effectiveness of our approach to answer the qualitative and quantitative questions over APP's with at most demonic non-determinism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Singh:2016:TSD, author = "Rishabh Singh and Sumit Gulwani", title = "Transforming spreadsheet data types using examples", journal = j-SIGPLAN, volume = "51", number = "1", pages = "343--356", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837668", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cleaning spreadsheet data types is a common problem faced by millions of spreadsheet users. Data types such as date, time, name, and units are ubiquitous in spreadsheets, and cleaning transformations on these data types involve parsing and pretty printing their string representations. This presents many challenges to users because cleaning such data requires some background knowledge about the data itself and moreover this data is typically non-uniform, unstructured, and ambiguous. Spreadsheet systems and Programming Languages provide some UI-based and programmatic solutions for this problem but they are either insufficient for the user's needs or are beyond their expertise. In this paper, we present a programming by example methodology of cleaning data types that learns the desired transformation from a few input-output examples. We propose a domain specific language with probabilistic semantics that is parameterized with declarative data type definitions. The probabilistic semantics is based on three key aspects: (i) approximate predicate matching, (ii) joint learning of data type interpretation, and (iii) weighted branches. This probabilistic semantics enables the language to handle non-uniform, unstructured, and ambiguous data. We then present a synthesis algorithm that learns the desired program in this language from a set of input-output examples. We have implemented our algorithm as an Excel add-in and present its successful evaluation on 55 benchmark problems obtained from online help forums and Excel product team.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Lesani:2016:CCC, author = "Mohsen Lesani and Christian J. Bell and Adam Chlipala", title = "{Chapar}: certified causally consistent distributed key-value stores", journal = j-SIGPLAN, volume = "51", number = "1", pages = "357--370", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837622", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's Internet services are often expected to stay available and render high responsiveness even in the face of site crashes and network partitions. Theoretical results state that causal consistency is one of the strongest consistency guarantees that is possible under these requirements, and many practical systems provide causally consistent key-value stores. 
In this paper, we present a framework called Chapar for modular verification of causal consistency for replicated key-value store implementations and their client programs. Specifically, we formulate separate correctness conditions for key-value store implementations and for their clients. The interface between the two is a novel operational semantics for causal consistency. We have verified the causal consistency of two key-value store implementations from the literature using a novel proof technique. We have also implemented a simple automatic model checker for the correctness of client programs. The two independently verified results for the implementations and clients can be composed to conclude the correctness of any of the programs when executed with any of the implementations. We have developed and checked our framework in Coq, extracted it to OCaml, and built executable stores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Gotsman:2016:CIS, author = "Alexey Gotsman and Hongseok Yang and Carla Ferreira and Mahsa Najafzadeh and Marc Shapiro", title = "'{Cause} {I}'m strong enough': Reasoning about consistency choices in distributed systems", journal = j-SIGPLAN, volume = "51", number = "1", pages = "371--384", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837625", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale distributed systems often rely on replicated databases that allow a programmer to request different data consistency guarantees for different operations, and thereby control their performance. Using such databases is far from trivial: requesting stronger consistency in too many places may hurt performance, and requesting it in too few places may violate correctness. To help programmers in this task, we propose the first proof rule for establishing that a particular choice of consistency guarantees for various operations on a replicated database is enough to ensure the preservation of a given data integrity invariant. Our rule is modular: it allows reasoning about the behaviour of every operation separately under some assumption on the behaviour of other operations. This leads to simple reasoning, which we have automated in an SMT-based tool. We present a nontrivial proof of soundness of our rule and illustrate its use on several examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Liang:2016:PLC, author = "Hongjin Liang and Xinyu Feng", title = "A program logic for concurrent objects under fair scheduling", journal = j-SIGPLAN, volume = "51", number = "1", pages = "385--399", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837635", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing work on verifying concurrent objects is mostly concerned with safety only, e.g., partial correctness or linearizability. 
Although there has been recent work verifying lock-freedom of non-blocking objects, much less efforts are focused on deadlock-freedom and starvation-freedom, progress properties of blocking objects. These properties are more challenging to verify than lock-freedom because they allow the progress of one thread to depend on the progress of another, assuming fair scheduling. We propose LiLi, a new rely-guarantee style program logic for verifying linearizability and progress together for concurrent objects under fair scheduling. The rely-guarantee style logic unifies thread-modular reasoning about both starvation-freedom and deadlock-freedom in one framework. It also establishes progress-aware abstraction for concurrent objects, which can be applied when verifying safety and liveness of client code. We have successfully applied the logic to verify starvation-freedom or deadlock-freedom of representative algorithms such as ticket locks, queue locks, lock-coupling lists, optimistic lists and lazy lists.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Dragoi:2016:PPS, author = "Cezara Dragoi and Thomas A. Henzinger and Damien Zufferey", title = "{PSync}: a partially synchronous language for fault-tolerant distributed algorithms", journal = j-SIGPLAN, volume = "51", number = "1", pages = "400--415", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837650", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fault-tolerant distributed algorithms play an important role in many critical/high-availability applications. These algorithms are notoriously difficult to implement correctly, due to asynchronous communication and the occurrence of faults, such as the network dropping messages or computers crashing. We introduce PSync, a domain specific language based on the Heard-Of model, which views asynchronous faulty systems as synchronous ones with an adversarial environment that simulates asynchrony and faults by dropping messages. We define a runtime system for PSync that efficiently executes on asynchronous networks. We formalise the relation between the runtime system and PSync in terms of observational refinement. The high-level lockstep abstraction introduced by PSync simplifies the design and implementation of fault-tolerant distributed algorithms and enables automated formal verification. We have implemented an embedding of PSync in the Scala programming language with a runtime system for partially synchronous networks. 
We show the applicability of PSync by implementing several important fault-tolerant distributed algorithms and we compare the implementation of consensus algorithms in PSync against implementations in other languages in terms of code size, runtime efficiency, and verification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Chen:2016:PTI, author = "Sheng Chen and Martin Erwig", title = "Principal type inference for {GADTs}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "416--428", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837665", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new method for GADT type inference that improves the precision of previous approaches. In particular, our approach accepts more type-correct programs than previous approaches when they do not employ type annotations. A side benefit of our approach is that it can detect a wide range of runtime errors that are missed by previous approaches. Our method is based on the idea to represent type refinements in pattern-matching branches by choice types, which facilitate a separation of the typing and reconciliation phases and thus support case expressions. This idea is formalized in a type system, which is both sound and a conservative extension of the classical Hindley--Milner system. We present the results of an empirical evaluation that compares our algorithm with previous approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Garcia:2016:AGT, author = "Ronald Garcia and Alison M. Clark and {\'E}ric Tanter", title = "Abstracting gradual typing", journal = j-SIGPLAN, volume = "51", number = "1", pages = "429--442", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837670", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Language researchers and designers have extended a wide variety of type systems to support gradual typing, which enables languages to seamlessly combine dynamic and static checking. These efforts consistently demonstrate that designing a satisfactory gradual counterpart to a static type system is challenging, and this challenge only increases with the sophistication of the type system. Gradual type system designers need more formal tools to help them conceptualize, structure, and evaluate their designs. In this paper, we propose a new formal foundation for gradual typing, drawing on principles from abstract interpretation to give gradual types a semantics in terms of pre-existing static types. Abstracting Gradual Typing (AGT for short) yields a formal account of consistency---one of the cornerstones of the gradual typing approach---that subsumes existing notions of consistency, which were developed through intuition and ad hoc reasoning. Given a syntax-directed static typing judgment, the AGT approach induces a corresponding gradual typing judgment. 
Then the type safety proof for the underlying static discipline induces a dynamic semantics for gradual programs defined over source-language typing derivations. The AGT approach does not resort to an externally justified cast calculus: instead, run-time checks naturally arise by deducing evidence for consistent judgments during proof reduction. To illustrate the approach, we develop a novel gradually-typed counterpart for a language with record subtyping. Gradual languages designed with the AGT approach satisfy by construction the refined criteria for gradual typing set forth by Siek and colleagues.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Cimini:2016:GMA, author = "Matteo Cimini and Jeremy G. Siek", title = "The gradualizer: a methodology and algorithm for generating gradual type systems", journal = j-SIGPLAN, volume = "51", number = "1", pages = "443--455", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837632", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many languages are beginning to integrate dynamic and static typing. Siek and Taha offered gradual typing as an approach to this integration that provides a coherent and full-span migration between the two disciplines. However, the literature lacks a general methodology for designing gradually typed languages. Our first contribution is to provide a methodology for deriving the gradual type system and the compilation to the cast calculus. Based on this methodology, we present the Gradualizer, an algorithm that generates a gradual type system from a well-formed type system and also generates a compiler to the cast calculus. Our algorithm handles a large class of type systems and generates systems that are correct with respect to the formal criteria of gradual typing. We also report on an implementation of the Gradualizer that takes a type system expressed in lambda-prolog and outputs its gradually typed version and a compiler to the cast calculus in lambda-prolog.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Takikawa:2016:SGT, author = "Asumu Takikawa and Daniel Feltey and Ben Greenman and Max S. New and Jan Vitek and Matthias Felleisen", title = "Is sound gradual typing dead?", journal = j-SIGPLAN, volume = "51", number = "1", pages = "456--468", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837630", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers have come to embrace dynamically-typed languages for prototyping and delivering large and complex systems. When it comes to maintaining and evolving these systems, the lack of explicit static typing becomes a bottleneck. In response, researchers have explored the idea of gradually-typed programming languages which allow the incremental addition of type annotations to software written in one of these untyped languages. 
Some of these new, hybrid languages insert run-time checks at the boundary between typed and untyped code to establish type soundness for the overall system. With sound gradual typing, programmers can rely on the language implementation to provide meaningful error messages when type invariants are violated. While most research on sound gradual typing remains theoretical, the few emerging implementations suffer from performance overheads due to these checks. None of the publications on this topic comes with a comprehensive performance evaluation. Worse, a few report disastrous numbers. In response, this paper proposes a method for evaluating the performance of gradually-typed programming languages. The method hinges on exploring the space of partial conversions from untyped to typed. For each benchmark, the performance of the different versions is reported in a synthetic metric that associates runtime overhead to conversion effort. The paper reports on the results of applying the method to Typed Racket, a mature implementation of sound gradual typing, using a suite of real-world programs of various sizes and complexities. Based on these results the paper concludes that, given the current state of implementation technologies, sound gradual typing faces significant challenges. Conversely, it raises the question of how implementations could reduce the overheads associated with soundness and how tools could be used to steer programmers clear from pathological cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Octeau:2016:CSA, author = "Damien Octeau and Somesh Jha and Matthew Dering and Patrick McDaniel and Alexandre Bartel and Li Li and Jacques Klein and Yves {Le Traon}", title = "Combining static analysis with probabilistic models to enable market-scale {Android} inter-component analysis", journal = j-SIGPLAN, volume = "51", number = "1", pages = "469--484", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837661", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static analysis has been successfully used in many areas, from verifying mission-critical software to malware detection. Unfortunately, static analysis often produces false positives, which require significant manual effort to resolve. In this paper, we show how to overlay a probabilistic model, trained using domain knowledge, on top of static analysis results, in order to triage static analysis results. We apply this idea to analyzing mobile applications. Android application components can communicate with each other, both within single applications and between different applications. Unfortunately, techniques to statically infer Inter-Component Communication (ICC) yield many potential inter-component and inter-application links, most of which are false positives. At large scales, scrutinizing all potential links is simply not feasible. We therefore overlay a probabilistic model of ICC on top of static analysis results. Since computing the inter-component links is a prerequisite to inter-component analysis, we introduce a formalism for inferring ICC links based on set constraints. We design an efficient algorithm for performing link resolution. 
We compute all potential links in a corpus of 11,267 applications in 30 minutes and triage them using our probabilistic approach. We find that over 95.1\% of all 636 million potential links are associated with probability values below 0.01 and are thus likely unfeasible links. Thus, it is possible to consider only a small subset of all links without significant loss of information. This work is the first significant step in making static inter-application analysis more tractable, even at large scales.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Grigore:2016:ARG, author = "Radu Grigore and Hongseok Yang", title = "Abstraction refinement guided by a learnt probabilistic model", journal = j-SIGPLAN, volume = "51", number = "1", pages = "485--498", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837663", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The core challenge in designing an effective static program analysis is to find a good program abstraction --- one that retains only details relevant to a given query. In this paper, we present a new approach for automatically finding such an abstraction. Our approach uses a pessimistic strategy, which can optionally use guidance from a probabilistic model. Our approach applies to parametric static analyses implemented in Datalog, and is based on counterexample-guided abstraction refinement. For each untried abstraction, our probabilistic model provides a probability of success, while the size of the abstraction provides an estimate of its cost in terms of analysis time. Combining these two metrics, probability and cost, our refinement algorithm picks an optimal abstraction. Our probabilistic model is a variant of the Erdos--Renyi random graph model, and it is tunable by what we call hyperparameters. We present a method to learn good values for these hyperparameters, by observing past runs of the analysis on an existing codebase. We evaluate our approach on an object sensitive pointer analysis for Java programs, with two client analyses (PolySite and Downcast).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Garg:2016:LIU, author = "Pranav Garg and Daniel Neider and P. Madhusudan and Dan Roth", title = "Learning invariants using decision trees and implication counterexamples", journal = j-SIGPLAN, volume = "51", number = "1", pages = "499--512", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837664", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inductive invariants can be robustly synthesized using a learning model where the teacher is a program verifier who instructs the learner through concrete program configurations, classified as positive, negative, and implications. We propose the first learning algorithms in this model with implication counter-examples that are based on machine learning techniques. 
In particular, we extend classical decision-tree learning algorithms in machine learning to handle implication samples, building new scalable ways to construct small decision trees using statistical measures. We also develop a decision-tree learning algorithm in this model that is guaranteed to converge to the right concept (invariant) if one exists. We implement the learners and an appropriate teacher, and show that the resulting invariant synthesis is efficient and convergent for a large suite of programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Emmi:2016:SAD, author = "Michael Emmi and Constantin Enea", title = "Symbolic abstract data type inference", journal = j-SIGPLAN, volume = "51", number = "1", pages = "513--525", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837645", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Formal specification is a vital ingredient to scalable verification of software systems. In the case of efficient implementations of concurrent objects like atomic registers, queues, and locks, symbolic formal representations of their abstract data types (ADTs) enable efficient modular reasoning, decoupling clients from implementations. Writing adequate formal specifications, however, is a complex task requiring rare expertise. In practice, programmers write reference implementations as informal specifications. In this work we demonstrate that effective symbolic ADT representations can be automatically generated from the executions of reference implementations. Our approach exploits two key features of naturally-occurring ADTs: violations can be decomposed into a small set of representative patterns, and these patterns manifest in executions with few operations. By identifying certain algebraic properties of naturally-occurring ADTs, and exhaustively sampling executions up to a small number of operations, we generate concise symbolic ADT representations which are complete in practice, enabling the application of efficient symbolic verification algorithms without the burden of manual specification. Furthermore, the concise ADT violation patterns we generate are human-readable, and can serve as useful, formal documentation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Bhaskaracharya:2016:SIA, author = "Somashekaracharya G. Bhaskaracharya and Uday Bondhugula and Albert Cohen", title = "{SMO}: an integrated approach to intra-array and inter-array storage optimization", journal = j-SIGPLAN, volume = "51", number = "1", pages = "526--538", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837636", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The polyhedral model provides an expressive intermediate representation that is convenient for the analysis and subsequent transformation of affine loop nests. Several heuristics exist for achieving complex program transformations in this model. 
However, there is also considerable scope to utilize this model to tackle the problem of automatic memory footprint optimization. In this paper, we present a new automatic storage optimization technique which can be used to achieve both intra-array as well as inter-array storage reuse with a pre-determined schedule for the computation. Our approach works by finding statement-wise storage partitioning hyperplanes that partition a unified global array space so that values with overlapping live ranges are not mapped to the same partition. Our heuristic is driven by a fourfold objective function which not only minimizes the dimensionality and storage requirements of arrays required for each high-level statement, but also maximizes inter-statement storage reuse. The storage mappings obtained using our heuristic can be asymptotically better than those obtained by any existing technique. We implement our technique and demonstrate its practical impact by evaluating its effectiveness on several benchmarks chosen from the domains of image processing, stencil computations, and high-performance computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Bao:2016:PDV, author = "Wenlei Bao and Sriram Krishnamoorthy and Louis-No{\"e}l Pouchet and Fabrice Rastello and P. Sadayappan", title = "{PolyCheck}: dynamic verification of iteration space transformations on affine programs", journal = j-SIGPLAN, volume = "51", number = "1", pages = "539--554", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837656", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-level compiler transformations, especially loop transformations, are widely recognized as critical optimizations to restructure programs to improve data locality and expose parallelism. Guaranteeing the correctness of program transformations is essential, and to date three main approaches have been developed: proof of equivalence of affine programs, matching the execution traces of programs, and checking bit-by-bit equivalence of program outputs. Each technique suffers from limitations in the kind of transformations supported, space complexity, or the sensitivity to the testing dataset. In this paper, we take a novel approach that addresses all three limitations to provide an automatic bug checker to verify any iteration reordering transformations on affine programs, including non-affine transformations, with space consumption proportional to the original program data and robust to arbitrary datasets of a given size. We achieve this by exploiting the structure of affine program control- and data-flow to generate at compile-time lightweight checker code to be executed within the transformed program. 
Experimental results assess the correctness and effectiveness of our method and its increased coverage over previous approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Andrysco:2016:PFP, author = "Marc Andrysco and Ranjit Jhala and Sorin Lerner", title = "Printing floating-point numbers: a faster, always correct method", journal = j-SIGPLAN, volume = "51", number = "1", pages = "555--567", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837654", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Floating-point numbers are an essential part of modern software, recently gaining particular prominence on the web as the exclusive numeric format of Javascript. To use floating-point numbers, we require a way to convert binary machine representations into human readable decimal outputs. Existing conversion algorithms make trade-offs between completeness and performance. The classic Dragon4 algorithm by Steele and White and its later refinements achieve completeness --- i.e. produce correct and optimal outputs on all inputs --- by using arbitrary precision integer (bignum) arithmetic which leads to a high performance cost. On the other hand, the recent Grisu3 algorithm by Loitsch shows how to recover performance by using native integer arithmetic but sacrifices optimality for 0.5\% of all inputs. We present Errol, a new complete algorithm that is guaranteed to produce correct and optimal results for all inputs while simultaneously being 2x faster than the incomplete Grisu3 and 4x faster than previous complete methods.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Orchard:2016:ESS, author = "Dominic Orchard and Nobuko Yoshida", title = "Effects as sessions, sessions as effects", journal = j-SIGPLAN, volume = "51", number = "1", pages = "568--581", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837634", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effect and session type systems are two expressive behavioural type systems. The former is usually developed in the context of the lambda-calculus and its variants, the latter for the pi-calculus. In this paper we explore their relative expressive power. Firstly, we give an embedding from PCF, augmented with a parameterised effect system, into a session-typed pi-calculus (session calculus), showing that session types are powerful enough to express effects. Secondly, we give a reverse embedding, from the session calculus back into PCF, by instantiating PCF with concurrency primitives and its effect system with a session-like effect algebra; effect systems are powerful enough to express sessions. The embedding of session types into an effect system is leveraged to give a new implementation of session types in Haskell, via an effect system encoding. The correctness of this implementation follows from the second embedding result. 
We also discuss various extensions to our embeddings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Jia:2016:MBA, author = "Limin Jia and Hannah Gommerstadt and Frank Pfenning", title = "Monitors and blame assignment for higher-order session types", journal = j-SIGPLAN, volume = "51", number = "1", pages = "582--594", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837662", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Session types provide a means to prescribe the communication behavior between concurrent message-passing processes. However, in a distributed setting, some processes may be written in languages that do not support static typing of sessions or may be compromised by a malicious intruder, violating invariants of the session types. In such a setting, dynamically monitoring communication between processes becomes a necessity for identifying undesirable actions. In this paper, we show how to dynamically monitor communication to enforce adherence to session types in a higher-order setting. We present a system of blame assignment in the case when the monitor detects an undesirable action and an alarm is raised. We prove that dynamic monitoring does not change system behavior for well-typed processes, and that one of an indicated set of possible culprits must have been compromised in case of an alarm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Sangiorgi:2016:EBP, author = "Davide Sangiorgi and Valeria Vignudelli", title = "Environmental bisimulations for probabilistic higher-order languages", journal = j-SIGPLAN, volume = "51", number = "1", pages = "595--607", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837651", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Environmental bisimulations for probabilistic higher-order languages are studied. In contrast with applicative bisimulations, environmental bisimulations are known to be more robust and do not require sophisticated techniques such as Howe's in the proofs of congruence. As representative calculi, call-by-name and call-by-value \lambda-calculus, and a (call-by-value) \lambda-calculus extended with references (i.e., a store) are considered. In each case full abstraction results are derived for probabilistic environmental similarity and bisimilarity with respect to contextual preorder and contextual equivalence, respectively. Some possible enhancements of the (bi)simulations, as `up-to techniques', are also presented. Probabilities force a number of modifications to the definition of environmental bisimulations in non-probabilistic languages. Some of these modifications are specific to probabilities, others may be seen as general refinements of environmental bisimulations, applicable also to non-probabilistic languages.
Several examples are presented, to illustrate the modifications and the differences.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Flur:2016:MAA, author = "Shaked Flur and Kathryn E. Gray and Christopher Pulte and Susmit Sarkar and Ali Sezgin and Luc Maranget and Will Deacon and Peter Sewell", title = "Modelling the {ARMv8} architecture, operationally: concurrency and {ISA}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "608--621", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837615", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we develop semantics for key aspects of the ARMv8 multiprocessor architecture: the concurrency model and much of the 64-bit application-level instruction set (ISA). Our goal is to clarify what the range of architecturally allowable behaviour is, and thereby to support future work on formal verification, analysis, and testing of concurrent ARM software and hardware. Establishing such models with high confidence is intrinsically difficult: it involves capturing the vendor's architectural intent, aspects of which (especially for concurrency) have not previously been precisely defined. We therefore first develop a concurrency model with a microarchitectural flavour, abstracting from many hardware implementation concerns but still close to hardware-designer intuition. This means it can be discussed in detail with ARM architects. We then develop a more abstract model, better suited for use as an architectural specification, which we prove sound w.r.t.~the first. The instruction semantics involves further difficulties, handling the mass of detail and the subtle intensional information required to interface to the concurrency model. We have a novel ISA description language, with a lightweight dependent type system, letting us do both with a rather direct representation of the ARM reference manual instruction descriptions. We build a tool from the combined semantics that lets one explore, either interactively or exhaustively, the full range of architecturally allowed behaviour, for litmus tests and (small) ELF executables. We prove correctness of some optimisations needed for tool performance. 
We validate the models by discussion with ARM staff, and by comparison against ARM hardware behaviour, for ISA single-instruction tests and concurrent litmus tests.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Pichon-Pharabod:2016:CSR, author = "Jean Pichon-Pharabod and Peter Sewell", title = "A concurrency semantics for relaxed atomics that permits optimisation and avoids thin-air executions", journal = j-SIGPLAN, volume = "51", number = "1", pages = "622--633", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837616", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite much research on concurrent programming languages, especially for Java and C/C++, we still do not have a satisfactory definition of their semantics, one that admits all common optimisations without also admitting undesired behaviour. Especially problematic are the ``thin-air'' examples involving high-performance concurrent accesses, such as C/C++11 relaxed atomics. The C/C++11 model is in a per-candidate-execution style, and previous work has identified a tension between that and the fact that compiler optimisations do not operate over single candidate executions in isolation; rather, they operate over syntactic representations that represent all executions. In this paper we propose a novel approach that circumvents this difficulty. We define a concurrency semantics for a core calculus, including relaxed-atomic and non-atomic accesses, and locks, that admits a wide range of optimisation while still forbidding the classic thin-air examples. It also addresses other problems relating to undefined behaviour. The basic idea is to use an event-structure representation of the current state of each thread, capturing all of its potential executions, and to permit interleaving of execution and transformation steps over that to reflect optimisation (possibly dynamic) of the code. These are combined with a non-multi-copy-atomic storage subsystem, to reflect common hardware behaviour. The semantics is defined in a mechanised and executable form, and designed to be implementable above current relaxed hardware and strong enough to support the programming idioms that C/C++11 does for this fragment. It offers a potential way forward for concurrent programming language semantics, beyond the current C/C++11 and Java models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Batty:2016:OSA, author = "Mark Batty and Alastair F.
Donaldson and John Wickerson", title = "Overhauling {SC} atomics in {C11} and {OpenCL}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "634--648", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837637", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the conceptual simplicity of sequential consistency (SC), the semantics of SC atomic operations and fences in the C11 and OpenCL memory models is subtle, leading to convoluted prose descriptions that translate to complex axiomatic formalisations. We conduct an overhaul of SC atomics in C11, reducing the associated axioms in both number and complexity. A consequence of our simplification is that the SC operations in an execution no longer need to be totally ordered. This relaxation enables, for the first time, efficient and exhaustive simulation of litmus tests that use SC atomics. We extend our improved C11 model to obtain the first rigorous memory model formalisation for OpenCL (which extends C11 with support for heterogeneous many-core programming). In the OpenCL setting, we refine the SC axioms still further to give a sensible semantics to SC operations that employ a `memory scope' to restrict their visibility to specific threads. Our overhaul requires slight strengthenings of both the C11 and the OpenCL memory models, causing some behaviours to become disallowed. We argue that these strengthenings are natural, and that all of the formalised C11 and OpenCL compilation schemes of which we are aware (Power and x86 CPUs for C11, AMD GPUs for OpenCL) remain valid in our revised models. Using the HERD memory model simulator, we show that our overhaul leads to an exponential improvement in simulation time for C11 litmus tests compared with the original model, making *exhaustive* simulation competitive, time-wise, with the *non-exhaustive* CDSChecker tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Lahav:2016:TRA, author = "Ori Lahav and Nick Giannarakis and Viktor Vafeiadis", title = "Taming release-acquire consistency", journal = j-SIGPLAN, volume = "51", number = "1", pages = "649--662", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837643", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a strengthening of the release-acquire fragment of the C11 memory model that (i) forbids dubious behaviors that are not observed in any implementation; (ii) supports fence instructions that restore sequential consistency; and (iii) admits an equivalent intuitive operational semantics based on point-to-point communication. This strengthening has no additional implementation cost: it allows the same local optimizations as C11 release and acquire accesses, and has exactly the same compilation schemes to the x86-TSO and Power architectures. In fact, the compilation to Power is complete with respect to a recent axiomatic model of Power; that is, the compiled program exhibits exactly the same behaviors as the source one. 
Moreover, we provide criteria for placing enough fence instructions to ensure sequential consistency, and apply them to an efficient RCU implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Reps:2016:NPA, author = "Thomas Reps and Emma Turetsky and Prathmesh Prabhu", title = "{Newtonian} program analysis via tensor product", journal = j-SIGPLAN, volume = "51", number = "1", pages = "663--677", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837659", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recently, Esparza et al. generalized Newton's method --- a numerical-analysis algorithm for finding roots of real-valued functions---to a method for finding fixed-points of systems of equations over semirings. Their method provides a new way to solve interprocedural dataflow-analysis problems. As in its real-valued counterpart, each iteration of their method solves a simpler ``linearized'' problem. One of the reasons this advance is exciting is that some numerical analysts have claimed that ```all' effective and fast iterative [numerical] methods are forms (perhaps very disguised) of Newton's method.'' However, there is an important difference between the dataflow-analysis and numerical-analysis contexts: when Newton's method is used on numerical-analysis problems, multiplicative commutativity is relied on to rearrange expressions of the form ``c*X + X*d'' into ``(c+d) * X.'' Such equations correspond to path problems described by regular languages. In contrast, when Newton's method is used for interprocedural dataflow analysis, the ``multiplication'' operation involves function composition, and hence is non-commutative: ``c*X + X*d'' cannot be rearranged into ``(c+d) * X.'' Such equations correspond to path problems described by linear context-free languages (LCFLs). In this paper, we present an improved technique for solving the LCFL sub-problems produced during successive rounds of Newton's method. Our method applies to predicate abstraction, on which most of today's software model checkers rely.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Wu:2016:CEA, author = "Rongxin Wu and Xiao Xiao and Shing-Chi Cheung and Hongyu Zhang and Charles Zhang", title = "{Casper}: an efficient approach to call trace collection", journal = j-SIGPLAN, volume = "51", number = "1", pages = "678--690", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837619", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Call traces, i.e., sequences of function calls and returns, are fundamental to a wide range of program analyses such as bug reproduction, fault diagnosis, performance analysis, and many others. The conventional approach to collect call traces that instruments each function call and return site incurs large space and time overhead. 
Our approach aims at reducing the recording overheads by instrumenting only a small number of call sites while keeping the capability of recovering the full trace. We propose a call trace model and a logged call trace model based on an LL(1) grammar, which enables us to define the criteria of a feasible solution to call trace collection. Based on the two models, we prove that collecting call traces with minimal instrumentation is an NP-hard problem. We then propose an efficient approach to obtaining a suboptimal solution. We implemented our approach as a tool, Casper, and evaluated it using the DaCapo benchmark suite. The experimental results show that our approach causes significantly lower runtime (and space) overhead than two state-of-the-art approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Gilray:2016:PCF, author = "Thomas Gilray and Steven Lyde and Michael D. Adams and Matthew Might and David {Van Horn}", title = "Pushdown control-flow analysis for free", journal = j-SIGPLAN, volume = "51", number = "1", pages = "691--704", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837631", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional control-flow analysis (CFA) for higher-order languages introduces spurious connections between callers and callees, and different invocations of a function may pollute each other's return flows. Recently, three distinct approaches have been published that provide perfect call-stack precision in a computable manner: CFA2, PDCFA, and AAC. Unfortunately, implementing CFA2 and PDCFA requires significant engineering effort. Furthermore, all three are computationally expensive. For a monovariant analysis, CFA2 is in O(2^n), PDCFA is in O(n^6), and AAC is in O(n^8). In this paper, we describe a new technique that builds on these but is both straightforward to implement and computationally inexpensive. The crucial insight is an unusual state-dependent allocation strategy for the addresses of continuations. Our technique imposes only a constant-factor overhead on the underlying analysis and costs only O(n^3) in the monovariant case. We present the intuitions behind this development, benchmarks demonstrating its efficacy, and a proof of the precision of this analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Flatt:2016:BSS, author = "Matthew Flatt", title = "Binding as sets of scopes", journal = j-SIGPLAN, volume = "51", number = "1", pages = "705--717", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837620", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Our new macro expander for Racket builds on a novel approach to hygiene. Instead of basing macro expansion on variable renamings that are mediated by expansion history, our new expander tracks binding through a set of scopes that an identifier acquires from both binding forms and macro expansions.
The resulting model of macro expansion is simpler and more uniform than one based on renaming, and it is sufficiently compatible with Racket's old expander to be practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Hasuo:2016:LTP, author = "Ichiro Hasuo and Shunsuke Shimizu and Corina C{\^\i}rstea", title = "Lattice-theoretic progress measures and coalgebraic model checking", journal = j-SIGPLAN, volume = "51", number = "1", pages = "718--732", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837673", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the context of formal verification in general and model checking in particular, parity games serve as a mighty vehicle: many problems are encoded as parity games, which are then solved by the seminal algorithm by Jurdzinski. In this paper we identify the essence of this workflow to be the notion of progress measure, and formalize it in general, possibly infinitary, lattice-theoretic terms. Our view on progress measures is that they are to nested/alternating fixed points what invariants are to safety/greatest fixed points, and what ranking functions are to liveness/least fixed points. That is, progress measures are a combination of the latter two notions (invariant and ranking function) that have been extensively studied in the context of (program) verification. We then apply our theory of progress measures to a general model-checking framework, where systems are categorically presented as coalgebras. The framework's theoretical robustness is witnessed by a smooth transfer from the branching-time setting to the linear-time one. Although the framework can be used to derive some decision procedures for finite settings, we also expect the proposed framework to form a basis for sound proof methods for some undecidable/infinitary problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Chatterjee:2016:AAP, author = "Krishnendu Chatterjee and Amir Kafshdar Goharshady and Rasmus Ibsen-Jensen and Andreas Pavlogiannis", title = "Algorithms for algebraic path properties in concurrent systems of constant treewidth components", journal = j-SIGPLAN, volume = "51", number = "1", pages = "733--747", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837624", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study algorithmic questions for concurrent systems where the transitions are labeled from a complete, closed semiring, and path properties are algebraic with semiring operations. The algebraic path properties can model dataflow analysis problems, the shortest path problem, and many other natural problems that arise in program analysis. We consider that each component of the concurrent system is a graph with constant treewidth, a property satisfied by the control-flow graphs of most programs. We allow for multiple possible queries, which arise naturally in demand-driven dataflow analysis.
The study of multiple queries allows us to consider the tradeoff between the resource usage of the one-time preprocessing and that of each individual query. The traditional approach constructs the product graph of all components and applies the best-known graph algorithm on the product. In this approach, even the answer to a single query requires the transitive closure (i.e., the results of all possible queries), which provides no room for tradeoff between preprocessing and query time. Our main contributions are algorithms that significantly improve the worst-case running time of the traditional approach, and provide various tradeoffs depending on the number of queries. For example, in a concurrent system of two components, the traditional approach requires hexic time in the worst case for answering one query as well as computing the transitive closure, whereas we show that with one-time preprocessing in almost cubic time, each subsequent query can be answered in at most linear time, and even the transitive closure can be computed in almost quartic time. Furthermore, we establish conditional optimality results showing that the worst-case running time of our algorithms cannot be improved without achieving major breakthroughs in graph algorithms (i.e., improving the worst-case bound for the shortest path problem in general graphs). Preliminary experimental results show that our algorithms perform favorably on several benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Muroya:2016:MGI, author = "Koko Muroya and Naohiko Hoshino and Ichiro Hasuo", title = "Memoryful geometry of interaction {II}: recursion and adequacy", journal = j-SIGPLAN, volume = "51", number = "1", pages = "748--760", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837672", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A general framework of Memoryful Geometry of Interaction (mGoI) was recently introduced by the authors. It provides a sound translation of lambda-terms (on the high-level) to their realizations by stream transducers (on the low-level), where the internal states of the latter (called memories) are exploited for accommodating algebraic effects of Plotkin and Power. The translation is compositional, hence ``denotational,'' where transducers are inductively composed using an adaptation of Barbosa's coalgebraic component calculus. In the current paper we extend the mGoI framework and provide a systematic treatment of recursion---an essential feature of programming languages that was however missing in our previous work. Specifically, we introduce two new fixed-point operators in the coalgebraic component calculus. The two follow the previous work on recursion in GoI and are called Girard style and Mackie style: the former obviously exhibits some nice domain-theoretic properties, while the latter allows simpler construction. Their equivalence is established on the categorical (or, traced monoidal) level of abstraction, and is therefore generic with respect to the choice of algebraic effects.
Our main result is an adequacy theorem of our mGoI translation, against Plotkin and Power's operational semantics for algebraic effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Raychev:2016:LPN, author = "Veselin Raychev and Pavol Bielik and Martin Vechev and Andreas Krause", title = "Learning programs from noisy data", journal = j-SIGPLAN, volume = "51", number = "1", pages = "761--774", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837671", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new approach for learning programs from noisy datasets. Our approach is based on two new concepts: a regularized program generator which produces a candidate program based on a small sample of the entire dataset while avoiding overfitting, and a dataset sampler which carefully samples the dataset by leveraging the candidate program's score on that dataset. The two components are connected in a continuous feedback-directed loop. We show how to apply this approach to two settings: one where the dataset has a bound on the noise, and another without a noise bound. The second setting leads to a new way of performing approximate empirical risk minimization on hypotheses classes formed by a discrete search space. We then present two new kinds of program synthesizers which target the two noise settings. First, we introduce a novel regularized bitstream synthesizer that successfully generates programs even in the presence of incorrect examples. We show that the synthesizer can detect errors in the examples while combating overfitting --- a major problem in existing synthesis techniques. We also show how the approach can be used in a setting where the dataset grows dynamically via new examples (e.g., provided by a human). Second, we present a novel technique for constructing statistical code completion systems. These are systems trained on massive datasets of open source programs, also known as ``Big Code''. The key idea is to introduce a domain specific language (DSL) over trees and to learn functions in that DSL directly from the dataset. These learned functions then condition the predictions made by the system. This is a flexible and powerful technique which generalizes several existing works as we no longer need to decide a priori on what the prediction should be conditioned (another benefit is that the learned functions are a natural mechanism for explaining the prediction). 
As a result, our code completion system surpasses the prediction capabilities of existing, hard-wired systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Bornholt:2016:OSM, author = "James Bornholt and Emina Torlak and Dan Grossman and Luis Ceze", title = "Optimizing synthesis with metasketches", journal = j-SIGPLAN, volume = "51", number = "1", pages = "775--788", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837666", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many advanced programming tools---for both end-users and expert developers---rely on program synthesis to automatically generate implementations from high-level specifications. These tools often need to employ tricky, custom-built synthesis algorithms because they require synthesized programs to be not only correct, but also optimal with respect to a desired cost metric, such as program size. Finding these optimal solutions efficiently requires domain-specific search strategies, but existing synthesizers hard-code the strategy, making them difficult to reuse. This paper presents metasketches, a general framework for specifying and solving optimal synthesis problems. metasketches make the search strategy a part of the problem definition by specifying a fragmentation of the search space into an ordered set of classic sketches. We provide two cooperating search algorithms to effectively solve metasketches. A global optimizing search coordinates the activities of local searches, informing them of the costs of potentially-optimal solutions as they explore different regions of the candidate space in parallel. The local searches execute an incremental form of counterexample-guided inductive synthesis to incorporate information sent from the global search. We present Synapse, an implementation of these algorithms, and show that it effectively solves optimal synthesis problems with a variety of different cost functions. In addition, metasketches can be used to accelerate classic (non-optimal) synthesis by explicitly controlling the search strategy, and we show that Synapse solves classic synthesis problems that state-of-the-art tools cannot.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Albarghouthi:2016:MSS, author = "Aws Albarghouthi and Isil Dillig and Arie Gurfinkel", title = "Maximal specification synthesis", journal = j-SIGPLAN, volume = "51", number = "1", pages = "789--801", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837628", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many problems in program analysis, verification, and synthesis require inferring specifications of unknown procedures. Motivated by a broad range of applications, we formulate the problem of maximal specification inference: Given a postcondition Phi and a program P calling a set of unknown procedures F_1,...,F_n, what are the most permissive specifications of procedures F_i that ensure correctness of P? 
In other words, we are looking for the smallest number of assumptions we need to make about the behaviours of F_i in order to prove that $P$ satisfies its postcondition. To solve this problem, we present a novel approach that utilizes a counterexample-guided inductive synthesis loop and reduces the maximal specification inference problem to multi-abduction. We formulate the novel notion of multi-abduction as a generalization of classical logical abduction and present an algorithm for solving multi-abduction problems. On the practical side, we evaluate our specification inference technique on a range of benchmarks and demonstrate its ability to synthesize specifications of kernel routines invoked by device drivers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Frankle:2016:EDS, author = "Jonathan Frankle and Peter-Michael Osera and David Walker and Steve Zdancewic", title = "Example-directed synthesis: a type-theoretic interpretation", journal = j-SIGPLAN, volume = "51", number = "1", pages = "802--815", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837629", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Input-output examples have emerged as a practical and user-friendly specification mechanism for program synthesis in many environments. While example-driven tools have demonstrated tangible impact that has inspired adoption in industry, their underlying semantics are less well-understood: what are ``examples'' and how do they relate to other kinds of specifications? This paper demonstrates that examples can, in general, be interpreted as refinement types. Seen in this light, program synthesis is the task of finding an inhabitant of such a type. This insight provides an immediate semantic interpretation for examples. Moreover, it enables us to exploit decades of research in type theory as well as its correspondence with intuitionistic logic rather than designing ad hoc theoretical frameworks for synthesis from scratch. We put this observation into practice by formalizing synthesis as proof search in a sequent calculus with intersection and union refinements that we prove to be sound with respect to a conventional type system. In addition, we show how to handle negative examples, which arise from user feedback or counterexample-guided loops. This theory serves as the basis for a prototype implementation that extends our core language to support ML-style algebraic data types and structurally inductive functions. Users can also specify synthesis goals using polymorphic refinements and import monomorphic libraries. The prototype serves as a vehicle for empirically evaluating a number of different strategies for resolving the nondeterminism of the sequent calculus---bottom-up theorem-proving, term enumeration with refinement type checking, and combinations of both---the results of which classify, explain, and validate the design choices of existing synthesis systems. 
It also provides a platform for measuring the practical value of a specification language that combines ``examples'' with the more general expressiveness of refinements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '16 conference proceedings.", } @Article{Homer:2016:ALG, author = "Michael Homer and Timothy Jones and James Noble", title = "From {APIs} to languages: generalising method names", journal = j-SIGPLAN, volume = "51", number = "2", pages = "1--12", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816708", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Method names with multiple separate parts are a feature of many dynamic languages derived from Smalltalk. Generalising the syntax of method names to allow parts to be repeated, optional, or alternatives, means a single definition can respond to a whole family of method requests. We show how generalising method names can support flexible APIs for domain-specific languages, complex initialisation tasks, and control structures defined in libraries. We describe how we have extended Grace to support generalised method names, and prove that such an extension can be integrated into a gradually-typed language while preserving type soundness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Maidl:2016:FTL, author = "Andr{\'e} Murbach Maidl and Fabio Mascarenhas and Roberto Ierusalimschy", title = "A formalization of typed {\tt lua}", journal = j-SIGPLAN, volume = "51", number = "2", pages = "13--25", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816709", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers often migrate from a dynamically typed to a statically typed language when their simple scripts evolve into complex programs. Optional type systems are one way of having both static and dynamic typing in the same language, while keeping its dynamically typed semantics. This makes evolving a program from dynamic to static typing a matter of describing the implied types that it is using and adding annotations to make those types explicit. Designing an optional type system for an existing dynamically typed language is challenging, as its types should feel natural to programmers that are already familiar with this language. In this work, we give a formal description of Typed Lua, an optional type system for Lua, with a focus on two of its novel type system features: incremental evolution of imperative record and object types that is both lightweight and type-safe, and projection types, a combination of flow typing, functions that return multiple values, and multiple assignment. 
While our type system is tailored to the features and idioms of Lua, its features can be adapted to other imperative scripting languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Tanter:2016:GCP, author = "{\'E}ric Tanter and Nicolas Tabareau", title = "Gradual certified programming in {\tt coq}", journal = j-SIGPLAN, volume = "51", number = "2", pages = "26--40", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816710", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Expressive static typing disciplines are a powerful way to achieve high-quality software. However, the adoption cost of such techniques should not be under-estimated. Just like gradual typing allows for a smooth transition from dynamically-typed to statically-typed programs, it seems desirable to support a gradual path to certified programming. We explore gradual certified programming in Coq, providing the possibility to postpone the proofs of selected properties, and to check ``at runtime'' whether the properties actually hold. Casts can be integrated with the implicit coercion mechanism of Coq to support implicit cast insertion {\`a} la gradual typing. Additionally, when extracting Coq functions to mainstream languages, our encoding of casts supports lifting assumed properties into runtime checks. Much to our surprise, it is not necessary to extend Coq in any way to support gradual certified programming. A simple mix of type classes and axioms makes it possible to bring gradual certified programming to Coq in a straightforward manner.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Ernst:2016:MSD, author = "Erik Ernst and Anders M{\o}ller and Mathias Schwarz and Fabio Strocco", title = "Message safety in {Dart}", journal = j-SIGPLAN, volume = "51", number = "2", pages = "41--53", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816711", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unlike traditional static type checking, the type system in the Dart programming language is unsound by design, even for fully annotated programs. The rationale has been that this allows compile-time detection of likely errors and enables code completion in integrated development environments, without being restrictive on programmers. Despite unsoundness, judicious use of type annotations can ensure useful properties of the runtime behavior of Dart programs. We present a formal model of a core of Dart with a focus on its type system, which allows us to elucidate the causes of unsoundness. Our main contribution is a characterization of message-safe programs and a theorem stating that such programs will never encounter 'message not understood' errors at runtime. 
Message safety is less restrictive than traditional type soundness, and we argue that it forms a natural intermediate point between dynamically typed and statically typed Dart programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Lyde:2016:CFA, author = "Steven Lyde and William E. Byrd and Matthew Might", title = "Control-flow analysis of dynamic languages via pointer analysis", journal = j-SIGPLAN, volume = "51", number = "2", pages = "54--62", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816712", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We demonstrate how to map a control-flow analysis for a higher-order language (dynamic languages are typically higher-order) into a pointer analysis for a first-order language, such as C. This allows us to use existing pointer analysis tools to perform a control-flow analysis, exploiting their technical advancements and the engineering effort that went into developing them. We compare the results of two recent parallel pointer analysis tools with a parallel control-flow analysis tool. While it has been known that a control-flow analysis of higher-order languages and a pointer analysis of first-order languages are very similar, we demonstrate that these two analyses are actually more similar than previously thought. We present the first mapping between a higher-order control-flow analysis and a pointer analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Feeley:2016:CML, author = "Marc Feeley", title = "Compiling for multi-language task migration", journal = j-SIGPLAN, volume = "51", number = "2", pages = "63--77", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816713", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Task migration allows a running program to continue its execution in a different destination environment. Increasingly, execution environments are defined by combinations of cultural and technological constraints, affecting the choice of host language, libraries and tools. A compiler supporting multiple target environments and task migration must be able to marshal continuations and then unmarshal and continue their execution, ideally, even if the language of the destination environment is different. In this paper, we propose a compilation approach based on a virtual machine that strikes a balance between implementation portability and efficiency. We explain its implementation within a Scheme compiler targeting JavaScript, PHP, Python, Ruby and Java --- some of the most popular host languages for web applications.
As our experiments show, this approach compares well with other Scheme compilers targeting high-level languages in terms of execution speed, being sometimes up to 3 orders of magnitude faster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Grimmer:2016:HPC, author = "Matthias Grimmer and Chris Seaton and Roland Schatz and Thomas W{\"u}rthinger and Hanspeter M{\"o}ssenb{\"o}ck", title = "High-performance cross-language interoperability in a multi-language runtime", journal = j-SIGPLAN, volume = "51", number = "2", pages = "78--90", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816714", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers combine different programming languages because it allows them to use the most suitable language for a given problem, to gradually migrate existing projects from one language to another, or to reuse existing source code. However, existing cross-language mechanisms suffer from complex interfaces, insufficient flexibility, or poor performance. We present the TruffleVM, a multi-language runtime that allows composing different language implementations in a seamless way. It reduces the amount of required boiler-plate code to a minimum by allowing programmers to access foreign functions or objects by using the notation of the host language. We compose language implementations that translate source code to an intermediate representation (IR), which is executed on top of a shared runtime system. Language implementations use language-independent messages that the runtime resolves at their first execution by transforming them to efficient foreign-language-specific operations. The TruffleVM avoids conversion or marshaling of foreign objects at the language boundary and allows the dynamic compiler to perform its optimizations across language boundaries, which guarantees high performance. This paper presents an implementation of our ideas based on the Truffle system and its guest language implementations JavaScript, Ruby, and C.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Leopoldseder:2016:JJT, author = "David Leopoldseder and Lukas Stadler and Christian Wimmer and Hanspeter M{\"o}ssenb{\"o}ck", title = "{Java-to-JavaScript} translation via structured control flow reconstruction of compiler {IR}", journal = j-SIGPLAN, volume = "51", number = "2", pages = "91--103", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816715", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach to cross-compile Java bytecodes to JavaScript, building on existing Java optimizing compiler technology. Static analysis determines which Java classes and methods are reachable. These are then translated to JavaScript using a re-configured Java just-in-time compiler with a new back end that generates JavaScript instead of machine code.
Standard compiler optimizations such as method inlining and global value numbering, as well as advanced optimizations such as escape analysis, lead to compact and optimized JavaScript code. Compiler IR is unstructured, so structured control flow needs to be reconstructed before code generation is possible. We present details of our control flow reconstruction algorithm. Our system is based on Graal, an open-source optimizing compiler for the Java HotSpot VM and other VMs. The modular and VM-independent architecture of Graal allows us to reuse the intermediate representation, the bytecode parser, and the high-level optimizations. Our custom back end first performs control flow reconstruction and then JavaScript code generation. The generated JavaScript undergoes a set of optimizations to increase readability and performance. Static analysis is performed on the Graal intermediate representation as well. Benchmark results for medium-sized Java benchmarks such as SPECjbb2005 run with acceptable performance on the V8 JavaScript VM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Pape:2016:LIS, author = "Tobias Pape and Tim Felgentreff and Robert Hirschfeld and Anton Gulenko and Carl Friedrich Bolz", title = "Language-independent storage strategies for tracing {JIT}-based virtual machines", journal = j-SIGPLAN, volume = "51", number = "2", pages = "104--113", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816716", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Storage strategies have been proposed as a run-time optimization for the PyPy Python implementation and have shown promising results for optimizing execution speed and memory requirements. However, it remained unclear whether the approach works equally well in other dynamic languages. Furthermore, while PyPy is based on RPython, a language to write VMs with reusable components such as a tracing just-in-time compiler and garbage collection, the strategies design itself was not generalized to be reusable across languages implemented using that same toolchain. In this paper, we present a general design and implementation for storage strategies and show how they can be reused across different RPython-based languages. 
We evaluate the performance of our implementation for RSqueak, an RPython-based VM for Squeak/Smalltalk, and show that storage strategies may indeed offer performance benefits for certain workloads in other dynamic programming languages. We furthermore evaluate the generality of our implementation by applying it to Topaz, a Ruby VM, and Pycket, a Racket implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Aakerblom:2016:MPP, author = "Beatrice {\AA}kerblom and Tobias Wrigstad", title = "Measuring polymorphism in {Python} programs", journal = j-SIGPLAN, volume = "51", number = "2", pages = "114--128", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816717", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Following the increased popularity of dynamic languages and their increased use in critical software, there have been many proposals to retrofit static type systems to these languages to improve possibilities to catch bugs and improve performance. A key question for any type system is whether the types should be structural, for more expressiveness, or nominal, to carry more meaning for the programmer. For retrofitted type systems, it seems the current trend is using structural types. This paper attempts to answer the question to what extent this extra expressiveness is needed, and how the possible polymorphism in dynamic code is used in practice. We study polymorphism in 36 real-world open source Python programs and approximate to what extent nominal and structural types could be used to type these programs. The study is based on collecting traces from multiple runs of the programs and analysing the polymorphic degrees of targets at more than 7 million call-sites. Our results show that while polymorphism is used in all programs, the programs are to a great extent monomorphic. The polymorphism found is evenly distributed across libraries and program-specific code and occurs both during program start-up and normal execution. Most programs contain a few ``megamorphic'' call-sites where receiver types vary widely. The non-monomorphic parts of the programs can to some extent be typed with nominal or structural types, but none of the approaches can type entire programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Alcocer:2016:TPV, author = "Juan Pablo Sandoval Alcocer and Alexandre Bergel", title = "Tracking down performance variation against source code evolution", journal = j-SIGPLAN, volume = "51", number = "2", pages = "129--139", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816718", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Little is known about how software performance evolves across software revisions. The severity of this situation is high since (i) most performance variations seem to happen accidentally and (ii) addressing a performance regression is challenging, especially when functional code is stacked on it.
This paper reports an empirical study on the performance evolution of 19 applications, totaling over 19 MLOC. It took 52 days to run our 49 benchmarks. By relating performance variation with source code revisions, we found out that: (i) 1 out of every 3 application revisions introduces a performance variation, (ii) performance variations may be classified into 9 patterns, (iii) the most prominent cause of performance regression involves loops and collections. We carefully describe the patterns we identified, and detail how we addressed the numerous challenges we faced to complete our experiment.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Kedlaya:2016:SST, author = "Madhukar N. Kedlaya and Behnam Robatmili and Ben Hardekopf", title = "Server-side type profiling for optimizing client-side {JavaScript} engines", journal = j-SIGPLAN, volume = "51", number = "2", pages = "140--153", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816719", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern JavaScript engines optimize hot functions using a JIT compiler along with type information gathered by an online profiler. However, the profiler's information can be unsound and when unexpected types are encountered the engine must recover using an expensive mechanism called deoptimization. In this paper we describe a method to significantly reduce the number of deoptimizations observed by client-side JavaScript engines by using ahead-of-time profiling on the server-side. Unlike previous work on ahead-of-time profiling for statically-typed languages such as Java, our technique must operate on a dynamically-typed language, which significantly changes the required insights and methods to make the technique effective. We implement our proposed technique using the SpiderMonkey JavaScript engine, and we evaluate our implementation using three different kinds of benchmarks: the industry-standard Octane benchmark suite, a set of JavaScript physics engines, and a set of real-world websites from the Membench50 benchmark suite. 
We show that using ahead-of-time profiling provides significant performance benefits over the baseline vanilla SpiderMonkey engine.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Fischer:2016:EIE, author = "Lars Fischer and Stefan Hanenberg", title = "An empirical investigation of the effects of type systems and code completion on {API} usability using {TypeScript} and {JavaScript} in {MS Visual Studio}", journal = j-SIGPLAN, volume = "51", number = "2", pages = "154--167", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816720", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent empirical studies that compared static and dynamic type systems on API usability showed a positive impact of static type systems on developer productivity in most cases. Nevertheless, it is unclear how large this effect is in comparison to other factors. One obvious factor in programming is tooling: It is commonly accepted that modern IDEs have a large positive impact on developers, although it is not clear which parts of modern IDEs are responsible for that. One possible---and for most developers obvious---candidate is code completion. This paper describes a 2x2 randomized trial that compares JavaScript and Microsoft's statically typed alternative TypeScript with and without code completion in MS Visual Studio. While the experiment shows (in correspondence to previous experiments) a large positive effect of the statically typed language TypeScript, the code completion effect is not only marginal, but also just approaching statistical significance. This seems to be an indicator that the effect of static type systems is larger than often assumed, at least in comparison to code completion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Teruel:2016:ACR, author = "Camille Teruel and St{\'e}phane Ducasse and Damien Cassou and Marcus Denker", title = "Access control to reflection with object ownership", journal = j-SIGPLAN, volume = "51", number = "2", pages = "168--176", month = feb, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936313.2816721", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reflection is a powerful programming language feature that enables language extensions, generic code, dynamic analyses, development tools, etc. However, uncontrolled reflection breaks object encapsulation and considerably increases the attack surface of programs, e.g., malicious libraries can use reflection to attack their client applications. To bring reflection and object encapsulation back together, we use dynamic object ownership to design an access control policy to reflective operations. This policy grants objects full reflective power over the objects they own but limited reflective power over other objects.
Code is still able to use advanced reflective operations but reflection cannot be used as an attack vector anymore.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '15 conference proceedings.", } @Article{Narasimhan:2016:NGS, author = "Priya Narasimhan and Utsav Drolia and Jiaqi Tan and Nathan D. Mickulicz and Rajeev Gandhi", title = "The next-generation in-stadium experience (keynote)", journal = j-SIGPLAN, volume = "51", number = "3", pages = "1--10", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814205", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "YinzCam is a cloud-hosted service that provides sports fans with real-time scores, news, photos, statistics, live radio, streaming video, etc., on their mobile devices. YinzCam's infrastructure is currently hosted on Amazon Web Services (AWS) and supports over 30 million installs of the official mobile apps of 140+ NHL/NFL/NBA/NRL/NCAA sports teams and venues. YinzCam's workload is necessarily multi-modal (e.g., pre-game, in-game, post-game, game-day, non-gameday), with normal game-time traffic being twenty-fold of that on non-game days. This paper describes the evolution of YinzCam's production architecture and distributed infrastructure, from its beginnings in 2009, when it was used to support thousands of concurrent users, to today's system that supports millions of concurrent users on any game day. We also discuss key new opportunities to improve the fan experience inside the stadium of the future, without impacting the available bandwidth, by crowd-sourcing the thousands of mobile devices that are in fans' hands inside these venues. We present Krowd, a novel distributed key-value store for promoting efficient content sharing, discovery and retrieval across the mobile devices inside a stadium. We present CHIPS, a system that ensures that users' privacy is maintained while their devices participate in the crowdsourced infrastructure.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Leissa:2016:SED, author = "Roland Lei{\ss}a and Klaas Boesche and Sebastian Hack and Richard Membarth and Philipp Slusallek", title = "Shallow embedding of {DSLs} via online partial evaluation", journal = j-SIGPLAN, volume = "51", number = "3", pages = "11--20", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814208", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper investigates shallow embedding of DSLs by means of online partial evaluation. To this end, we present a novel online partial evaluator for continuation-passing style languages. We argue that it has, in contrast to prior work, a predictable termination policy that works well in practice. We present our approach formally using a continuation-passing variant of PCF and prove its termination properties. 
We evaluate our technique experimentally in the field of visual and high-performance computing and show that our evaluator produces highly specialized and efficient code for CPUs as well as GPUs that matches the performance of hand-tuned expert code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Scherr:2016:AFC, author = "Maximilian Scherr and Shigeru Chiba", title = "Almost first-class language embedding: taming staged embedded {DSLs}", journal = j-SIGPLAN, volume = "51", number = "3", pages = "21--30", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814217", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded domain-specific languages (EDSLs), inheriting a general-purpose language's features as well as look-and-feel, have traditionally been second-class or rather non-citizens in terms of host-language design. This makes sense when one regards them to be on the same level as traditional, non-EDSL library interfaces. However, this equivalence only applies to the simplest of EDSLs. In this paper we illustrate why this is detrimental when moving on to EDSLs that employ staging, i.e. program reification, by example of various issues that affect authors and users alike. We believe that if EDSLs are to be considered a reliable, language-like interface abstraction, they require exceptional attention and design scrutiny. Instead of unenforceable conventions, we advocate the acceptance of EDSLs as proper, i.e. almost first-class, citizens while retaining most advantages of pure embeddings. As a small step towards this goal, we present a pragmatic framework prototype for Java. It is based on annotations that explicate and document membership to explicit EDSL entities. In a nutshell, our framework identifies (annotated) method calls and field accesses as EDSL terms and dynamically constructs an abstract-syntax representation, which is eventually passed to a semantics-defining back end implemented by the EDSL author.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Reynders:2016:GSB, author = "Bob Reynders and Dominique Devriese and Frank Piessens", title = "Generating safe boundary {APIs} between typed {EDSLs} and their environments", journal = j-SIGPLAN, volume = "51", number = "3", pages = "31--34", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814219", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded domain specific languages (EDSLs) are used to represent special-purpose code in a general-purpose language and they are used for applications like vector calculations and run-time code generation. Often, code in an EDSL is compiled to a target (e.g. GPU languages, JVM bytecode, assembly, JavaScript) and needs to interface with other code that is available at that level but uses other data representations or calling conventions. 
We present an approach for safely making such APIs available in a typed EDSL, guaranteeing correct conversions between data representations and respect for calling conventions. When the code being interfaced with is the result of static compilation of host language code, we propose a way to auto-generate the needed boilerplate using meta-programming. We instantiate our technique with JavaScript as the target language, JS-Scala as the EDSL, Scala.js as the static compiler and Scala macros to generate the boilerplate, but our design is more generally applicable. We provide evidence of the usefulness of our approach through a prototype implementation that we have applied in a non-trivial code base.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Medeiros:2016:ESC, author = "Fl{\'a}vio Medeiros and Iran Rodrigues and M{\'a}rcio Ribeiro and Leopoldo Teixeira and Rohit Gheyi", title = "An empirical study on configuration-related issues: investigating undeclared and unused identifiers", journal = j-SIGPLAN, volume = "51", number = "3", pages = "35--44", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814206", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The variability of configurable systems may lead to configuration-related issues (i.e., faults and warnings) that appear only when we select certain configuration options. Previous studies found that issues related to configurability are harder to detect than issues that appear in all configurations, because variability increases the complexity. However, little effort has been put into understanding configuration-related faults (e.g., undeclared functions and variables) and warnings (e.g., unused functions and variables). To better understand the peculiarities of configuration-related undeclared/unused variables and functions, in this paper we perform an empirical study of 15 systems to answer research questions related to how developers introduce these issues, the number of configuration options involved, and the time that these issues remain in source files. To make the analysis of several projects feasible, we propose a strategy that minimizes the initial setup problems of variability-aware tools. We detect and confirm 2 undeclared variables, 14 undeclared functions, 16 unused variables, and 7 unused functions related to configurability. We submit 30 patches to fix issues not fixed by developers. Our findings support the effectiveness of sampling (i.e., analysis of only a subset of valid configurations) because most issues involve two or fewer configuration options. Nevertheless, by analyzing the version history of the projects, we observe that a number of issues remain in the code for several years.
Furthermore, the corpus of undeclared/unused variables and functions gathered is a valuable source to study these issues, compare sampling algorithms, and test and improve variability-aware tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{El-Sharkawy:2016:AKS, author = "Sascha El-Sharkawy and Adam Krafczyk and Klaus Schmid", title = "Analysing the {Kconfig} semantics and its analysis tools", journal = j-SIGPLAN, volume = "51", number = "3", pages = "45--54", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814222", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Linux kernel is often used as a real world case study to demonstrate novel Software Product Line Engineering research methods. An important point in this is often the analysis of the Kconfig semantics. However, we detected that the semantics of Kconfig is rather unclear and has many special cases, which are not documented in its short specification. We performed a systematic analysis to uncover the correct behaviour of Kconfig and present the results, which are necessary for applying semantically correct analyses. Further, we analyse whether existing analysis tools of the research community are aware of the correct semantics of Kconfig. These analyses can be used for improving existing analysis tools as well as for decision support when selecting an appropriate tool for a specific analysis. In summary, we contribute to a better understanding of Kconfig in the research community to improve the validity of evaluations based on Linux.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Alshara:2016:MLO, author = "Zakarea Alshara and Abdelhak-Djamel Seriai and Chouki Tibermacine and Hinde Lilia Bouziane and Christophe Dony and Anas Shatnawi", title = "Migrating large object-oriented applications into component-based ones: instantiation and inheritance transformation", journal = j-SIGPLAN, volume = "51", number = "3", pages = "55--64", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814223", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large object-oriented applications have complex and numerous dependencies, and usually do not have explicit software architectures. Therefore they are hard to maintain, and parts of them are difficult to reuse. The component-based development paradigm emerged for improving these aspects and for supporting effective maintainability and reuse. It provides better understandability through a high-level architecture view of the application. Thereby migrating object-oriented applications to component-based ones will contribute to improving these characteristics (maintainability and reuse). In this paper, we propose an approach to automatically transform object-oriented applications to component-based ones. More particularly, the input of the approach is the result provided by software architecture recovery: a component-based architecture description.
Then, our approach transforms the object-oriented source code in order to produce deployable components. We focus in this paper on the transformation of source code related to instantiation and inheritance dependencies between classes that are in different components. We experimented with the proposed solution by transforming a collection of Java applications into the OSGi framework. The experimental results are discussed in this paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Lopez:2016:SSP, author = "Michael Lopez and C. Jasson Casey and Gabriel {Dos Reis} and Colton Chojnacki", title = "Safer {SDN} programming through {Arbiter}", journal = j-SIGPLAN, volume = "51", number = "3", pages = "65--74", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814218", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software Defined Networking (SDN) programs are written with respect to assumptions on software and hardware facilities and protocol definitions. Silent mismatches between the expected feature set and implemented feature set of SDN artifacts can easily lead to hard-to-debug network configurations, decreased network performance, outages, or worse, security vulnerabilities. We show how the paradigm of axiomatic programming, supported by practical dependent types, provides effective support for SDN executable specifications and verification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Kolesnichenko:2016:CBG, author = "Alexey Kolesnichenko and Christopher M. Poskitt and Sebastian Nanz and Bertrand Meyer", title = "Contract-based general-purpose {GPU} programming", journal = j-SIGPLAN, volume = "51", number = "3", pages = "75--84", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814216", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Using GPUs as general-purpose processors has revolutionized parallel computing by offering, for a large and growing set of algorithms, massive data-parallelization on desktop machines. An obstacle to widespread adoption, however, is the difficulty of programming them and the low-level control of the hardware required to achieve good performance. This paper suggests a programming library, SafeGPU, that aims at striking a balance between programmer productivity and performance, by making GPU data-parallel operations accessible from within a classical object-oriented programming language. The solution is integrated with the design-by-contract approach, which increases confidence in functional program correctness by embedding executable program specifications into the program text. We show that our library leads to modular and maintainable code that is accessible to GPGPU non-experts, while providing performance that is comparable with hand-written CUDA code.
Furthermore, runtime contract checking turns out to be feasible, as the contracts can be executed on the GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Yamaguchi:2016:IMS, author = "Hiroshi Yamaguchi and Shigeru Chiba", title = "Inverse macro in {Scala}", journal = j-SIGPLAN, volume = "51", number = "3", pages = "85--94", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new variant of typed syntactic macro systems named inverse macro, which improves the expressiveness of macro systems. The inverse macro system makes it possible to implement operators with complex side-effects, such as lazy operators and delimited continuation operators, which are beyond the power of existing macro systems. We have implemented the inverse macro system as an extension to Scala 2.11. We also show the expressiveness of the inverse macro system by comparing two versions of shift/reset, bundled in Scala 2.11 and implemented with the inverse macro system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Adam:2016:TTS, author = "Sorin Adam and Ulrik Pagh Schultz", title = "Towards tool support for spreadsheet-based domain-specific languages", journal = j-SIGPLAN, volume = "51", number = "3", pages = "95--98", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814215", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Spreadsheets are commonly used by non-programmers to store data in a structured form; this data can in some cases be considered to be a program in a domain-specific language (DSL). Unlike for ordinary text-based domain-specific languages, however, there is currently no formalism for expressing the syntax of such spreadsheet-based DSLs (SDSLs), and there is no tool support for automatically generating language infrastructure such as parsers and IDE support. In this paper we define a simple notion of two-dimensional grammars for SDSLs, and show how such grammars can be used for automatically generating parsers that extract structured data from a spreadsheet in the form of an AST.
We demonstrate automatic generation of parsers for a number of examples, including the questionnaire DSL from LWC2014 and a DSL for writing safety specifications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Byalik:2016:NNA, author = "Antuan Byalik and Sanchit Chadha and Eli Tilevich", title = "Native-$2$-native: automated cross-platform code synthesis from web-based programming resources", journal = j-SIGPLAN, volume = "51", number = "3", pages = "99--108", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For maximal market penetration, popular mobile applications are typically supported on all major platforms, including Android and iOS. Despite the vast differences in the look-and-feel of major mobile platforms, applications running on these platforms in essence provide the same core functionality. As an application is maintained and evolved, the resulting changes must be replicated on all the supported platforms, a tedious and error-prone programming process. Existing automated source-to-source translation tools prove inadequate due to the structural and idiomatic differences in how functionalities are expressed across major platforms. In this paper, we present a new approach---Native-2-Native---that automatically synthesizes code for a mobile application to make use of native resources on one platform, based on the equivalent program transformations performed on another platform. First, the programmer modifies a mobile application's Android version to make use of some native resource, with a plugin capturing code changes. Based on the changes, the system then parameterizes a web search query over popular programming resources (e.g., Google Code, StackOverflow, etc.), to discover equivalent iOS code blocks with the closest similarity to the programmer-written Android code. The discovered iOS code block is then presented to the programmer as an automatically synthesized Swift source file to further fine-tune and subsequently integrate in the mobile application's iOS version. Our evaluation, enhancing mobile applications to make use of common native resources, shows that the presented approach can correctly synthesize more than 86\% of Swift code for the subject applications' iOS versions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Makarov:2016:CMS, author = "Dmitri Makarov and Matthias Hauswirth", title = "{CLOP}: a multi-stage compiler to seamlessly embed heterogeneous code", journal = j-SIGPLAN, volume = "51", number = "3", pages = "109--112", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814211", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous programming complicates software development. 
We present CLOP, a platform that embeds code targeting heterogeneous compute devices in a convenient and clean way, allowing unobstructed data flow between the host code and the devices, reducing the amount of source code by an order of magnitude. The CLOP compiler uses the standard facilities of the D programming language to generate code strictly at compile-time. In this paper we describe the CLOP language and the CLOP compiler implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Ringert:2016:CCG, author = "Jan Oliver Ringert and Bernhard Rumpe and Andreas Wortmann", title = "Composing code generators for {C\&C} {ADLs} with Application-specific behavior languages (tool demonstration)", journal = j-SIGPLAN, volume = "51", number = "3", pages = "113--116", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814224", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modeling software systems as component {\&} connector architectures with application-specific behavior modeling languages enables domain experts to describe each component behavior with the most appropriate language. Generating executable systems for such language aggregates requires composing appropriate code generators for the participating languages. Previous work on code generator composition either focuses on white-box integration based on code generator internals or requires extensive handcrafting of integration code. We demonstrate an approach to black-box generator composition for architecture description languages that relies on explicit interfaces and exploits the encapsulation of components. This approach is implemented for the architecture modeling framework MontiArcAutomaton and has been evaluated in various contexts. Ultimately, black-box code generator composition facilitates development of code generators for architecture description languages with embedded behavior languages and increases code generator reuse.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Kabac:2016:OMS, author = "Milan Kab{\'a}c and Charles Consel", title = "Orchestrating masses of sensors: a design-driven development approach", journal = j-SIGPLAN, volume = "51", number = "3", pages = "117--120", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814226", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes a design-driven development approach that is dedicated to the domain of orchestration of masses of sensors. The developer declares what an application does using a domain-specific language (DSL). 
Our compiler processes domain-specific declarations to generate a customized programming framework that guides and supports the programming phase.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Foust:2016:GRP, author = "Gabriel Foust and Jaakko J{\"a}rvi and Sean Parent", title = "Generating reactive programs for graphical user interfaces from multi-way dataflow constraint systems", journal = j-SIGPLAN, volume = "51", number = "3", pages = "121--130", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814207", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For a GUI to remain responsive, it must be able to schedule lengthy tasks to be executed asynchronously. In the traditional approach to GUI implementation---writing functions to handle individual user events---asynchronous programming easily leads to defects. Ensuring that all data dependencies are respected is difficult when new events arrive while prior events are still being handled. Reactive programming techniques, gaining popularity in GUI programming, help since they make data dependencies explicit and enforce them automatically as variables' values change. However, data dependencies in GUIs usually change along with their state. Reactive programming must therefore describe a GUI as a collection of many reactive programs, whose interaction the programmer must explicitly coordinate. This paper presents a declarative approach for GUI programming that relieves the programmer from coordinating asynchronous computations. The approach is based on our prior work on ``property models'', where GUI state is maintained by a dataflow constraint system. A property model responds to user events by atomically constructing new data dependencies and scheduling asynchronous computations to enforce those dependencies. In essence, a property model dynamically generates a reactive program, adding to it as new events occur. The approach gives the following guarantee: the same sequence of events produces the same results, regardless of the timing of those events.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Florence:2016:PPP, author = "Spencer P. Florence and Burke Fetscher and Matthew Flatt and William H. Temps and Tina Kiguradze and Dennis P. West and Charlotte Niznik and Paul R. Yarnold and Robert Bruce Findler and Steven M. Belknap", title = "{POP-PL}: a patient-oriented prescription programming language", journal = j-SIGPLAN, volume = "51", number = "3", pages = "131--140", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814221", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Medical professionals have long used algorithmic thinking to describe and implement health care processes without the benefit of the conceptual framework provided by a programming language. Instead, medical algorithms are expressed using English, flowcharts, or data tables.
This results in prescriptions that are difficult to understand, hard to debug, and awkward to reuse. This paper reports on the design and evaluation of a domain-specific programming language, POP-PL, for expressing medical algorithms. The design draws on the experience of researchers in two disciplines, programming languages and medicine. The language is based around the idea that programs and humans have complementary strengths that, when combined, can make for safer, more accurate performance of prescriptions. We implemented a prototype of our language and evaluated its design by writing prescriptions in the new language and administering a usability survey to medical professionals. This formative evaluation suggests that medical prescriptions can be conveyed by a programming language's mode of expression and provides useful information for refining the language. Analysis of the survey results suggests that medical professionals can understand and correctly modify programs in POP-PL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Selgrad:2016:LGV, author = "Kai Selgrad and Alexander Lier and Franz K{\"o}ferl and Marc Stamminger and Daniel Lohmann", title = "Lightweight, generative variant exploration for high-performance graphics applications", journal = j-SIGPLAN, volume = "51", number = "3", pages = "141--150", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814220", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Rendering performance is an everlasting goal of computer graphics and a significant driver for advances in both hardware architecture and algorithms. Thereby, it has become possible to apply advanced computer graphics technology even in low-cost embedded appliances, such as car instruments. Yet, to come up with an efficient implementation, developers have to put enormous efforts into hardware/problem-specific tailoring, fine-tuning, and domain exploration, which requires profound expert knowledge. If a good solution has been found, there is a high probability that it does not work as well with other architectures or even the next hardware generation. Generative DSL-based approaches could mitigate these efforts and provide for an efficient exploration of algorithmic variants and hardware-specific tuning ideas. However, in vertically organized industries, such as automotive, suppliers are reluctant to introduce these techniques as they fear loss of control, high introduction costs, and additional constraints imposed by the OEM with respect to software and tool-chain certification. Moreover, suppliers do not want to share their generic solutions with the OEM, but only concrete instances. To this end, we propose a light-weight and incremental approach for meta programming of graphics applications. Our approach relies on an existing formulation of C-like languages that is amenable to meta programming, which we extend to become a lightweight language to combine algorithmic features.
Our method provides a concise notation for meta programs and generates easily sharable output in the appropriate C-style target language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Seidl:2016:GSP, author = "Christoph Seidl and Sven Schuster and Ina Schaefer", title = "Generative software product line development using variability-aware design patterns", journal = j-SIGPLAN, volume = "51", number = "3", pages = "151--160", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814212", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software Product Lines (SPLs) are an approach to reuse in-the-large that models a set of closely related software systems in terms of commonalities and variabilities. Design patterns are best practices for addressing recurring design problems in object-oriented source code. In the practice of implementing an SPL, instances of certain design patterns are employed to handle variability, which makes these ``variability-aware design patterns'' a best practice for SPL design. However, there currently is no dedicated method for proactively developing SPL using design patterns suitable for realizing variable functionality. In this paper, we present a method to perform generative SPL development with design patterns. We use role models to capture design patterns and their relation to a variability model. We further allow mapping of individual design pattern roles to elements of realization artifacts to be generated (e.g., classes, methods) and check the conformance of the realization with the specification of the pattern. With this method, we support proactive development of SPL using design patterns to apply best practices for the realization of variability. We present an implementation of our approach within the Eclipse IDE and demonstrate it within a case study.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Font:2016:AMR, author = "Jaime Font and Lorena Arcega and {\O}ystein Haugen and Carlos Cetina", title = "Addressing metamodel revisions in model-based software product lines", journal = j-SIGPLAN, volume = "51", number = "3", pages = "161--170", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814214", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Metamodels evolve over time, which can break the conformance between the models and the metamodel. Model migration strategies aim to co-evolve models and metamodels together, but their application is not fully automatizable and is thus cumbersome and error prone. We introduce the Variable MetaModel (VMM) strategy to address the evolution of the reusable model assets of a model-based Software Product Line. The VMM strategy applies variability modeling ideas to express the evolution of the metamodel in terms of commonalities and variabilities. When the metamodel evolves, the models continue to conform to the VMM, avoiding the need for migration. 
We have applied both the traditional migration strategy and the VMM strategy to a retrospective case study that includes 13 years of evolution of our industrial partner, an induction hobs manufacturer. The comparison between the two strategies shows better results for the VMM strategy in terms of model indirection, automation, and trust leak.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Inostroza:2016:MIM, author = "Pablo Inostroza and Tijs van der Storm", title = "Modular interpreters for the masses: implicit context propagation using object algebras", journal = j-SIGPLAN, volume = "51", number = "3", pages = "171--180", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814209", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modular interpreters have the potential to achieve component-based language development: instead of writing language interpreters from scratch, they can be assembled from reusable, semantic building blocks. Unfortunately, traditional language interpreters are hard to extend because different language constructs may require different interpreter signatures. For instance, arithmetic interpreters produce a value without any context information, whereas binding constructs require an additional environment. In this paper, we present a practical solution to this problem based on implicit context propagation. By structuring denotational-style interpreters as Object Algebras, base interpreters can be retroactively lifted into new interpreters that have an extended signature. The additional parameters are implicitly propagated behind the scenes, through the evaluation of the base interpreter. Interpreter lifting enables a flexible style of component-based language development. The technique works in mainstream object-oriented languages, does not sacrifice type safety or separate compilation, and can be easily automated. We illustrate implicit context propagation using a modular definition of Featherweight Java and its extension to support side-effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Noguera:2016:MQQ, author = "Carlos Noguera and Viviane Jonckers", title = "Model querying with query models", journal = j-SIGPLAN, volume = "51", number = "3", pages = "181--184", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814225", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Model querying is an integral part of Model-Driven Engineering. Developers query models when specifying model transformations, when defining model constraints, or simply when they need to extract some information from the model. Model queries are often specified in a general-purpose programming language, with developers just navigating models through their programming interfaces. OCL is the best known model query language, and while successful, it is difficult to express complex structural properties featured in target model elements. 
In this paper we describe a model query facility that aims at easing the description of structural features in a query. In our approach, developers model their queries by reusing fragments of the target model to specify the invariant parts of the template, augmented with variables and special relations to specify what can vary. The query itself conforms to a meta-model that extends the meta-model under-query. By reusing the queried meta-model developers can reduce the mental overhead that comes from using a different language to specify the queries. We have developed a proof of concept tool for the Eclipse Modeling Framework (EMF) that (1) generates a query meta-model from a target meta-model, (2) allows the construction of queries using a graphical, graph-based editor and (3) executes the queries by translating them to a set of logic predicates that are then evaluated using an extension of the logic-based query language Ekeko.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '15 conference proceedings.", } @Article{Zhou:2016:PUH, author = "Yuanyuan Zhou", title = "Programming Uncertain{$<$T$>$}hings", journal = j-SIGPLAN, volume = "51", number = "4", pages = "1--2", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872416", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Innovation flourishes with good abstractions. For instance, codification of the IEEE Floating Point standard in 1985 was critical to the subsequent success of scientific computing. Programming languages currently lack appropriate abstractions for uncertain data. Applications already use estimates from sensors, machine learning, big data, humans, and approximate algorithms, but most programming languages do not help developers address correctness, programmability, and optimization problems due to estimates. To address these problems, we propose a new programming abstraction called Uncertain{$<$T$>$}. We encourage the community to develop and use abstractions for estimates.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Abadal:2016:WAF, author = "Sergi Abadal and Albert Cabellos-Aparicio and Eduard Alarcon and Josep Torrellas", title = "{WiSync}: an Architecture for Fast Synchronization through On-Chip Wireless Communication", journal = j-SIGPLAN, volume = "51", number = "4", pages = "3--17", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872396", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In shared-memory multiprocessing, fine-grain synchronization is challenging because it requires frequent communication. As technology scaling delivers larger manycore chips, such pattern is expected to remain costly to support. In this paper, we propose to address this challenge by using on-chip wireless communication. Each core has a transceiver and an antenna to communicate with all the other cores. This environment supports very low latency global communication. Our architecture, called WiSync, uses a per-core Broadcast Memory (BM).
When a core writes to its BM, all the other 100+ BMs get updated in less than 10 processor cycles. We also use a second wireless channel with cheaper transfers to execute barriers efficiently. WiSync supports multiprogramming, virtual memory, and context switching. Our evaluation with simulations of 128-threaded kernels and 64-threaded applications shows that WiSync speeds-up synchronization substantially. Compared to using advanced conventional synchronization, WiSync attains an average speedup of nearly one order of magnitude for the kernels, and 1.12 for PARSEC and SPLASH-2.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Wang:2016:RTE, author = "Xiaodong Wang and Jos{\'e} F. Mart{\'\i}nez", title = "{ReBudget}: Trading Off Efficiency vs. Fairness in Market-Based Multicore Resource Allocation via Runtime Budget Reassignment", journal = j-SIGPLAN, volume = "51", number = "4", pages = "19--32", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficiently allocating shared resources in computer systems is critical to optimizing execution. Recently, a number of market-based solutions have been proposed to attack this problem. Some of them provide provable theoretical bounds to efficiency and/or fairness losses under market equilibrium. However, they are limited to markets with potentially important constraints, such as enforcing equal budget for all players, or curve-fitting players' utility into a specific function type. Moreover, they do not generally provide an intuitive ``knob'' to control efficiency vs. fairness. In this paper, we introduce two new metrics, Market Utility Range (MUR) and Market Budget Range (MBR), through which we provide for the first time theoretical bounds on efficiency and fairness of market equilibria under arbitrary budget assignments. We leverage this result and propose ReBudget, an iterative budget re-assignment algorithm that can be used to control efficiency vs. fairness at run-time. We apply our algorithm to a multi-resource allocation problem in multicore chips. Our evaluation using detailed execution-driven simulations shows that our budget re-assignment technique is intuitive, effective, and efficient.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Zhu:2016:DEQ, author = "Haishan Zhu and Mattan Erez", title = "Dirigent: Enforcing {QoS} for Latency-Critical Tasks on Shared Multicore Systems", journal = j-SIGPLAN, volume = "51", number = "4", pages = "33--47", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Latency-critical applications suffer from both average performance degradation and reduced completion time predictability when collocated with batch tasks. 
Such variation forces the system to overprovision resources to ensure Quality of Service (QoS) for latency-critical tasks, degrading overall system throughput. We explore the causes of this variation and exploit the opportunities of mitigating variation directly to simultaneously improve both QoS and utilization. We develop, implement, and evaluate Dirigent, a lightweight performance-management runtime system that accurately controls the QoS of latency-critical applications at fine time scales, leveraging existing architecture mechanisms. We evaluate Dirigent on a real machine and show that it is significantly more effective than configurations representative of prior schemes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kuperman:2016:PR, author = "Yossi Kuperman and Eyal Moscovici and Joel Nider and Razya Ladelsky and Abel Gordon and Dan Tsafrir", title = "Paravirtual Remote {I/O}", journal = j-SIGPLAN, volume = "51", number = "4", pages = "49--65", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872378", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The traditional ``trap and emulate'' I/O paravirtualization model conveniently allows for I/O interposition, yet it inherently incurs costly guest-host context switches. The newer ``sidecore'' model eliminates this overhead by dedicating host (side)cores to poll the relevant guest memory regions and react accordingly without context switching. But the dedication of sidecores on each host might be wasteful when I/O activity is low, or it might not provide enough computational power when I/O activity is high. We propose to alleviate this problem at rack scale by consolidating the dedicated sidecores spread across several hosts onto one server. The hypervisor is then effectively split into two parts: the local hypervisor that hosts the VMs, and the remote hypervisor that processes their paravirtual I/O. We call this model vRIO---paraVirtual Remote I/O. We find that by increasing the latency somewhat, it provides comparable throughput with fewer sidecores and superior throughput with the same number of sidecores as compared to the state of the art. vRIO additionally constitutes a new, cost-effective way to consolidate I/O devices (on the remote hypervisor) while supporting efficient programmable I/O interposition.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kaufmann:2016:HPP, author = "Antoine Kaufmann and Simon Peter and Naveen Kr. Sharma and Thomas Anderson and Arvind Krishnamurthy", title = "High Performance Packet Processing with {FlexNIC}", journal = j-SIGPLAN, volume = "51", number = "4", pages = "67--81", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The recent surge of network I/O performance has put enormous pressure on memory and software I/O processing sub systems. 
We argue that the primary reason for high memory and processing overheads is the inefficient use of these resources by current commodity network interface cards (NICs). We propose FlexNIC, a flexible network DMA interface that can be used by operating systems and applications alike to reduce packet processing overheads. FlexNIC allows services to install packet processing rules into the NIC, which then executes simple operations on packets while exchanging them with host memory. Thus, our proposal moves some of the packet processing traditionally done in software to the NIC, where it can be done flexibly and at high speed. We quantify the potential benefits of FlexNIC by emulating the proposed FlexNIC functionality with existing hardware or in software. We show that significant gains in application performance are possible, in terms of both latency and throughput, for several widely used applications, including a key-value store, a stream processing system, and an intrusion detection system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Bornholt:2016:SCF, author = "James Bornholt and Antoine Kaufmann and Jialin Li and Arvind Krishnamurthy and Emina Torlak and Xi Wang", title = "Specifying and Checking File System Crash-Consistency Models", journal = j-SIGPLAN, volume = "51", number = "4", pages = "83--98", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872406", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications depend on persistent storage to recover state after system crashes. But the POSIX file system interfaces do not define the possible outcomes of a crash. As a result, it is difficult for application writers to correctly understand the ordering of and dependencies between file system operations, which can lead to corrupt application state and, in the worst case, catastrophic data loss. This paper presents crash-consistency models, analogous to memory consistency models, which describe the behavior of a file system across crashes. Crash-consistency models include both litmus tests, which demonstrate allowed and forbidden behaviors, and axiomatic and operational specifications. We present a formal framework for developing crash-consistency models, and a toolkit, called Ferrite, for validating those models against real file system implementations. We develop a crash-consistency model for ext4, and use Ferrite to demonstrate unintuitive crash behaviors of the ext4 implementation. To demonstrate the utility of crash-consistency models to application writers, we use our models to prototype proof-of-concept verification and synthesis tools, as well as new library interfaces for crash-safe applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Prasad:2016:PMR, author = "Aravinda Prasad and K. 
Gopinath", title = "Prudent Memory Reclamation in Procrastination-Based Synchronization", journal = j-SIGPLAN, volume = "51", number = "4", pages = "99--112", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Procrastination is the fundamental technique used in synchronization mechanisms such as Read-Copy-Update (RCU) where writers, in order to synchronize with readers, defer the freeing of an object until there are no readers referring to the object. The synchronization mechanism determines when the deferred object is safe to reclaim and when it is actually reclaimed. Hence, such memory reclamations are completely oblivious of the memory allocator state. This induces poor memory allocator performance, for instance, when the reclamations are ill-timed. Furthermore, deferred objects provide hints about the future that inform memory regions that are about to be freed. Although useful, hints are not exploited as deferred objects are not visible to memory allocators. We introduce Prudence, a dynamic memory allocator, that is tightly integrated with the synchronization mechanism to ensure visibility of deferred objects to the memory allocator. Such an integration enables Prudence to (i) identify the safe time to reclaim deferred objects' memory, (ii) have an inclusive view of the allocated, free and about-to-be-freed objects, and (iii) exploit optimizations based on the hints about the future during important state transitions. Our evaluation in the Linux kernel shows that Prudence integrated with RCU performs 3.9X to 28X better in micro-benchmarks compared to SLUB, a recent memory allocator in the Linux kernel. It also improves the overall performance perceptibly (4\%-18\%) for a mix of widely used synthetic and application benchmarks. Further, it performs better (up to 98\%) in terms of object hits in caches, object cache churns, slab churns, peak memory usage and total fragmentation, when compared with the SLUB allocator.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Mukkara:2016:WID, author = "Anurag Mukkara and Nathan Beckmann and Daniel Sanchez", title = "{Whirlpool}: Improving Dynamic Cache Management with Static Data Classification", journal = j-SIGPLAN, volume = "51", number = "4", pages = "113--127", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872363", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cache hierarchies are increasingly non-uniform and difficult to manage. Several techniques, such as scratchpads or reuse hints, use static information about how programs access data to manage the memory hierarchy. Static techniques are effective on regular programs, but because they set fixed policies, they are vulnerable to changes in program behavior or available cache space. Instead, most systems rely on dynamic caching policies that adapt to observed program behavior. 
Unfortunately, dynamic policies spend significant resources trying to learn how programs use memory, and yet they often perform worse than a static policy. We present Whirlpool, a novel approach that combines static information with dynamic policies to reap the benefits of each. Whirlpool statically classifies data into pools based on how the program uses memory. Whirlpool then uses dynamic policies to tune the cache to each pool. Hence, rather than setting policies statically, Whirlpool uses static analysis to guide dynamic policies. We present both an API that lets programmers specify pools manually and a profiling tool that discovers pools automatically in unmodified binaries. We evaluate Whirlpool on a state-of-the-art NUCA cache. Whirlpool significantly outperforms prior approaches: on sequential programs, Whirlpool improves performance by up to 38\% and reduces data movement energy by up to 53\%; on parallel programs, Whirlpool improves performance by up to 67\% and reduces data movement energy by up to 2.6x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Jeon:2016:TTD, author = "Myeongjae Jeon and Yuxiong He and Hwanju Kim and Sameh Elnikety and Scott Rixner and Alan L. Cox", title = "{TPC}: Target-Driven Parallelism Combining Prediction and Correction to Reduce Tail Latency in Interactive Services", journal = j-SIGPLAN, volume = "51", number = "4", pages = "129--141", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In interactive services such as web search, recommendations, games and finance, reducing the tail latency is crucial to provide fast response to every user. Using web search as a driving example, we systematically characterize interactive workload to identify the opportunities and challenges for reducing tail latency. We find that the workload consists of mainly short requests that do not benefit from parallelism, and a few long requests which significantly impact the tail but exhibit high parallelism speedup. This motivates estimating request execution time, using a predictor, to identify long requests and to parallelize them. Prediction, however, is not perfect; a long request mispredicted as short is likely to contribute to the server tail latency, setting a ceiling on the achievable tail latency. We propose TPC, an approach that combines prediction information judiciously with dynamic correction for inaccurate prediction. Dynamic correction increases parallelism to accelerate a long request that is mispredicted as short. TPC carefully selects the appropriate target latencies based on system load and parallelism efficiency to reduce tail latency. We implement TPC and several prior approaches to compare them experimentally on a single search server and on a cluster of 40 search servers. The experimental results show that TPC reduces the 99th- and 99.9th-percentile latency by up to 40\% compared with the best prior work. 
Moreover, we evaluate TPC on a finance server, demonstrating its effectiveness on reducing tail latency of interactive services beyond web search.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Brown:2016:HBS, author = "Fraser Brown and Andres N{\"o}tzli and Dawson Engler", title = "How to Build Static Checking Systems Using Orders of Magnitude Less Code", journal = j-SIGPLAN, volume = "51", number = "4", pages = "143--157", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872364", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern static bug finding tools are complex. They typically consist of hundreds of thousands of lines of code, and most of them are wedded to one language (or even one compiler). This complexity makes the systems hard to understand, hard to debug, and hard to retarget to new languages, thereby dramatically limiting their scope. This paper reduces checking system complexity by addressing a fundamental assumption, the assumption that checkers must depend on a full-blown language specification and compiler front end. Instead, our program checkers are based on drastically incomplete language grammars (``micro-grammars'') that describe only portions of a language relevant to a checker. As a result, our implementation is tiny---roughly 2500 lines of code, about two orders of magnitude smaller than a typical system. We hope that this dramatic increase in simplicity will allow people to use more checkers on more systems in more languages. We implement our approach in $\mu$chex, a language-agnostic framework for writing static bug checkers. We use it to build micro-grammar based checkers for six languages (C, the C preprocessor, C++, Java, JavaScript, and Dart) and find over 700 errors in real-world projects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Zhang:2016:TED, author = "Tong Zhang and Dongyoon Lee and Changhee Jung", title = "{TxRace}: Efficient Data Race Detection Using Commodity Hardware Transactional Memory", journal = j-SIGPLAN, volume = "51", number = "4", pages = "159--173", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872384", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Detecting data races is important for debugging shared-memory multithreaded programs, but the high runtime overhead prevents the wide use of dynamic data race detectors. This paper presents TxRace, a new software data race detector that leverages commodity hardware transactional memory (HTM) to speed up data race detection. TxRace instruments a multithreaded program to transform synchronization-free regions into transactions, and exploits the conflict detection mechanism of HTM for lightweight data race detection at runtime.
However, the limitations of the current best-effort commodity HTMs expose several challenges in using them for data race detection: (1) lack of ability to pinpoint racy instructions, (2) false positives caused by cache line granularity of conflict detection, and (3) transactional aborts for non-conflict reasons (e.g., capacity or unknown). To overcome these challenges, TxRace performs lightweight HTM-based data race detection at first, and occasionally switches to slow yet precise data race detection only for the small fraction of execution intervals in which potential races are reported by HTM. According to the experimental results, TxRace reduces the average runtime overhead of dynamic data race detection from 11.68x to 4.65x with only a small number of false negatives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Amani:2016:CVH, author = "Sidney Amani and Alex Hixon and Zilin Chen and Christine Rizkallah and Peter Chubb and Liam O'Connor and Joel Beeren and Yutaka Nagashima and Japheth Lim and Thomas Sewell and Joseph Tuong and Gabriele Keller and Toby Murray and Gerwin Klein and Gernot Heiser", title = "{CoGENT}: Verifying High-Assurance File System Implementations", journal = j-SIGPLAN, volume = "51", number = "4", pages = "175--188", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach to writing and formally verifying high-assurance file-system code in a restricted language called COGENT, supported by a certifying compiler that produces C code, high-level specification of COGENT, and translation correctness proofs. The language is strongly typed and guarantees absence of a number of common file system implementation errors. We show how verification effort is drastically reduced for proving higher-level properties of the file system implementation by reasoning about the generated formal specification rather than its low-level C code. We use the framework to write two Linux file systems, and compare their performance with their native C implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Asmussen:2016:MHO, author = "Nils Asmussen and Marcus V{\"o}lp and Benedikt N{\"o}then and Hermann H{\"a}rtig and Gerhard Fettweis", title = "{M3}: a Hardware\slash Operating-System Co-Design to Tame Heterogeneous Manycores", journal = j-SIGPLAN, volume = "51", number = "4", pages = "189--203", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872371", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the last decade, the number of available cores increased and heterogeneity grew. In this work, we ask the question whether the design of the current operating systems (OSes) is still appropriate if these trends continue and lead to abundantly available but heterogeneous cores, or whether it forces a fundamental rethinking of how systems are designed. We argue that: 1. 
hiding heterogeneity behind a common hardware interface unifies, to a large extent, the control and coordination of cores and accelerators in the OS, 2. isolating at the network-on-chip rather than with processor features (like privileged mode, memory management unit, ...), allows running untrusted code on arbitrary cores, and 3. providing OS services via protocols over the network-on-chip, instead of via system calls, makes them accessible to arbitrary types of cores as well. In summary, this turns accelerators into first-class citizens and enables a single and convenient programming environment for all cores without the need to trust any application. In this paper, we introduce network-on-chip-level isolation, present the design of our microkernel-based OS, M3, and the common hardware interface, and evaluate the performance of our prototype in comparison to Linux. A bit surprising, without using accelerators, M3 outperforms Linux in some application-level benchmarks by more than a factor of five.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Liaqat:2016:SEE, author = "Daniyal Liaqat and Silviu Jingoi and Eyal de Lara and Ashvin Goel and Wilson To and Kevin Lee and Italo {De Moraes Garcia} and Manuel Saldana", title = "{Sidewinder}: an Energy Efficient and Developer Friendly Heterogeneous Architecture for Continuous Mobile Sensing", journal = j-SIGPLAN, volume = "51", number = "4", pages = "205--215", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872398", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications that perform continuous sensing on mobile phones have the potential to revolutionize everyday life. Examples range from medical and health monitoring applications, such as pedometers and fall detectors, to participatory sensing applications, such as noise pollution, traffic and seismic activity monitoring. Unfortunately, current mobile devices are a poor match for continuous sensing applications as they require the device to remain awake for extended periods of time, resulting in poor battery life. This paper presents Sidewinder, a new approach towards offloading sensor data processing to a low-power processor and waking up the main processor when events of interest occur. This approach differs from other heterogeneous architectures in that developers are presented with a programming interface that lets them construct application specific wake-up conditions by linking together and parameterizing predefined sensor data processing algorithms. 
Our experiments indicate performance that is comparable to approaches that provide fully programmable offloading, but do so with a much simpler programming interface that facilitates deployment and portability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Balkind:2016:OOS, author = "Jonathan Balkind and Michael McKeown and Yaosheng Fu and Tri Nguyen and Yanqi Zhou and Alexey Lavrov and Mohammad Shahrad and Adi Fuchs and Samuel Payne and Xiaohua Liang and Matthew Matl and David Wentzlaff", title = "{OpenPiton}: an Open Source Manycore Research Framework", journal = j-SIGPLAN, volume = "51", number = "4", pages = "217--232", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872414", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Industry is building larger, more complex, manycore processors on the back of strong institutional knowledge, but academic projects face difficulties in replicating that scale. To alleviate these difficulties and to develop and share knowledge, the community needs open architecture frameworks for simulation, synthesis, and software exploration which support extensibility, scalability, and configurability, alongside an established base of verification tools and supported software. In this paper we present OpenPiton, an open source framework for building scalable architecture research prototypes from 1 core to 500 million cores. OpenPiton is the world's first open source, general-purpose, multithreaded manycore processor and framework. OpenPiton leverages the industry hardened OpenSPARC T1 core with modifications and builds upon it with a scratch-built, scalable uncore creating a flexible, modern manycore design. In addition, OpenPiton provides synthesis and backend scripts for ASIC and FPGA to enable other researchers to bring their designs to implementation. OpenPiton provides a complete verification infrastructure of over 8000 tests, is supported by mature software tools, runs full-stack multiuser Debian Linux, and is written in industry standard Verilog. Multiple implementations of OpenPiton have been created including a taped-out 25-core implementation in IBM's 32nm process and multiple Xilinx FPGA prototypes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Lustig:2016:CVM, author = "Daniel Lustig and Geet Sethi and Margaret Martonosi and Abhishek Bhattacharjee", title = "{COATCheck}: Verifying Memory Ordering at the Hardware-{OS} Interface", journal = j-SIGPLAN, volume = "51", number = "4", pages = "233--247", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872399", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern computer systems include numerous compute elements, from CPUs to GPUs to accelerators. 
Harnessing their full potential requires well-defined, properly-implemented memory consistency models (MCMs), and low-level system functionality such as virtual memory and address translation (AT). Unfortunately, it is difficult to specify and implement hardware-OS interactions correctly; in the past, many hardware and OS specification mismatches have resulted in implementation bugs in commercial processors. In an effort to resolve this verification gap, this paper makes the following contributions. First, we present COATCheck, an address translation-aware framework for specifying and statically verifying memory ordering enforcement at the microarchitecture and operating system levels. We develop a domain-specific language for specifying ordering enforcement, for including ordering-related OS events and hardware micro-operations, and for programmatically enumerating happens-before graphs. Using a fast and automated static constraint solver, COATCheck can efficiently analyze interesting and important memory ordering scenarios for modern, high-performance, out-of-order processors. Second, we show that previous work on Virtual Address Memory Consistency (VAMC) does not capture every translation-related ordering scenario of interest, and that some such cases even fall outside the traditional scope of consistency. We therefore introduce the term transistency model to describe the superset of consistency which captures all translation-aware sets of ordering rules.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Markuze:2016:TIP, author = "Alex Markuze and Adam Morrison and Dan Tsafrir", title = "True {IOMMU} Protection from {DMA} Attacks: When Copy is Faster than Zero Copy", journal = j-SIGPLAN, volume = "51", number = "4", pages = "249--262", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Malicious I/O devices might compromise the OS using DMAs. The OS therefore utilizes the IOMMU to map and unmap every target buffer right before and after its DMA is processed, thereby restricting DMAs to their designated locations. This usage model, however, is not truly secure for two reasons: (1) it provides protection at page granularity only, whereas DMA buffers can reside on the same page as other data; and (2) it delays DMA buffer unmaps due to performance considerations, creating a vulnerability window in which devices can access in-use memory. We propose that OSes utilize the IOMMU differently, in a manner that eliminates these two flaws. Our new usage model restricts device access to a set of shadow DMA buffers that are never unmapped, and it copies DMAed data to/from these buffers, thus providing sub-page protection while eliminating the aforementioned vulnerability window. Our key insight is that the cost of interacting with, and synchronizing access to, the slow IOMMU hardware---required for zero-copy protection against devices---makes copying preferable to zero-copying. We implement our model in Linux and evaluate it with standard networking benchmarks utilizing a 40 Gb/s NIC. We demonstrate that despite being more secure than the safest preexisting usage model, our approach provides up to 5x higher throughput.
Additionally, whereas it is inherently less scalable than an IOMMU-less (unprotected) system, our approach incurs only 0\%--25\% performance degradation in comparison.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Awad:2016:SSZ, author = "Amro Awad and Pratyusa Manadhata and Stuart Haber and Yan Solihin and William Horne", title = "Silent Shredder: Zero-Cost Shredding for Secure Non-Volatile Main Memory Controllers", journal = j-SIGPLAN, volume = "51", number = "4", pages = "263--276", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As non-volatile memory (NVM) technologies are expected to replace DRAM in the near future, new challenges have emerged. For example, NVMs have slow and power-consuming writes, and limited write endurance. In addition, NVMs have a data remanence vulnerability, i.e., they retain data for a long time after being powered off. NVM encryption alleviates the vulnerability, but exacerbates the limited endurance by increasing the number of writes to memory. We observe that, in current systems, a large percentage of main memory writes result from data shredding in operating systems, a process of zeroing out physical pages before mapping them to new processes, in order to protect previous processes' data. In this paper, we propose Silent Shredder, which repurposes initialization vectors used in standard counter mode encryption to completely eliminate the data shredding writes. Silent Shredder also speeds up reading shredded cache lines, and hence reduces power consumption and improves overall performance. To evaluate our design, we run three PowerGraph applications and 26 multi-programmed workloads from the SPEC 2006 suite, on a gem5-based full system simulator. Silent Shredder eliminates an average of 48.6\% of the writes in the initialization and graph construction phases. It speeds up main memory reads by 3.3 times, and improves the number of instructions per cycle (IPC) by 6.4\% on average. Finally, we discuss several use cases, including virtual machines' data isolation and user-level large data initialization, where Silent Shredder can be used effectively at no extra cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kwon:2016:SPT, author = "Youngjin Kwon and Alan M. Dunn and Michael Z. Lee and Owen S. Hofmann and Yuanzhong Xu and Emmett Witchel", title = "{Sego}: Pervasive Trusted Metadata for Efficiently Verified Untrusted System Services", journal = j-SIGPLAN, volume = "51", number = "4", pages = "277--290", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sego is a hypervisor-based system that gives strong privacy and integrity guarantees to trusted applications, even when the guest operating system is compromised or hostile. 
Sego verifies operating system services, like the file system, instead of replacing them. By associating trusted metadata with user data across all system devices, Sego verifies system services more efficiently than previous systems, especially services that depend on data contents. We extensively evaluate Sego's performance on real workloads and implement a kernel fault injector to validate Sego's file system-agnostic crash consistency and recovery protocol.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Tsafrir:2016:SAW, author = "Dan Tsafrir", title = "Synopsis of the {ASPLOS '16 Wild and Crazy Ideas (WACI)} Invited-Speakers Session", journal = j-SIGPLAN, volume = "51", number = "4", pages = "291--294", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2876512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Wild and Crazy Ideas (WACI) session is a longstanding tradition at ASPLOS, soliciting talks that consist of forward-looking, visionary, inspiring, creative, far out or just plain amazing ideas presented in an exciting way. (Amusing elements in the presentations are tolerated ;-) but are in fact optional.) The first WACI session took place in 1998. Back then, the call for talks included a problem statement, which contended that ``papers usually do not get admitted to [such conferences as] ISCA or ASPLOS unless the systems that they describe are mature enough to run [some standard benchmark suites, which] has a chilling effect on the idea generation process---encouraging incremental research'' [1]. The 1998 WACI session turned out to be a great success. Its webpage states that ``there were 42 submissions [competing over] only eight time slots, [which resulted in] this session [having] a lower acceptance rate than the conference itself'' [2]. But the times they are a-changin' [3], and the WACI session no longer enjoys that many submissions (Figure 1), perhaps because nowadays there exist many forums for researchers to describe/discuss their preliminary ideas, including: the ``hot topics in'' workshops [4--7]; a journal like CAL, dedicated to early results [8]; main conferences soliciting short submissions describing ``original or unconventional ideas at a preliminary stage'' in addition to regular papers [9]; and the many workshops co-located with main conferences, like ISCA '15, which hosted thirteen such workshops [10]. Regardless of the reason for the declining number of submissions, this time we've decided to organize the WACI session differently to ensure its continued high quality. Instead of soliciting talks via an open call and hoping for the best, we proactively invited speakers whom we believe are capable of delivering excellent WACI presentations. That is, this year's WACI session consists exclusively of invited speakers. Filling up the available slots turned out to be fairly easy, as most of the researchers we invited promptly accepted our invitation. The duration of each talk was set to be eight minutes (exactly as in the first WACI session from 1998) plus two minutes for questions. The talks are outlined below. 
We believe they are interesting and exciting, and we hope the attendees of the session will find them stimulating and insightful.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Williams:2016:BIC, author = "R. Stanley Williams", title = "Brain Inspired Computing", journal = j-SIGPLAN, volume = "51", number = "4", pages = "295--295", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872417", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Phothilimthana:2016:SS, author = "Phitchaya Mangpo Phothilimthana and Aditya Thakur and Rastislav Bodik and Dinakar Dhurjati", title = "Scaling up Superoptimization", journal = j-SIGPLAN, volume = "51", number = "4", pages = "297--310", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing a code optimizer is challenging, especially for new, idiosyncratic ISAs. Superoptimization can, in principle, discover machine-specific optimizations automatically by searching the space of all instruction sequences. If we can increase the size of code fragments a superoptimizer can optimize, we will be able to discover more optimizations. We develop LENS, a search algorithm that increases the size of code a superoptimizer can synthesize by rapidly pruning away invalid candidate programs. Pruning is achieved by selectively refining the abstraction under which candidates are considered equivalent, only in the promising part of the candidate space. LENS also uses a bidirectional search strategy to prune the candidate space from both forward and backward directions. These pruning strategies allow LENS to solve twice as many benchmarks as existing enumerative search algorithms, while LENS is about 11-times faster. Additionally, we increase the effective size of the superoptimized fragments by relaxing the correctness condition using contexts (surrounding code). Finally, we combine LENS with complementary search techniques into a cooperative superoptimizer, which exploits the stochastic search to make random jumps in a large candidate space, and a symbolic (SAT-solver-based) search to synthesize arbitrary constants. While existing superoptimizers consistently solve 9--16 out of 32 benchmarks, the cooperative superoptimizer solves 29 benchmarks. It can synthesize code fragments that are up to 82\% faster than code generated by gcc -O3 from WiBench and MiBench.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Hasabnis:2016:LAI, author = "Niranjan Hasabnis and R. 
Sekar", title = "Lifting Assembly to Intermediate Representation: a Novel Approach Leveraging Compilers", journal = j-SIGPLAN, volume = "51", number = "4", pages = "311--324", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Translating low-level machine instructions into higher-level intermediate language (IL) is one of the central steps in many binary analysis and instrumentation systems. Existing systems build such translators manually. As a result, it takes a great deal of effort to support new architectures. Even for widely deployed architectures, full instruction sets may not be modeled, e.g., mature systems such as Valgrind still lack support for AVX, FMA4 and SSE4.1 for x86 processors. To overcome these difficulties, we propose a novel approach that leverages knowledge about instruction set semantics that is already embedded into modern compilers such as GCC. In particular, we present a learning-based approach for automating the translation of assembly instructions to a compiler's architecture-neutral IL. We present an experimental evaluation that demonstrates the ability of our approach to easily support many architectures (x86, ARM and AVR), including their advanced instruction sets. Our implementation is available as open-source software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Muralidharan:2016:AAC, author = "Saurav Muralidharan and Amit Roy and Mary Hall and Michael Garland and Piyush Rai", title = "Architecture-Adaptive Code Variant Tuning", journal = j-SIGPLAN, volume = "51", number = "4", pages = "325--338", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Code variants represent alternative implementations of a computation, and are common in high-performance libraries and applications to facilitate selecting the most appropriate implementation for a specific execution context (target architecture and input dataset). Automating code variant selection typically relies on machine learning to construct a model during an offline learning phase that can be quickly queried at runtime once the execution context is known. In this paper, we define a new approach called architecture-adaptive code variant tuning, where the variant selection model is learned on a set of source architectures, and then used to predict variants on a new target architecture without having to repeat the training process. We pose this as a multi-task learning problem, where each source architecture corresponds to a task; we use device features in the construction of the variant selection model. This work explores the effectiveness of multi-task learning and the impact of different strategies for device feature selection. We evaluate our approach on a set of benchmarks and a collection of six NVIDIA GPU architectures from three distinct generations. 
We achieve performance results that are mostly comparable to the previous approach of tuning for a single GPU architecture without having to repeat the learning phase.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Lin:2016:SKT, author = "Xiaofeng Lin and Yu Chen and Xiaodong Li and Junjie Mao and Jiaquan He and Wei Xu and Yuanchun Shi", title = "Scalable Kernel {TCP} Design and Implementation for Short-Lived Connections", journal = j-SIGPLAN, volume = "51", number = "4", pages = "339--352", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the rapid growth of network bandwidth, increases in CPU cores on a single machine, and application API models demanding more short-lived connections, a scalable TCP stack is performance-critical. Although many clean-state designs have been proposed, production environments still call for a bottom-up parallel TCP stack design that is backward-compatible with existing applications. We present Fastsocket, a BSD Socket-compatible and scalable kernel socket design, which achieves table-level connection partition in TCP stack and guarantees connection locality for both passive and active connections. Fastsocket architecture is a ground up partition design, from NIC interrupts all the way up to applications, which naturally eliminates various lock contentions in the entire stack. Moreover, Fastsocket maintains the full functionality of the kernel TCP stack and BSD-socket-compatible API, and thus applications need no modifications. Our evaluations show that Fastsocket achieves a speedup of 20.4x on a 24-core machine under a workload of short-lived connections, outperforming the state-of-the-art Linux kernel TCP implementations. When scaling up to 24 CPU cores, Fastsocket increases the throughput of Nginx and HAProxy by 267\% and 621\% respectively compared with the base Linux kernel. We also demonstrate that Fastsocket can achieve scalability and preserve BSD socket API at the same time. Fastsocket is already deployed in the production environment of Sina WeiBo, serving 50 million daily active users and billions of requests per day.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{ElHajj:2016:SPM, author = "Izzat {El Hajj} and Alexander Merritt and Gerd Zellweger and Dejan Milojicic and Reto Achermann and Paolo Faraboschi and Wen-mei Hwu and Timothy Roscoe and Karsten Schwan", title = "{SpaceJMP}: Programming with Multiple Virtual Address Spaces", journal = j-SIGPLAN, volume = "51", number = "4", pages = "353--368", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872366", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory-centric computing demands careful organization of the virtual address space, but traditional methods for doing so are inflexible and inefficient. 
If an application wishes to address larger physical memory than virtual address bits allow, if it wishes to maintain pointer-based data structures beyond process lifetimes, or if it wishes to share large amounts of memory across simultaneously executing processes, legacy interfaces for managing the address space are cumbersome and often incur excessive overheads. We propose a new operating system design that promotes virtual address spaces to first-class citizens, enabling process threads to attach to, detach from, and switch between multiple virtual address spaces. Our work enables data-centric applications to utilize vast physical memory beyond the virtual range, represent persistent pointer-rich data structures without special pointer representations, and share large amounts of memory between processes efficiently. We describe our prototype implementations in the DragonFly BSD and Barrelfish operating systems. We also present programming semantics and a compiler transformation to detect unsafe pointer usage. We demonstrate the benefits of our work on data-intensive applications such as the GUPS benchmark, the SAMTools genomics workflow, and the Redis key-value store.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Lin:2016:MTP, author = "Felix Xiaozhu Lin and Xu Liu", title = "{\tt memif}: Towards Programming Heterogeneous Memory Asynchronously", journal = j-SIGPLAN, volume = "51", number = "4", pages = "369--383", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872401", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To harness a heterogeneous memory hierarchy, it is advantageous to integrate application knowledge in guiding frequent memory move, i.e., replicating or migrating virtual memory regions. To this end, we present memif, a protected OS service for asynchronous, hardware-accelerated memory move. Compared to the state of the art --- page migration in Linux, memif incurs low overhead and low latency; in order to do so, it not only redefines the semantics of kernel interface but also overhauls the underlying mechanisms, including request/completion management, race handling, and DMA engine configuration. We implement memif in Linux for a server-class system-on-chip that features heterogeneous memories. Compared to the current Linux page migration, memif reduces CPU usage by up to 15\% for small pages and by up to 38x for large pages; in continuously serving requests, memif has no need for request batching and reduces latency by up to 63\%. By crafting a small runtime atop memif, we improve the throughputs for a set of streaming workloads by up to 33\%. 
Overall, memif has opened the door to software management of heterogeneous memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kim:2016:NEN, author = "Wook-Hee Kim and Jinwoong Kim and Woongki Baek and Beomseok Nam and Youjip Won", title = "{NVWAL}: Exploiting {NVRAM} in Write-Ahead Logging", journal = j-SIGPLAN, volume = "51", number = "4", pages = "385--398", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872392", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging byte-addressable non-volatile memory is considered an alternative storage device for database logs that require persistency and high performance. In this work, we develop NVWAL (NVRAM Write-Ahead Logging) for SQLite. The contribution of NVWAL consists of three elements: (i) byte-granularity differential logging that effectively eliminates the excessive I/O overhead of filesystem-based logging or journaling, (ii) transaction-aware lazy synchronization that reduces cache synchronization overhead by two-thirds, and (iii) user-level heap management of the NVRAM persistent WAL structure, which reduces the overhead of managing persistent objects. We implemented NVWAL in SQLite and measured the performance on a Nexus 5 smartphone and an NVRAM emulation board --- Tuna. Our performance study shows the following: (i) the overhead of enforcing strict ordering of NVRAM writes can be reduced via NVRAM-aware transaction management. (ii) From the application performance point of view, the overhead of guaranteeing failure atomicity is negligible; the cache line flush overhead accounts for only 0.8~4.6\% of transaction execution time. Therefore, application performance is much less sensitive to the NVRAM performance than we expected. Decreasing the NVRAM latency by one-fifth (from 1942 nsec to 437 nsec), SQLite achieves a mere 4\% performance gain (from 2517 ins/sec to 2621 ins/sec). (iii) Overall, when the write latency of NVRAM is 2 usec, NVWAL increases SQLite performance by at least 10x compared to that of WAL on flash memory (from 541 ins/sec to 5812 ins/sec).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kolli:2016:HPT, author = "Aasheesh Kolli and Steven Pelley and Ali Saidi and Peter M. Chen and Thomas F. Wenisch", title = "High-Performance Transactions for Persistent Memories", journal = j-SIGPLAN, volume = "51", number = "4", pages = "399--411", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872381", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging non-volatile memory (NVRAM) technologies offer the durability of disk with the byte-addressability of DRAM. These devices will allow software to access persistent data structures directly in NVRAM using processor loads and stores, however, ensuring consistency of persistent data across power failures and crashes is difficult. Atomic, durable transactions are a widely used abstraction to enforce such consistency. 
Implementing transactions on NVRAM requires the ability to constrain the order of NVRAM writes, for example, to ensure that a transaction's log record is complete before it is marked committed. Since NVRAM write latencies are expected to be high, minimizing these ordering constraints is critical for achieving high performance. Recent work has proposed programming interfaces to express NVRAM write ordering constraints to hardware so that NVRAM writes may be coalesced and reordered while preserving necessary constraints. Unfortunately, a straightforward implementation of transactions under these interfaces imposes unnecessary constraints. We show how to remove these dependencies through a variety of techniques, notably, deferring commit until after locks are released. We present a comprehensive analysis contrasting two transaction designs across three NVRAM programming interfaces, demonstrating up to 2.5x speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Guo:2016:HDI, author = "Qing Guo and Karin Strauss and Luis Ceze and Henrique S. Malvar", title = "High-Density Image Storage Using Approximate Memory Cells", journal = j-SIGPLAN, volume = "51", number = "4", pages = "413--426", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872413", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes tailoring image encoding for an approximate storage substrate. We demonstrate that indiscriminately storing encoded images in approximate memory generates unacceptable and uncontrollable quality degradation. The key finding is that errors in the encoded bit streams have non-uniform impact on the decoded image quality. We develop a methodology to determine the relative importance of encoded bits and store them in an approximate storage substrate. The storage cells are optimized to reduce error rate via biasing and are tuned to meet the desired reliability requirement via selective error correction. In a case study with the progressive transform codec (PTC), a precursor to JPEG XR, the proposed approximate image storage system exhibits a 2.7x increase in density of pixels per silicon volume under bounded error rates, and this achievement is additive to the storage savings of PTC compression.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Izraelevitz:2016:FAP, author = "Joseph Izraelevitz and Terence Kelly and Aasheesh Kolli", title = "Failure-Atomic Persistent Memory Updates via {JUSTDO} Logging", journal = j-SIGPLAN, volume = "51", number = "4", pages = "427--442", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872410", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Persistent memory invites applications to manipulate persistent data via load and store instructions. 
Because failures during updates may destroy transient data (e.g., in CPU registers), preserving data integrity in the presence of failures requires failure-atomic bundles of updates. Prior failure atomicity approaches for persistent memory entail overheads due to logging and CPU cache flushing. Persistent caches can eliminate the need for flushing, but conventional logging remains complex and memory intensive. We present the design and implementation of JUSTDO logging, a new failure atomicity mechanism that greatly reduces the memory footprint of logs, simplifies log management, and enables fast parallel recovery following failure. Crash-injection tests confirm that JUSTDO logging preserves application data integrity and performance evaluations show that it improves throughput 3x or more compared with a state-of-the-art alternative for a spectrum of data-intensive algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Han:2016:IMD, author = "Jaeung Han and Seungheun Jeon and Young-ri Choi and Jaehyuk Huh", title = "Interference Management for Distributed Parallel Applications in Consolidated Clusters", journal = j-SIGPLAN, volume = "51", number = "4", pages = "443--456", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872388", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Consolidating multiple applications on a system can improve the overall resource utilization of data center systems. However, such consolidation can adversely affect the performance of some applications due to interference caused by resource contention. Despite many prior studies on the interference effects in single-node systems, the interference behaviors of distributed parallel applications have not been investigated thoroughly. With distributed applications, a local interference in a node can affect the whole execution of an application spanning many nodes. This paper studies an interference modeling methodology for distributed applications to predict their performance under interference effects in consolidated clusters. This study first characterizes the effects of interference for various distributed applications over different interference settings, and analyzes how diverse interference intensities on multiple nodes affect the overall performance. Based on the characterization, this study proposes a static profiling-based model for interference propagation and heterogeneity behaviors. 
In addition, this paper presents use case studies of the modeling method, two interference-aware placement techniques for consolidated virtual clusters, which attempt to maximize the overall throughput or to guarantee the quality-of-service.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Maas:2016:THL, author = "Martin Maas and Krste Asanovi{\'c} and Tim Harris and John Kubiatowicz", title = "{Taurus}: a Holistic Language Runtime System for Coordinating Distributed Managed-Language Applications", journal = j-SIGPLAN, volume = "51", number = "4", pages = "457--471", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many distributed workloads in today's data centers are written in managed languages such as Java or Ruby. Examples include big data frameworks such as Hadoop, data stores such as Cassandra or applications such as the SOLR search engine. These workloads typically run across many independent language runtime systems on different nodes. This setup represents a source of inefficiency, as these language runtime systems are unaware of each other. For example, they may perform Garbage Collection at times that are locally reasonable but not in a distributed setting. We address these problems by introducing the concept of a Holistic Runtime System that makes runtime-level decisions for the entire distributed application rather than locally. We then present Taurus, a Holistic Runtime System prototype. Taurus is a JVM drop-in replacement, requires almost no configuration and can run unmodified off-the-shelf Java applications. Taurus enforces user-defined coordination policies and provides a DSL for writing these policies. By applying Taurus to Garbage Collection, we demonstrate the potential of such a system and use it to explore coordination strategies for the runtime systems of real-world distributed applications, to improve application performance and address tail-latencies in latency-sensitive workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Delimitrou:2016:HRE, author = "Christina Delimitrou and Christos Kozyrakis", title = "{HCloud}: Resource-Efficient Provisioning in Shared Cloud Systems", journal = j-SIGPLAN, volume = "51", number = "4", pages = "473--488", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872365", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud computing promises flexibility and high performance for users and cost efficiency for operators. To achieve this, cloud providers offer instances of different sizes, both as long-term reservations and short-term, on-demand allocations. Unfortunately, determining the best provisioning strategy is a complex, multi-dimensional problem that depends on the load fluctuation and duration of incoming jobs, and the performance unpredictability and cost of resources. 
We first compare the two main provisioning strategies (reserved and on-demand resources) on Google Compute Engine (GCE) using three representative workload scenarios with batch and latency-critical applications. We show that either approach is suboptimal for performance or cost. We then present HCloud, a hybrid provisioning system that uses both reserved and on-demand resources. HCloud determines which jobs should be mapped to reserved versus on-demand resources based on overall load, and resource unpredictability. It also determines the optimal instance size an application needs to satisfy its Quality of Service (QoS) constraints. We demonstrate that hybrid configurations improve performance by 2.1x compared to fully on-demand provisioning, and reduce cost by 46\% compared to fully reserved systems. We also show that hybrid strategies are robust to variation in system and job parameters, such as cost and system load.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Yu:2016:CWM, author = "Xiao Yu and Pallavi Joshi and Jianwu Xu and Guoliang Jin and Hui Zhang and Guofei Jiang", title = "{CloudSeer}: Workflow Monitoring of Cloud Infrastructures via Interleaved Logs", journal = j-SIGPLAN, volume = "51", number = "4", pages = "489--502", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872407", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud infrastructures provide a rich set of management tasks that operate computing, storage, and networking resources in the cloud. Monitoring the executions of these tasks is crucial for cloud providers to promptly find and understand problems that compromise cloud availability. However, such monitoring is challenging because there are multiple distributed service components involved in the executions. CloudSeer enables effective workflow monitoring. It takes a lightweight non-intrusive approach that purely works on interleaved logs widely existing in cloud infrastructures. CloudSeer first builds an automaton for the workflow of each management task based on normal executions, and then it checks log messages against a set of automata for workflow divergences in a streaming manner. Divergences found during the checking process indicate potential execution problems, which may or may not be accompanied by error log messages. For each potential problem, CloudSeer outputs necessary context information including the affected task automaton and related log messages hinting where the problem occurs to help further diagnosis. 
Our experiments on OpenStack, a popular open-source cloud infrastructure, show that CloudSeer's efficiency and problem-detection capability are suitable for online monitoring.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Kwon:2016:LCI, author = "Yonghwi Kwon and Dohyeong Kim and William Nick Sumner and Kyungtae Kim and Brendan Saltaformaggio and Xiangyu Zhang and Dongyan Xu", title = "{LDX}: Causality Inference by Lightweight Dual Execution", journal = j-SIGPLAN, volume = "51", number = "4", pages = "503--515", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872395", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Causality inference, such as dynamic taint analysis, has many applications (e.g., information leak detection). It determines whether an event e is causally dependent on a preceding event c during execution. We develop a new causality inference engine LDX. Given an execution, it spawns a slave execution, in which it mutates c and observes whether any change is induced at e. To preclude non-determinism, LDX couples the executions by sharing syscall outcomes. To handle path differences induced by the perturbation, we develop a novel on-the-fly execution alignment scheme that maintains a counter to reflect the progress of execution. The scheme relies on program analysis and compiler transformation. LDX can effectively detect information leak and security attacks with an average overhead of 6.08\% while running the master and the slave concurrently on separate CPUs, much lower than existing systems that require instruction level monitoring. Furthermore, it has much better accuracy in causality inference.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Leesatapornwongsa:2016:TTN, author = "Tanakorn Leesatapornwongsa and Jeffrey F. Lukman and Shan Lu and Haryadi S. Gunawi", title = "{TaxDC}: a Taxonomy of Non-Deterministic Concurrency Bugs in Datacenter Distributed Systems", journal = j-SIGPLAN, volume = "51", number = "4", pages = "517--530", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872374", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present TaxDC, the largest and most comprehensive taxonomy of non-deterministic concurrency bugs in distributed systems. We study 104 distributed concurrency (DC) bugs from four widely-deployed cloud-scale datacenter distributed systems, Cassandra, Hadoop MapReduce, HBase and ZooKeeper. We study DC-bug characteristics along several axes of analysis such as the triggering timing condition and input preconditions, error and failure symptoms, and fix strategies, collectively stored as 2,083 classification labels in TaxDC database. 
We discuss how our study can open up many new research directions in combating DC bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Mao:2016:RFR, author = "Junjie Mao and Yu Chen and Qixue Xiao and Yuanchun Shi", title = "{RID}: Finding Reference Count Bugs with Inconsistent Path Pair Checking", journal = j-SIGPLAN, volume = "51", number = "4", pages = "531--544", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reference counts are widely used in OS kernels for resource management. However, reference counts are not trivial to use correctly in large-scale programs because it is left to developers to make sure that an increment to a reference count is always paired with a decrement. This paper proposes inconsistent path pair checking, a novel technique that can statically discover bugs related to reference counts without knowing how reference counts should be changed in a function. A prototype called RID is implemented and evaluations show that RID can discover more than 80 bugs which were confirmed by the developers in the latest Linux kernel. The results also show that RID tends to reveal bugs caused by developers' misunderstanding of API specifications or error conditions that are not handled properly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Zhang:2016:MPU, author = "Huazhe Zhang and Henry Hoffmann", title = "Maximizing Performance Under a Power Cap: a Comparison of Hardware, Software, and Hybrid Techniques", journal = j-SIGPLAN, volume = "51", number = "4", pages = "545--559", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Power and thermal dissipation constrain multicore performance scaling. Modern processors are built such that they could sustain damaging levels of power dissipation, creating a need for systems that can implement processor power caps. A particular challenge is developing systems that can maximize performance within a power cap, and approaches have been proposed in both software and hardware. Software approaches are flexible, allowing multiple hardware resources to be coordinated for maximum performance, but software is slow, requiring a long time to converge to the power target. In contrast, hardware power capping quickly converges to the power cap, but only manages voltage and frequency, limiting its potential performance. In this work we propose PUPiL, a hybrid software/hardware power capping system. Unlike previous approaches, PUPiL combines hardware's fast reaction time with software's flexibility. We implement PUPiL on a real Linux/x86 platform and compare it to Intel's commercial hardware power capping system for both single and multi-application workloads. We find PUPiL provides the same reaction time as Intel's hardware with significantly higher performance.
On average, PUPiL outperforms hardware by 1.18--2.4x depending on workload and power target. Thus, PUPiL provides a promising way to enforce power caps with greater performance than current state-of-the-art hardware-only approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Fan:2016:CSG, author = "Songchun Fan and Seyed Majid Zahedi and Benjamin C. Lee", title = "The Computational Sprinting Game", journal = j-SIGPLAN, volume = "51", number = "4", pages = "561--575", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computational sprinting is a class of mechanisms that boost performance but dissipate additional power. We describe a sprinting architecture in which many, independent chip multiprocessors share a power supply and sprints are constrained by the chips' thermal limits and the rack's power limits. Moreover, we present the computational sprinting game, a multi-agent perspective on managing sprints. Strategic agents decide whether to sprint based on application phases and system conditions. The game produces an equilibrium that improves task throughput for data analytics workloads by 4-6$ \times $ over prior greedy heuristics and performs within 90\% of an upper bound on throughput from a globally optimized policy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Colin:2016:EIF, author = "Alexei Colin and Graham Harvey and Brandon Lucia and Alanson P. Sample", title = "An Energy-interference-free Hardware-Software Debugger for Intermittent Energy-harvesting Systems", journal = j-SIGPLAN, volume = "51", number = "4", pages = "577--589", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy-autonomous computing devices have the potential to extend the reach of computing to a scale beyond either wired or battery-powered systems. However, these devices pose a unique set of challenges to application developers who lack both hardware and software support tools. Energy harvesting devices experience power intermittence which causes the system to reset and power-cycle unpredictably, tens to hundreds of times per second. This can result in code execution errors that are not possible in continuously-powered systems and cannot be diagnosed with conventional debugging tools such as JTAG and/or oscilloscopes. We propose the Energy-interference-free Debugger, a hardware and software platform for monitoring and debugging intermittent systems without adversely affecting their energy state. The Energy-interference-free Debugger re-creates a familiar debugging environment for intermittent software and augments it with debugging primitives for effective diagnosis of intermittence bugs.
Our evaluation of the Energy-interference-free Debugger quantifies its energy-interference-freedom and shows its value in a set of debugging tasks in complex test programs and several real applications, including RFID code and a machine-learning-based activity recognition system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Witchel:2016:PPW, author = "Emmett Witchel", title = "Programmer Productivity in a World of Mushy Interfaces: Challenges of the Post-{ISA} Reality", journal = j-SIGPLAN, volume = "51", number = "4", pages = "591--591", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2876511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Since 1964, we have had the notion that the instruction set architecture (ISA) is a useful and fairly opaque abstraction layer between hardware and software. Software rode hardware's performance wave while remaining gloriously oblivious to hardware's growing complexity. Unfortunately, the jig is up. We still have ISAs, but the abstraction no longer offers seamless portability---parallel software needs to be tuned for different core counts, and heterogeneous processing elements (CPUs, GPUs, accelerators) further complicate programmability. We are better at building large-scale heterogeneous processors than we are at programming them. Maintaining software across multiple current platforms is difficult and porting to future platforms is also difficult. There have been many technical responses: virtual ISAs (e.g., NVIDIA's PTX), higher-level programming interfaces (e.g., CUDA or OpenCL), and late-stage compilation and platform-specific tailoring (e.g., Android ART), etc. A team of opinionated experts, drawn from the three ASPLOS communities, will examine the problem of programmer productivity in the post-ISA world, first from the perspective of their area of expertise and then noting the contributions from the other two communities. What research will save us and how? This wide-ranging debate will frame important research areas for future work while being grounded in frank discussion about what has succeeded in the past. Attendees can expect actionable insight into important research issues as well as an entertaining discussion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Angstadt:2016:RPP, author = "Kevin Angstadt and Westley Weimer and Kevin Skadron", title = "{RAPID} Programming of Pattern-Recognition Processors", journal = j-SIGPLAN, volume = "51", number = "4", pages = "593--605", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872393", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present RAPID, a high-level programming language and combined imperative and declarative model for programming pattern-recognition processors, such as Micron's Automata Processor (AP).
The AP is a novel, non-Von Neumann architecture for direct execution of non-deterministic finite automata (NFAs), and has been demonstrated to provide substantial speedup for a variety of data-processing applications. RAPID is clear, maintainable, concise, and efficient both at compile and run time. Language features, such as code abstraction and parallel control structures, map well to pattern-matching problems, providing clarity and maintainability. For generation of efficient runtime code, we present algorithms to convert RAPID programs into finite automata. Further, we introduce a tessellation technique for configuring the AP, which significantly reduces compile time, increases programmer productivity, and improves maintainability. We evaluate five RAPID programs against custom, baseline implementations previously demonstrated to be significantly accelerated by the AP. We find that RAPID programs are much shorter in length, are expressible at a higher level of abstraction than their handcrafted counterparts, and yield generated code that is often more compact. In addition, our tessellation technique for configuring the AP has comparable device utilization to, and results in compilation that is up to four orders of magnitude faster than, current solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Sui:2016:PCA, author = "Xin Sui and Andrew Lenharth and Donald S. Fussell and Keshav Pingali", title = "Proactive Control of Approximate Programs", journal = j-SIGPLAN, volume = "51", number = "4", pages = "607--621", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872402", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Approximate computing trades off accuracy of results for resources such as energy or computing time. There is a large and rapidly growing literature on approximate computing that has focused mostly on showing the benefits of approximate computing. However, we know relatively little about how to control approximation in a disciplined way. In this paper, we address the problem of controlling approximation for non-streaming programs that have a set of ``knobs'' that can be dialed up or down to control the level of approximation of different components in the program. We formulate this control problem as a constrained optimization problem, and describe a system called Capri that uses machine learning to learn cost and error models for the program, and uses these models to determine, for a desired level of approximation, knob settings that optimize metrics such as running time or energy usage. 
Experimental results with complex benchmarks from different problem domains demonstrate the effectiveness of this approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Park:2016:ATC, author = "Jongse Park and Emmanuel Amaro and Divya Mahajan and Bradley Thwaites and Hadi Esmaeilzadeh", title = "{AxGames}: Towards Crowdsourcing Quality Target Determination in Approximate Computing", journal = j-SIGPLAN, volume = "51", number = "4", pages = "623--636", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Approximate computing trades quality of application output for higher efficiency and performance. Approximation is useful only if its impact on application output quality is acceptable to the users. However, there is a lack of systematic solutions and studies that explore users' perspective on the effects of approximation. In this paper, we seek to provide one such solution for the developers to probe and discover the boundary of quality loss that most users will deem acceptable. We propose AxGames, a crowdsourced solution that enables developers to readily infer a statistical common ground from the general public through three entertaining games. The users engage in these games by betting on their opinion about the quality loss of the final output while the AxGames framework collects statistics about their perceptions. The framework then statistically analyzes the results to determine the acceptable levels of quality for a pair of (application, approximation technique). The three games are designed such that they effectively capture quality requirements with various tradeoffs and contexts. To evaluate AxGames, we examine seven diverse applications that produce user perceptible outputs and cover a wide range of domains, including image processing, optical character recognition, speech to text conversion, and audio processing. We recruit 700 participants/users through Amazon's Mechanical Turk to play the games that collect statistics about their perception on different levels of quality. Subsequently, the AxGames framework uses the Clopper-Pearson exact method, which computes a binomial proportion confidence interval, to analyze the collected statistics for each level of quality. Using this analysis, AxGames can statistically project the quality level that satisfies a given percentage of users. The developers can use these statistical projections to tune the level of approximation based on the user experience. We find that the level of acceptable quality loss significantly varies across applications. For instance, to satisfy 90\% of users, the level of acceptable quality loss is 2\% for one application (image processing) and 26\% for another (audio processing). Moreover, the pattern with which the crowd responds to approximation takes significantly different shape and form depending on the class of applications. 
These results confirm the necessity of solutions that systematically explore the effect of approximation on the end user experience.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Bornholt:2016:DBA, author = "James Bornholt and Randolph Lopez and Douglas M. Carmean and Luis Ceze and Georg Seelig and Karin Strauss", title = "A {DNA}-Based Archival Storage System", journal = j-SIGPLAN, volume = "51", number = "4", pages = "637--649", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872397", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Demand for data storage is growing exponentially, but the capacity of existing storage media is not keeping up. Using DNA to archive data is an attractive possibility because it is extremely dense, with a raw limit of 1 exabyte/mm$^3$ (10$^9$ GB/mm$^3$), and long-lasting, with observed half-life of over 500 years. This paper presents an architecture for a DNA-based archival storage system. It is structured as a key-value store, and leverages common biochemical techniques to provide random access. We also propose a new encoding scheme that offers controllable redundancy, trading off reliability for density. We demonstrate feasibility, random access, and robustness of the proposed encoding with wet lab experiments involving 151 kB of synthesized DNA and a 42 kB random-access subset, and simulation experiments of larger sets calibrated to the wet lab experiments. Finally, we highlight trends in biotechnology that indicate the impending practicality of DNA storage for much larger datasets.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Prabhakar:2016:GCH, author = "Raghu Prabhakar and David Koeplinger and Kevin J. Brown and HyoukJoong Lee and Christopher {De Sa} and Christos Kozyrakis and Kunle Olukotun", title = "Generating Configurable Hardware from Parallel Patterns", journal = j-SIGPLAN, volume = "51", number = "4", pages = "651--665", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years the computing landscape has seen an increasing shift towards specialized accelerators. Field programmable gate arrays (FPGAs) are particularly promising for the implementation of these accelerators, as they offer significant performance and energy improvements over CPUs for a wide class of applications and are far more flexible than fixed-function ASICs. However, FPGAs are difficult to program. Traditional programming models for reconfigurable logic use low-level hardware description languages like Verilog and VHDL, which have none of the productivity features of modern software languages but produce very efficient designs, and low-level software languages like C and OpenCL coupled with high-level synthesis (HLS) tools that typically produce designs that are far less efficient.
Functional languages with parallel patterns are a better fit for hardware generation because they provide high-level abstractions to programmers with little experience in hardware design and avoid many of the problems faced when generating hardware from imperative languages. In this paper, we identify two important optimizations for using parallel patterns to generate efficient hardware: tiling and metapipelining. We present a general representation of tiled parallel patterns, and provide rules for automatically tiling patterns and generating metapipelines. We demonstrate experimentally that these optimizations result in speedups up to 39.4$ \times $ on a set of benchmarks from the data analytics domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Chang:2016:DLD, author = "Li-Wen Chang and Hee-Seok Kim and Wen-mei W. Hwu", title = "{DySel}: Lightweight Dynamic Selection for Kernel-based Data-parallel Programming Model", journal = j-SIGPLAN, volume = "51", number = "4", pages = "667--680", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The rising pressure for simultaneously improving performance and reducing power is driving more diversity into all aspects of computing devices. An algorithm that is well-matched to the target hardware can run multiple times faster and more energy efficiently than one that is not. The problem is complicated by the fact that a program's input also affects the appropriate choice of algorithm. As a result, software developers have been faced with the challenge of determining the appropriate algorithm for each potential combination of target device and data. This paper presents DySel, a novel runtime system for automating such determination for kernel-based data parallel programming models such as OpenCL, CUDA, OpenACC, and C++AMP. These programming models cover many applications that demand high performance in mobile, cloud and high-performance computing. DySel systematically deploys candidate kernels on a small portion of the actual data to determine which achieves the best performance for the hardware-data combination. The test-deployment, referred to as micro-profiling, contributes to the final execution result and incurs less than 8\% of overhead in the worst observed case when compared to an oracle. 
We show four major use cases where DySel provides significantly more consistent performance without tedious effort from the developer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Chen:2016:BQA, author = "Quan Chen and Hailong Yang and Jason Mars and Lingjia Tang", title = "{Baymax}: {QoS} Awareness and Increased Utilization for Non-Preemptive Accelerators in Warehouse Scale Computers", journal = j-SIGPLAN, volume = "51", number = "4", pages = "681--696", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872368", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern warehouse-scale computers (WSCs) are being outfitted with accelerators to provide the significant compute required by emerging intelligent personal assistant (IPA) workloads such as voice recognition, image classification, and natural language processing. It is well known that the diurnal user access pattern of user-facing services provides a strong incentive to co-locate applications for better accelerator utilization and efficiency, and prior work has focused on enabling co-location on multicore processors. However, interference when co-locating applications on non-preemptive accelerators is fundamentally different than contention on multi-core CPUs and introduces a new set of challenges to reduce QoS violation. To address this open problem, we first identify the underlying causes for QoS violation in accelerator-outfitted servers. Our experiments show that queuing delay for the compute resources and PCI-e bandwidth contention for data transfer are the two main factors that contribute to the long tails of user-facing applications. We then present Baymax, a runtime system that orchestrates the execution of compute tasks from different applications and mitigates PCI-e bandwidth contention to deliver the required QoS for user-facing applications and increase the accelerator utilization. Using DjiNN, a deep neural network service, Sirius, an end-to-end IPA workload, and traditional applications on a Nvidia K40 GPU, our evaluation shows that Baymax improves the accelerator utilization by 91.3\% while achieving the desired 99\%-ile latency target for user-facing applications. In fact, Baymax reduces the 99\%-ile latency of user-facing applications by up to 195x over default execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Nowatzki:2016:ABS, author = "Tony Nowatzki and Karthikeyan Sankaralingam", title = "Analyzing Behavior Specialized Acceleration", journal = j-SIGPLAN, volume = "51", number = "4", pages = "697--711", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hardware specialization has become a promising paradigm for overcoming the inefficiencies of general purpose microprocessors.
Of significant interest are Behavioral Specialized Accelerators (BSAs), which are designed to efficiently execute code with only certain properties, but remain largely configurable or programmable. The most important strength of BSAs --- their ability to target a wide variety of codes --- also makes their interactions and analysis complex, raising the following questions: can multiple BSAs be composed synergistically, what are their interactions with the general purpose core, and what combinations favor which workloads? From a methodological standpoint, BSAs are also challenging, as they each require ISA development, compiler and assembler extensions, and either simulator or RTL models. To study the potential of BSAs, we propose a novel modeling technique called the Transformable Dependence Graph (TDG) --- a higher level alternative to the time-consuming traditional compiler+simulator approach, while still enabling detailed microarchitectural models for both general cores and accelerators. We then propose a multi-BSA organization, called ExoCore, which we model and study using the TDG. A design space exploration reveals that an ExoCore organization can push designs beyond the established energy-performance frontiers for general purpose cores. For example, a 2-wide OOO processor with three BSAs matches the performance of a conventional 6-wide OOO core, has 40\% lower area, and is 2.6x more energy efficient.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Yoon:2016:PPI, author = "Man-Ki Yoon and Negin Salajegheh and Yin Chen and Mihai Christodorescu", title = "{PIFT}: Predictive Information-Flow Tracking", journal = j-SIGPLAN, volume = "51", number = "4", pages = "713--725", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872403", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Phones today carry sensitive information and have a great number of ways to communicate that data. As a result, malware that steal money, information, or simply disable functionality have hit the app stores. Current security solutions for preventing undesirable data leaks are mostly high-overhead and have not been practical enough for smartphones. In this paper, we show that by monitoring just some instructions (only memory loads and stores), it is possible to achieve low-overhead, highly accurate information flow tracking. Our method achieves 98\% accuracy (0\% false positives and 2\% false negatives) over DroidBench and was able to successfully catch seven real-world malware instances that steal phone number, location, and device ID using SMS messages and HTTP connections.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Venkat:2016:HHI, author = "Ashish Venkat and Sriskanda Shamasunder and Hovav Shacham and Dean M.
Tullsen", title = "{HIPStR}: Heterogeneous-{ISA} Program State Relocation", journal = j-SIGPLAN, volume = "51", number = "4", pages = "727--741", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872408", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous Chip Multiprocessors have been shown to provide significant performance and energy efficiency gains over homogeneous designs. Recent research has expanded the dimensions of heterogeneity to include diverse Instruction Set Architectures, called Heterogeneous-ISA Chip Multiprocessors. This work leverages such an architecture to realize substantial new security benefits, and in particular, to thwart Return-Oriented Programming. This paper proposes a novel security defense called HIPStR --- Heterogeneous-ISA Program State Relocation --- that performs dynamic randomization of run-time program state, both within and across ISAs. This technique outperforms the state-of-the-art just-in-time code reuse (JIT-ROP) defense by an average of 15.6\%, while simultaneously providing greater security guarantees against classic return-into-libc, ROP, JOP, brute force, JIT-ROP, and several evasive variants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Aweke:2016:ASB, author = "Zelalem Birhanu Aweke and Salessawi Ferede Yitbarek and Rui Qiao and Reetuparna Das and Matthew Hicks and Yossi Oren and Todd Austin", title = "{ANVIL}: Software-Based Protection Against Next-Generation Rowhammer Attacks", journal = j-SIGPLAN, volume = "51", number = "4", pages = "743--755", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Ensuring the integrity and security of the memory system is critical. Recent studies have shown serious security concerns due to ``rowhammer'' attacks, where repeated accesses to a row of memory cause bit flips in adjacent rows. Recent work by Google's Project Zero has shown how to leverage rowhammer-induced bit-flips as the basis for security exploits that include malicious code injection and memory privilege escalation. Being an important security concern, industry has attempted to defend against rowhammer attacks. Deployed defenses employ two strategies: (1) doubling the system DRAM refresh rate and (2) restricting access to the CLFLUSH instruction that attackers use to bypass the cache to increase memory access frequency (i.e., the rate of rowhammering). We demonstrate that such defenses are inadequte: we implement rowhammer attacks that both avoid using the CLFLUSH instruction and cause bit flips with a doubled refresh rate. Our next-generation CLFLUSH-free rowhammer attack bypasses the cache by manipulating cache replacement state to allow frequent misses out of the last-level cache to DRAM rows of our choosing. To protect existing systems from more advanced rowhammer attacks, we develop a software-based defense, ANVIL, which thwarts all known rowhammer attacks on existing systems. 
ANVIL detects rowhammer attacks by tracking the locality of DRAM accesses using existing hardware performance counters. Our detector identifies the rows being frequently accessed (i.e., the aggressors), then selectively refreshes the nearby victim rows to prevent hammering. Experiments running on real hardware with the SPEC2006 benchmarks show that ANVIL has less than a 1\% false positive rate and an average slowdown of 1\%. ANVIL is low-cost and robust, and our experiments indicate that it is an effective approach for protecting existing and future systems from even advanced rowhammer attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Didona:2016:PAM, author = "Diego Didona and Nuno Diegues and Anne-Marie Kermarrec and Rachid Guerraoui and Ricardo Neves and Paolo Romano", title = "{ProteusTM}: Abstraction Meets Performance in Transactional Memory", journal = j-SIGPLAN, volume = "51", number = "4", pages = "757--771", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Transactional Memory (TM) paradigm promises to greatly simplify the development of concurrent applications. This led, over the years, to the creation of a plethora of TM implementations delivering wide ranges of performance across workloads. Yet, no universal implementation fits each and every workload. In fact, the best TM for a given workload can prove disastrous for another one. This forces developers to face the complex task of tuning TM implementations, which significantly hampers their wide adoption. In this paper, we address the challenge of automatically identifying the best TM implementation for a given workload. Our proposed system, ProteusTM, hides behind the TM interface a large library of implementations. Underneath, it leverages a novel multi-dimensional online optimization scheme, combining two popular learning techniques: Collaborative Filtering and Bayesian Optimization. We integrated ProteusTM in GCC and demonstrate its ability to switch between TMs and adapt several configuration parameters (e.g., number of threads). We extensively evaluated ProteusTM, obtaining average performance within {$<$3}\% of optimal, and gains up to 100x over static alternatives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Shalev:2016:CCS, author = "Noam Shalev and Eran Harpaz and Hagar Porat and Idit Keidar and Yaron Weinsberg", title = "{CSR}: Core Surprise Removal in Commodity Operating Systems", journal = j-SIGPLAN, volume = "51", number = "4", pages = "773--787", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "One of the adverse effects of shrinking transistor sizes is that processors have become increasingly prone to hardware faults. At the same time, the number of cores per die rises.
Consequently, core failures can no longer be ruled out, and future operating systems for many-core machines will have to incorporate fault tolerance mechanisms. We present CSR, a strategy for recovery from unexpected permanent processor faults in commodity operating systems. Our approach overcomes surprise removal of faulty cores, and also tolerates cascading core failures. When a core fails in user mode, CSR terminates the process executing on that core and migrates the remaining processes in its run-queue to other cores. We further show how hardware transactional memory may be used to overcome failures in critical kernel code. Our solution is scalable, incurs low overhead, and is designed to integrate into modern operating systems. We have implemented it in the Linux kernel, using Haswell's Transactional Synchronization Extension, and tested it on a real system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Gangwani:2016:CBS, author = "Tanmay Gangwani and Adam Morrison and Josep Torrellas", title = "{CASPAR}: Breaking Serialization in Lock-Free Multicore Synchronization", journal = j-SIGPLAN, volume = "51", number = "4", pages = "789--804", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872400", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In multicores, performance-critical synchronization is increasingly performed in a lock-free manner using atomic instructions such as CAS or LL/SC. However, when many processors synchronize on the same variable, performance can still degrade significantly. Contending writes get serialized, creating a non-scalable condition. Past proposals that build hardware queues of synchronizing processors do not fundamentally solve this problem --- at best, they help to efficiently serialize the contending writes. This paper proposes a novel architecture that breaks the serialization of hardware queues and enables the queued processors to perform lock-free synchronization in parallel. The architecture, called CASPAR, is able to (1) execute the CASes in the queued-up processors in parallel through eager forwarding of expected values, and (2) validate the CASes in parallel and dequeue groups of processors at a time. The result is highly-scalable synchronization. We evaluate CASPAR with simulations of a 64-core chip. Compared to existing proposals with hardware queues, CASPAR improves the throughput of kernels by 32\% on average, and reduces the execution time of the sections considered in lock-free versions of applications by 47\% on average. 
This makes these sections 2.5x faster than in the original applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '16 conference proceedings.", } @Article{Spink:2016:EAI, author = "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke", title = "Efficient asynchronous interrupt handling in a full-system instruction set simulator", journal = j-SIGPLAN, volume = "51", number = "5", pages = "1--10", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907953", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Instruction set simulators (ISS) have many uses in embedded software and hardware development and are typically based on dynamic binary translation (DBT), where frequently executed regions of guest instructions are compiled into host instructions using a just-in-time (JIT) compiler. Full-system simulation, which necessitates handling of asynchronous interrupts from e.g. timers and I/O devices, complicates matters as control flow is interrupted unpredictably and diverted from the current region of code. In this paper we present a novel scheme for handling asynchronous interrupts, which integrates seamlessly into a region-based dynamic binary translator. We first show that our scheme is correct, i.e. interrupt handling is not deferred indefinitely, even in the presence of code regions comprising control flow loops. We demonstrate that our new interrupt handling scheme is efficient as we minimise the number of inserted checks. Interrupt handlers are also presented to the JIT compiler and compiled to native code, further enhancing the performance of our system. We have evaluated our scheme in an ARM simulator using a region-based JIT compilation strategy. We demonstrate that our solution reduces the number of dynamic interrupt checks by 73\%, reduces interrupt service latency by 26\% and improves throughput of an I/O bound workload by 7\%, over traditional per-block schemes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Robinson:2016:CCM, author = "Forrest J. Robinson and Michael R. Jantz and Prasad A. Kulkarni", title = "Code cache management in managed language {VMs} to reduce memory consumption for embedded systems", journal = j-SIGPLAN, volume = "51", number = "5", pages = "11--20", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907958", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "The compiled native code generated by a just-in-time (JIT) compiler in managed language virtual machines (VM) is placed in a region of memory called the code cache. Code cache management (CCM) in a VM is responsible for finding and evicting methods from the code cache to maintain execution correctness and manage program performance for a given code cache size or memory budget. Effective CCM can also boost program speed by enabling more aggressive JIT compilation, powerful optimizations, and improved hardware instruction cache and I-TLB performance.
Though important, CCM is an overlooked component in VMs. We find that the default CCM policies in Oracle's production-grade HotSpot VM perform poorly even at modest memory pressure. We develop a detailed simulation-based framework to model and evaluate the potential efficiency of many different CCM policies in a controlled and realistic, but VM-independent environment. We make the encouraging discovery that effective CCM policies can sustain high program performance even for very small cache sizes. Our simulation study provides the rationale and motivation to improve CCM strategies in existing VMs. We implement and study the properties of several CCM policies in HotSpot. We find that in spite of working within the bounds of the HotSpot VM's current CCM sub-system, our best CCM policy implementation in HotSpot improves program performance over the default CCM algorithm by 39\%, 41\%, 55\%, and 50\% with code cache sizes that are 90\%, 75\%, 50\%, and 25\% of the desired cache size, on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Nobre:2016:GBI, author = "Ricardo Nobre and Luiz G. A. Martins and Jo{\~a}o M. P. Cardoso", title = "A graph-based iterative compiler pass selection and phase ordering approach", journal = j-SIGPLAN, volume = "51", number = "5", pages = "21--30", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907959", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nowadays compilers include tens or hundreds of optimization passes, which makes it difficult to find sequences of optimizations that achieve compiled code more optimized than the one obtained using typical compiler options such as -O2 and -O3. The problem involves both the selection of the compiler passes to use and their ordering in the compilation pipeline. The improvement achieved by the use of custom phase orders for each function can be significant, and thus important to satisfy strict requirements such as the ones present in high-performance embedded computing systems. In this paper we present a new and fast iterative approach to the phase selection and ordering challenges resulting in compiled code with higher performance than the one achieved with the standard optimization levels of the LLVM compiler. The obtained performance improvements are comparable with the ones achieved by other iterative approaches while requiring considerably less time and resources. Our approach is based on sampling over a graph representing transitions between compiler passes. We performed a number of experiments targeting the LEON3 microarchitecture using the Clang/LLVM 3.7 compiler, considering 140 LLVM passes and a set of 42 representative signal and image processing C functions. An exhaustive cross-validation shows our new exploration method is able to achieve a geometric mean performance speedup of 1.28x over the best individually selected -OX flag when considering 100,000 iterations; versus geometric mean speedups from 1.16x to 1.25x obtained with state-of-the-art iterative methods not using the graph. 
From the set of exploration methods tested, our new method is the only one consistently finding compiler sequences that result in performance improvements when considering 100 or less exploration iterations. Specifically, it achieved geometric mean speedups of 1.08x and 1.16x for 10 and 100 iterations, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Banerjee:2016:TVL, author = "Kunal Banerjee and Chittaranjan Mandal and Dipankar Sarkar", title = "Translation validation of loop and arithmetic transformations in the presence of recurrences", journal = j-SIGPLAN, volume = "51", number = "5", pages = "31--40", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907954", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler optimization of array-intensive programs involves extensive application of loop transformations and arithmetic transformations. Hence, translation validation of array-intensive programs requires manipulation of intervals of integers (representing domains of array indices) and relations over such intervals to account for loop transformations and simplification of arithmetic expressions to handle arithmetic transformations. A major obstacle for verification of such programs is posed by the presence of recurrences, whereby an element of an array gets defined in a statement S inside a loop in terms of some other element(s) of the same array which have been previously defined through the same statement S. Recurrences lead to cycles in the data-dependence graph of a program which make dependence analyses and simplifications (through closed-form representations) of the data transformations difficult. Another technique which works better for recurrences does not handle arithmetic transformations. In this work, array data-dependence graphs (ADDGs) are used to represent both the original and the optimized versions of the program and a validation scheme is proposed where the cycles due to recurrences in the ADDGs are suitably abstracted as acyclic subgraphs. Thus, this work provides a unified equivalence checking framework to handle loop and arithmetic transformations along with most of the recurrences --- this combination of features had not been achieved by a single verification technique earlier.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Sui:2016:LOA, author = "Yulei Sui and Xiaokang Fan and Hao Zhou and Jingling Xue", title = "Loop-oriented array- and field-sensitive pointer analysis for automatic {SIMD} vectorization", journal = j-SIGPLAN, volume = "51", number = "5", pages = "41--51", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907957", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler-based auto-vectorization is a promising solution to automatically generate code that makes efficient use of SIMD processors in high performance platforms and embedded systems. 
Two main auto-vectorization techniques, superword-level parallelism vectorization (SLP) and loop-level vectorization (LLV), require precise dependence analysis on arrays and structs in order to vectorize isomorphic scalar instructions and/or reduce dynamic dependence checks incurred at runtime. The alias analyses used in modern vectorizing compilers are either intra-procedural (without tracking inter-procedural data-flows) or inter-procedural (by using field-insensitive models, which are too imprecise in handling arrays and structs). This paper proposes an inter-procedural Loop-oriented Pointer Analysis, called LPA, for analyzing arrays and structs to support aggressive SLP and LLV optimizations. Unlike field-insensitive solutions that preallocate objects for each memory allocation site, our approach uses a fine-grained memory model to generate location sets based on how structs and arrays are accessed. LPA can precisely analyze arrays and nested aggregate structures to enable SIMD optimizations for large programs. By separating the location set generation as an independent concern from the rest of the pointer analysis, LPA is designed to easily reuse existing points-to resolution algorithms. We evaluate LPA using SLP and LLV, the two classic vectorization techniques on a set of 20 CPU2000/2006 benchmarks. For SLP, LPA enables it to vectorize a total of 133 more basic blocks, with an average of 12.09 per benchmark, resulting in the best speedup of 2.95\% for 173.applu. For LLV, LPA has reduced a total of 319 static bound checks, with an average of 22.79 per benchmark, resulting in the best speedup of 7.18\% for 177.mesa.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Domagala:2016:GCT, author = "Lukasz Domagala and Duco van Amstel and Fabrice Rastello", title = "Generalized cache tiling for dataflow programs", journal = j-SIGPLAN, volume = "51", number = "5", pages = "52--61", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The dataflow programming paradigm has facilitated the expression of a great number of algorithmic applications on embedded platforms in a wide variety of applicative domains. Whether it is a Domain Specific Language (DSL) or a more general-purpose one, the dataflow paradigm allows one to intuitively state the successive steps of an algorithm and link them through data communications. The optimization of cache-memory in this context has been a subject of interest since the early '90s as the reuse and communication of data between the agents of a dataflow program is a key factor in achieving a high-performance implementation within the reduced limits of embedded architectures. In order to improve data reuse among the dataflow agents we propose a modelisation of the communications and data usage within a dataflow program. Aside from providing an estimate of the amount of cache-misses that a given scheduling generates, this model allows us to specify the associated optimization problem in a manner that is identical to loop-nest tiling. Improving on the existing state-of-the-art methods we extend our tiling technique to include non-uniform dependencies on one of the dimensions of the iteration space.
When applying the proposed technique to dataflow programs expressed within the StreamIt framework we are able to showcase significant reductions in the number of cache-misses for a majority of test-cases when compared to existing optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Chu:2016:SEM, author = "Duc-Hiep Chu and Joxan Jaffar and Rasool Maghareh", title = "Symbolic execution for memory consumption analysis", journal = j-SIGPLAN, volume = "51", number = "5", pages = "62--71", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907955", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the advances in both hardware and software of embedded systems in the past few years, dynamic memory allocation can now be safely used in embedded software. As a result, the need to develop methods to avoid heap overflow errors in safety-critical embedded systems has increased. Resource analysis of imperative programs with non-regular loop patterns and signed integers, to support both memory allocation and deallocation, has long been an open problem. Existing methods can generate symbolic bounds that are parametric w.r.t. the program inputs; such bounds, however, are imprecise in the presence of non-regular loop patterns. In this paper, we present a worst-case memory consumption analysis, based upon the framework of symbolic execution. Our assumption is that loops (and recursions) of to-be-analyzed programs are indeed bounded. We then can exhaustively unroll loops and the memory consumption of each iteration can be precisely computed and summarized for aggregation. Because of path-sensitivity, our algorithm generates more precise bounds. Importantly, we demonstrate that by introducing a new concept of reuse, symbolic execution scales to a set of realistic benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Metta:2016:TSM, author = "Ravindra Metta and Martin Becker and Prasad Bokil and Samarjit Chakraborty and R. Venkatesh", title = "{TIC}: a scalable model checking based approach to {WCET} estimation", journal = j-SIGPLAN, volume = "51", number = "5", pages = "72--81", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907961", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The application of Model Checking to compute WCET has not been explored as much as Integer Linear Programming (ILP), primarily because model checkers fail to scale for complex programs. These programs have loops with large or unknown bounds, leading to a state space explosion that model checkers cannot handle. To overcome this, we have developed a technique, TIC, that employs slicing, loop acceleration and over-approximation on time-annotated source code, enabling Model Checking to scale better for WCET computation. Further, our approach is parametric, so that the user can make a trade-off between the tightness of WCET estimate and the analysis time. 
We conducted experiments on the M{\"a}lardalen benchmarks to evaluate the effect of various abstractions on the WCET estimate and analysis time. Additionally, we compared our estimates to those made by an ILP-based analyzer and found that our estimates were tighter for more than 30\% of the examples and were equal for the rest.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Chen:2016:CIM, author = "Kuan-Hsun Chen and Bj{\"o}rn B{\"o}nninghoff and Jian-Jia Chen and Peter Marwedel", title = "Compensate or ignore? {Meeting} control robustness requirements through adaptive soft-error handling", journal = j-SIGPLAN, volume = "51", number = "5", pages = "82--91", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907952", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To avoid catastrophic events like unrecoverable system failures on mobile and embedded systems caused by soft-errors, software-based error detection and compensation techniques have been proposed. Methods like error-correction codes or redundant execution can offer high flexibility and allow for application-specific fault-tolerance selection without the need for special hardware support. However, such software-based approaches may lead to system overload due to the execution time overhead. An adaptive deployment of such techniques to meet both application requirements and system constraints is desired. From our case study, we observe that a control task can tolerate limited errors with acceptable performance loss. Such tolerance can be modeled as an (m,k) constraint, which requires at least m out of any k consecutive runs to be correct. In this paper, we discuss how a given (m,k) constraint can be satisfied by adopting patterns of task instances with individual error detection and compensation capabilities. We introduce static strategies and provide a formal feasibility analysis for validation. Furthermore, we develop an adaptive scheme that extends our initial approach with online awareness that increases efficiency while preserving analysis results. The effectiveness of our method is shown in a real-world case study as well as for synthesized task sets.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Chakraborty:2016:OCP, author = "Prasenjit Chakraborty and Gautam Doshi and Shashank Shekhar and Vikrant Kumar", title = "Opportunity for compute partitioning in pursuit of energy-efficient systems", journal = j-SIGPLAN, volume = "51", number = "5", pages = "92--101", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907956", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance of computing systems, from handhelds to supercomputers, is increasingly constrained by the energy consumed. A significant and increasing fraction of the energy is consumed in the movement of data.
In a compute node, caches have been very effective in reducing data movement by exploiting the available data locality in programs. Program regions with poor data locality then effect most of the data movement, and consequently consume an ever larger fraction of energy. In this paper we explore the energy efficiency opportunity of minimizing the data movement in precisely such program regions, by first imagining the possibility of compute near memory, and then partitioning the program's execution between a compute core and the compute near memory (CnM). Due to the emergence of 3D stacked memory, a CnM implementation appears more realistic. Our focus is on evaluating the partitioning opportunity in applications and on doing a limit study of systems enabled with CnM capabilities to understand and guide their architectural embodiment. We describe an automated method of analyzing the data access pattern of optimized workload binaries, via a binary-instrumentation tool called SnapCnM, to identify the beneficial program regions (loops) for CnM execution. We also perform a limit study to evaluate the impact of such partitioning over a range of parameters affecting CnM design choices. Our results show that compute partitioning a small ({$<$10}\%) fraction of a workload can improve its energy efficiency from 3\% (for compute-bound applications) to 27\% (for memory-bound applications). From the study in this work we discuss the important aspects that help to shape the future CnM design space.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Phothilimthana:2016:CGR, author = "Phitchaya Mangpo Phothilimthana and Michael Schuldt and Rastislav Bodik", title = "Compiling a gesture recognition application for a low-power spatial architecture", journal = j-SIGPLAN, volume = "51", number = "5", pages = "102--112", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy efficiency is one of the main performance goals when designing processors for embedded systems. Typically, the simpler the processor, the less energy it consumes. Thus, an ultra-low power multicore processor will likely have very small distributed memory with a simple interconnect. To compile for such an architecture, a partitioning strategy that can tune between space and communication minimization is crucial to fit a program in its limited resources and achieve good performance. A careful program layout design is also critical. Aside from fulfilling the space constraint, a compiler needs to be able to optimize for program latency to satisfy a certain timing requirement as well. To satisfy all aforementioned constraints, we present a flexible code partitioning strategy and light-weight mechanisms to express parallelism and program layout. First, we compare two strategies for partitioning program structures and introduce a language construct to let programmers choose which strategies to use and when. The compiler then partitions program structures with a mix of both strategies. Second, we add support for programmer-specified parallelism and program layout through imposing additional spatial constraints on the compiler.
We evaluate our compiler by implementing an accelerometer-based gesture recognition application on GA144, a recent low-power minimalistic multicore architecture. When compared to MSP430, GA144 is overall 19x more energy-efficient and 23x faster when running this application. Without these inventions, this application would not be able to fit on GA144.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Micolet:2016:MLA, author = "Paul-Jules Micolet and Aaron Smith and Christophe Dubach", title = "A machine learning approach to mapping streaming workloads to dynamic multicore processors", journal = j-SIGPLAN, volume = "51", number = "5", pages = "113--122", month = may, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980930.2907951", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:24 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dataflow programming languages facilitate the design of data intensive programs such as streaming applications commonly found in embedded systems. They also expose parallelism that can be exploited using multicore processors which are now part of the mobile landscape. In recent years a shift has occurred towards heterogeneity ( ARM big.LITTLE) and reconfigurability. Dynamic Multicore Processors (DMPs) bridge the gap between fully reconfigurable processors and homogeneous multicore systems. They can re-allocate their resources at runtime to create larger more powerful logical processors fine-tuned to the workload. Unfortunately, there exists no accurate method to determine how to partition the cores in a DMP among application threads. Often programmers rely on analyzing the application manually and using a set of hand picked heuristics. This leads to sub-optimal performance, reducing the potential of DMPs. What is needed is a way to determine the optimal partitioning and grouping of resources to maximize performance. As a first step, this paper studies the effect of thread partitioning and hardware resource allocation on a set of StreamIt applications. We show that the resulting space is not trivial and exhibits a large performance variation depending on the combination of parameters. We introduce a machine-learning based methodology to tackle the space complexity. Our machine-learning model is able to directly predict the best combination of parameters using static code features. The predicted set of parameters leads to performance on-par with the best performance found in a space of more than 32,000 configurations per application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '16 conference proceedings.", } @Article{Memarian:2016:DCE, author = "Kayvan Memarian and Justus Matthiesen and James Lingard and Kyndylan Nienhuis and David Chisnall and Robert N. M. 
Watson and Peter Sewell", title = "Into the depths of {C}: elaborating the de facto standards", journal = j-SIGPLAN, volume = "51", number = "6", pages = "1--15", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908081", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C remains central to our computing infrastructure. It is notionally defined by ISO standards, but in reality the properties of C assumed by systems code and those implemented by compilers have diverged, both from the ISO standards and from each other, and none of these are clearly understood. We make two contributions to help improve this error-prone situation. First, we describe an in-depth analysis of the design space for the semantics of pointers and memory in C as it is used in practice. We articulate many specific questions, build a suite of semantic test cases, gather experimental data from multiple implementations, and survey what C experts believe about the de facto standards. We identify questions where there is a consensus (either following ISO or differing) and where there are conflicts. We apply all this to an experimental C implemented above capability hardware. Second, we describe a formal model, Cerberus, for large parts of C. Cerberus is parameterised on its memory model; it is linkable either with a candidate de facto memory object model, under construction, or with an operational C11 concurrency model; it is defined by elaboration to a much simpler Core language for accessibility, and it is executable as a test oracle on small examples. This should provide a solid basis for discussion of what mainstream C is now: what programmers and analysis tools can assume and what compilers aim to implement. Ultimately we hope it will be a step towards clear, consistent, and accepted semantics for the various use-cases of C.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Chamith:2016:LER, author = "Buddhika Chamith and Bo Joel Svensson and Luke Dalessandro and Ryan R. Newton", title = "Living on the edge: rapid-toggling probes with cross-modification on x86", journal = j-SIGPLAN, volume = "51", number = "6", pages = "16--26", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908084", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic probe injection is now a widely used method to debug performance in production. Current techniques for dynamic probing of native code, however, rely on an expensive stop-the-world approach: binary changes are made within a safe state of the program --- typically in which all the program threads are halted --- to ensure that another thread executing the modified code region doesn't step into a partially-modified code. Stop-the-world patching is not scalable. In contrast, low overhead, scalable probes that can be rapidly toggled on and off in-place would open up new use cases for statistical profilers and language implementations, even traditional ahead-of-time, native-code compilers. 
In this paper we introduce safe cross-modification protocols that mutate x86 code between threads but do not require quiescing threads, resulting in radically lower overheads than existing solutions. A key problem is handling instructions that straddle cache lines. We empirically evaluate existing x86 architectures to derive a safe policy given current processor behavior, and we argue that future architectures should clarify the semantics of instruction fetching to make cheap cross-modification easier and future proof.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Noonan:2016:PTI, author = "Matt Noonan and Alexey Loginov and David Cok", title = "Polymorphic type inference for machine code", journal = j-SIGPLAN, volume = "51", number = "6", pages = "27--41", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908119", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For many compiled languages, source-level types are erased very early in the compilation process. As a result, further compiler passes may convert type-safe source into type-unsafe machine code. Type-unsafe idioms in the original source and type-unsafe optimizations mean that type information in a stripped binary is essentially nonexistent. The problem of recovering high-level types by performing type inference over stripped machine code is called type reconstruction, and offers a useful capability in support of reverse engineering and decompilation. In this paper, we motivate and develop a novel type system and algorithm for machine-code type inference. The features of this type system were developed by surveying a wide collection of common source- and machine-code idioms, building a catalog of challenging cases for type reconstruction. We found that these idioms place a sophisticated set of requirements on the type system, inducing features such as recursively-constrained polymorphic types. Many of the features we identify are often seen only in expressive and powerful type systems used by high-level functional languages. Using these type-system features as a guideline, we have developed Retypd: a novel static type-inference algorithm for machine code that supports recursive types, polymorphism, and subtyping. Retypd yields more accurate inferred types than existing algorithms, while also enabling new capabilities such as reconstruction of pointer const annotations with 98\% recall. 
Retypd can operate on weaker program representations than the current state of the art, removing the need for high-quality points-to information that may be impractical to compute.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Padhi:2016:DDP, author = "Saswat Padhi and Rahul Sharma and Todd Millstein", title = "Data-driven precondition inference with learned features", journal = j-SIGPLAN, volume = "51", number = "6", pages = "42--56", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908099", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We extend the data-driven approach to inferring preconditions for code from a set of test executions. Prior work requires a fixed set of features, atomic predicates that define the search space of possible preconditions, to be specified in advance. In contrast, we introduce a technique for on-demand feature learning, which automatically expands the search space of candidate preconditions in a targeted manner as necessary. We have instantiated our approach in a tool called PIE. In addition to making precondition inference more expressive, we show how to apply our feature-learning technique to the setting of data-driven loop invariant inference. We evaluate our approach by using PIE to infer rich preconditions for black-box OCaml library functions and using our loop-invariant inference algorithm as part of an automatic program verifier for C++ programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Sousa:2016:CHL, author = "Marcelo Sousa and Isil Dillig", title = "{Cartesian} {Hoare} logic for verifying $k$-safety properties", journal = j-SIGPLAN, volume = "51", number = "6", pages = "57--69", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908092", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unlike safety properties which require the absence of a ``bad'' program trace, k-safety properties stipulate the absence of a ``bad'' interaction between $k$ traces. Examples of $k$-safety properties include transitivity, associativity, anti-symmetry, and monotonicity. This paper presents a sound and relatively complete calculus, called Cartesian Hoare Logic (CHL), for verifying $k$-safety properties. We also present an automated verification algorithm based on CHL and implement it in a tool called DESCARTES. 
We use DESCARTES to analyze user-defined relational operators in Java and demonstrate that DESCARTES is effective at verifying (or finding violations of) multiple $k$-safety properties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Lee:2016:VBM, author = "Wonyeol Lee and Rahul Sharma and Alex Aiken", title = "Verifying bit-manipulations of floating-point", journal = j-SIGPLAN, volume = "51", number = "6", pages = "70--84", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908107", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reasoning about floating-point is difficult and becomes only more so if there is an interplay between floating-point and bit-level operations. Even though real-world floating-point libraries use implementations that have such mixed computations, no systematic technique to verify the correctness of the implementations of such computations is known. In this paper, we present the first general technique for verifying the correctness of mixed binaries, which combines abstraction, analytical optimization, and testing. The technique provides a method to compute an error bound of a given implementation with respect to its mathematical specification. We apply our technique to Intel's implementations of transcendental functions and prove formal error bounds for these widely used routines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Chen:2016:CDD, author = "Yuting Chen and Ting Su and Chengnian Sun and Zhendong Su and Jianjun Zhao", title = "Coverage-directed differential testing of {JVM} implementations", journal = j-SIGPLAN, volume = "51", number = "6", pages = "85--99", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908095", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java virtual machine (JVM) is a core technology, whose reliability is critical. Testing JVM implementations requires painstaking effort in designing test classfiles (*.class) along with their test oracles. An alternative is to employ binary fuzzing to differentially test JVMs by blindly mutating seeding classfiles and then executing the resulting mutants on different JVM binaries for revealing inconsistent behaviors. However, this blind approach is not cost effective in practice because most of the mutants are invalid and redundant. This paper tackles this challenge by introducing classfuzz, a coverage-directed fuzzing approach that focuses on representative classfiles for differential testing of JVMs' startup processes. Our core insight is to (1) mutate seeding classfiles using a set of predefined mutation operators (mutators) and employ Markov Chain Monte Carlo (MCMC) sampling to guide mutator selection, and (2) execute the mutants on a reference JVM implementation and use coverage uniqueness as a discipline for accepting representative ones. 
The accepted classfiles are used as inputs to differentially test different JVM implementations and find defects. We have implemented classfuzz and conducted an extensive evaluation of it against existing fuzz testing algorithms. Our evaluation results show that classfuzz can enhance the ratio of discrepancy-triggering classfiles from 1.7\% to 11.9\%. We have also reported 62 JVM discrepancies, along with the test classfiles, to JVM developers. Many of our reported issues have already been confirmed as JVM defects, and some even match recent clarifications and changes to the Java SE 8 edition of the JVM specification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Sorensen:2016:EER, author = "Tyler Sorensen and Alastair F. Donaldson", title = "Exposing errors related to weak memory in {GPU} applications", journal = j-SIGPLAN, volume = "51", number = "6", pages = "100--113", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908114", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the systematic design of a testing environment that uses stressing and fuzzing to reveal errors in GPU applications that arise due to weak memory effects. We evaluate our approach on seven GPUs spanning three Nvidia architectures, across ten CUDA applications that use fine-grained concurrency. Our results show that applications that rarely or never exhibit errors related to weak memory when executed natively can readily exhibit these errors when executed in our testing environment. Our testing environment also provides a means to help identify the root causes of such errors, and automatically suggests how to insert fences that harden an application against weak memory bugs. To understand the cost of GPU fences, we benchmark applications with fences provided by the hardening strategy as well as a more conservative, sound fencing strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Faddegon:2016:LCT, author = "Maarten Faddegon and Olaf Chitil", title = "Lightweight computation tree tracing for lazy functional languages", journal = j-SIGPLAN, volume = "51", number = "6", pages = "114--128", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908104", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A computation tree of a program execution describes computations of functions and their dependencies. A computation tree describes how a program works and is at the heart of algorithmic debugging. To generate a computation tree, existing algorithmic debuggers either use a complex implementation or yield a less informative approximation. We present a method for lazy functional languages that requires only a simple tracing library to generate a detailed computation tree. 
With our algorithmic debugger a programmer can debug any Haskell program by only importing our library and annotating suspected functions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Hong:2016:EPM, author = "Changwan Hong and Wenlei Bao and Albert Cohen and Sriram Krishnamoorthy and Louis-No{\"e}l Pouchet and Fabrice Rastello and J. Ramanujam and P. Sadayappan", title = "Effective padding of multidimensional arrays to avoid cache conflict misses", journal = j-SIGPLAN, volume = "51", number = "6", pages = "129--144", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Caches are used to significantly improve performance. Even with high degrees of set associativity, the number of accessed data elements mapping to the same set in a cache can easily exceed the degree of associativity. This can cause conflict misses and lower performance, even if the working set is much smaller than cache capacity. Array padding (increasing the size of array dimensions) is a well-known optimization technique that can reduce conflict misses. In this paper, we develop the first algorithms for optimal padding of arrays aimed at a set-associative cache for arbitrary tile sizes. In addition, we develop the first solution to padding for nested tiles and multi-level caches. Experimental results with multiple benchmarks demonstrate a significant performance improvement from padding.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Zhu:2016:GLE, author = "Yuhao Zhu and Vijay Janapa Reddi", title = "{GreenWeb}: language extensions for energy-efficient mobile web computing", journal = j-SIGPLAN, volume = "51", number = "6", pages = "145--160", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908082", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web computing is gradually shifting toward mobile devices, in which the energy budget is severely constrained. As a result, Web developers must be conscious of energy efficiency. However, current Web languages provide developers little control over energy consumption. In this paper, we take a first step toward language-level research to enable energy-efficient Web computing. Our key motivation is that mobile systems can wisely budget energy usage if informed with user quality-of-service (QoS) constraints. To do this, programmers need new abstractions. We propose two language abstractions, QoS type and QoS target, to capture two fundamental aspects of user QoS experience. We then present GreenWeb, a set of language extensions that empower developers to easily express the QoS abstractions as program annotations. As a proof of concept, we develop a GreenWeb runtime, which intelligently determines how to deliver specified user QoS expectation while minimizing energy consumption. 
Overall, GreenWeb shows significant energy savings (29.2\% ~ 66.0\%) over Android's default Interactive governor with few QoS violations. Our work demonstrates a promising first step toward language innovations for energy-efficient Web computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Laurenzano:2016:IRU, author = "Michael A. Laurenzano and Parker Hill and Mehrzad Samadi and Scott Mahlke and Jason Mars and Lingjia Tang", title = "Input responsiveness: using canary inputs to dynamically steer approximation", journal = j-SIGPLAN, volume = "51", number = "6", pages = "161--176", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908087", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces Input Responsive Approximation (IRA), an approach that uses a canary input --- a small program input carefully constructed to capture the intrinsic properties of the original input --- to automatically control how program approximation is applied on an input-by-input basis. Motivating this approach is the observation that many of the prior techniques focusing on choosing how to approximate arrive at conservative decisions by discounting substantial differences between inputs when applying approximation. The main challenges in overcoming this limitation lie in making the choice of how to approximate both effectively (e.g., the fastest approximation that meets a particular accuracy target) and rapidly for every input. With IRA, each time the approximate program is run, a canary input is constructed and used dynamically to quickly test a spectrum of approximation alternatives. Based on these runtime tests, the approximation that best fits the desired accuracy constraints is selected and applied to the full input to produce an approximate result. We use IRA to select and parameterize mixes of four approximation techniques from the literature for a range of 13 image processing, machine learning, and data mining applications. Our results demonstrate that IRA significantly outperforms prior approaches, delivering an average of 10.2$ \times $ speedup over exact execution while minimizing accuracy losses in program outputs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Achour:2016:CSP, author = "Sara Achour and Rahul Sarpeshkar and Martin C. Rinard", title = "Configuration synthesis for programmable analog devices with {Arco}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "177--193", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908116", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmable analog devices have emerged as a powerful computing substrate for performing complex neuromorphic and cytomorphic computations. 
We present Arco, a new solver that, given a dynamical system specification in the form of a set of differential equations, generates physically realizable configurations for programmable analog devices that are algebraically equivalent to the specified system. On a set of benchmarks from the biological domain, Arco generates configurations with 35 to 534 connections and 28 to 326 components in 1 to 54 minutes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Madsen:2016:DFD, author = "Magnus Madsen and Ming-Ho Yee and Ondrej Lhot{\'a}k", title = "From {Datalog} to {Flix}: a declarative language for fixed points on lattices", journal = j-SIGPLAN, volume = "51", number = "6", pages = "194--208", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908096", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Flix, a declarative programming language for specifying and solving least fixed point problems, particularly static program analyses. Flix is inspired by Datalog and extends it with lattices and monotone functions. Using Flix, implementors of static analyses can express a broader range of analyses than is currently possible in pure Datalog, while retaining its familiar rule-based syntax. We define a model-theoretic semantics of Flix as a natural extension of the Datalog semantics. This semantics captures the declarative meaning of Flix programs without imposing any specific evaluation strategy. An efficient strategy is semi-naive evaluation which we adapt for Flix. We have implemented a compiler and runtime for Flix, and used it to express several well-known static analyses, including the IFDS and IDE algorithms. The declarative nature of Flix clearly exposes the similarity between these two algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Truong:2016:LLC, author = "Leonard Truong and Rajkishore Barik and Ehsan Totoni and Hai Liu and Chick Markley and Armando Fox and Tatiana Shpeisman", title = "{Latte}: a language, compiler, and runtime for elegant and efficient deep neural networks", journal = j-SIGPLAN, volume = "51", number = "6", pages = "209--223", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908105", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deep neural networks (DNNs) have undergone a surge in popularity with consistent advances in the state of the art for tasks including image recognition, natural language processing, and speech recognition. The computationally expensive nature of these networks has led to the proliferation of implementations that sacrifice abstraction for high performance. In this paper, we present Latte, a domain-specific language for DNNs that provides a natural abstraction for specifying new layers without sacrificing performance. Users of Latte express DNNs as ensembles of neurons with connections between them. 
The Latte compiler synthesizes a program based on the user specification, applies a suite of domain-specific and general optimizations, and emits efficient machine code for heterogeneous architectures. Latte also includes a communication runtime for distributed memory data-parallelism. Using networks described using Latte, we demonstrate 3-6x speedup over Caffe (C++/MKL) on the three state-of-the-art ImageNet models executing on an Intel Xeon E5-2699 v3 x86 CPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Adams:2016:CPP, author = "Michael D. Adams and Celeste Hollenbeck and Matthew Might", title = "On the complexity and performance of parsing with derivatives", journal = j-SIGPLAN, volume = "51", number = "6", pages = "224--236", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current algorithms for context-free parsing inflict a trade-off between ease of understanding, ease of implementation, theoretical complexity, and practical performance. No algorithm achieves all of these properties simultaneously. Might et al. introduced parsing with derivatives, which handles arbitrary context-free grammars while being both easy to understand and simple to implement. Despite much initial enthusiasm and a multitude of independent implementations, its worst-case complexity has never been proven to be better than exponential. In fact, high-level arguments claiming it is fundamentally exponential have been advanced and even accepted as part of the folklore. Performance ended up being sluggish in practice, and this sluggishness was taken as informal evidence of exponentiality. In this paper, we reexamine the performance of parsing with derivatives. We have discovered that it is not exponential but, in fact, cubic. Moreover, simple (though perhaps not obvious) modifications to the implementation by Might et al. lead to an implementation that is not only easy to understand but also highly performant in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Heule:2016:SSA, author = "Stefan Heule and Eric Schkufza and Rahul Sharma and Alex Aiken", title = "Stratified synthesis: automatically learning the x86-64 instruction set", journal = j-SIGPLAN, volume = "51", number = "6", pages = "237--250", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908121", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The x86-64 ISA sits at the bottom of the software stack of most desktop and server software. Because of its importance, many software analysis and verification tools depend, either explicitly or implicitly, on correct modeling of the semantics of x86-64 instructions. However, formal semantics for the x86-64 ISA are difficult to obtain and often written manually through great effort. 
We describe an automatically synthesized formal semantics of the input/output behavior for a large fraction of the x86-64 Haswell ISA's many thousands of instruction variants. The key to our results is stratified synthesis, where we use a set of instructions whose semantics are known to synthesize the semantics of additional instructions whose semantics are unknown. As the set of formally described instructions increases, the synthesis vocabulary expands, making it possible to synthesize the semantics of increasingly complex instructions. Using this technique we automatically synthesized formal semantics for 1,795 instruction variants of the x86-64 Haswell ISA. We evaluate the learned semantics against manually written semantics (where available) and find that they are formally equivalent with the exception of 50 instructions, where the manually written semantics contain an error. We further find the learned formulas to be largely as precise as manually written ones and of similar size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Eizenberg:2016:ROD, author = "Ariel Eizenberg and Shiliang Hu and Gilles Pokam and Joseph Devietti", title = "{Remix}: online detection and repair of cache contention for the {JVM}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "251--265", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908090", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As ever more computation shifts onto multicore architectures, it is increasingly critical to find effective ways of dealing with multithreaded performance bugs like true and false sharing. Previous approaches to fixing false sharing in unmanaged languages have employed highly-invasive runtime program modifications. We observe that managed language runtimes, with garbage collection and JIT code compilation, present unique opportunities to repair such bugs directly, mirroring the techniques used in manual repairs. We present Remix, a modified version of the Oracle HotSpot JVM which can detect cache contention bugs and repair false sharing at runtime. Remix's detection mechanism leverages recent performance counter improvements on Intel platforms, which allow for precise, unobtrusive monitoring of cache contention at the hardware level. Remix can detect and repair known false sharing issues in the LMAX Disruptor high-performance inter-thread messaging library and the Spring Reactor event-processing framework, automatically providing 1.5-2x speedups over unoptimized code and matching the performance of hand-optimization. Remix also finds a new false sharing bug in SPECjvm2008, and uncovers a true sharing bug in the HotSpot JVM that, when fixed, improves the performance of three NAS Parallel Benchmarks by 7-25x. 
Remix incurs no statistically-significant performance overhead on other benchmarks that do not exhibit cache contention, making Remix practical for always-on use.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{David:2016:SSB, author = "Yaniv David and Nimrod Partush and Eran Yahav", title = "Statistical similarity of binaries", journal = j-SIGPLAN, volume = "51", number = "6", pages = "266--280", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908126", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the problem of finding similar procedures in stripped binaries. We present a new statistical approach for measuring the similarity between two procedures. Our notion of similarity allows us to find similar code even when it has been compiled using different compilers, or has been modified. The main idea is to use similarity by composition: decompose the code into smaller comparable fragments, define semantic similarity between fragments, and use statistical reasoning to lift fragment similarity into similarity between procedures. We have implemented our approach in a tool called Esh, and applied it to find various prominent vulnerabilities across compilers and versions, including Heartbleed, Shellshock and Venom. We show that Esh produces high accuracy results, with few to no false positives --- a crucial factor in the scenario of vulnerability search in stripped binaries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Zhang:2016:ABS, author = "Yizhou Zhang and Guido Salvaneschi and Quinn Beightol and Barbara Liskov and Andrew C. Myers", title = "Accepting blame for safe tunneled exceptions", journal = j-SIGPLAN, volume = "51", number = "6", pages = "281--295", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908086", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Unhandled exceptions crash programs, so a compile-time check that exceptions are handled should in principle make software more reliable. But designers of some recent languages have argued that the benefits of statically checked exceptions are not worth the costs. We introduce a new statically checked exception mechanism that addresses the problems with existing checked-exception mechanisms. In particular, it interacts well with higher-order functions and other design patterns. The key insight is that whether an exception should be treated as a ``checked'' exception is not a property of its type but rather of the context in which the exception propagates. Statically checked exceptions can ``tunnel'' through code that is oblivious to their presence, but the type system nevertheless checks that these exceptions are handled. Further, exceptions can be tunneled without being accidentally caught, by expanding the space of exception identifiers to identify the exception-handling context. The resulting mechanism is expressive and syntactically light, and can be implemented efficiently. 
We demonstrate the expressiveness of the mechanism using significant codebases and evaluate its performance. We have implemented this new exception mechanism as part of the new Genus programming language, but the mechanism could equally well be applied to other programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Kent:2016:OTM, author = "Andrew M. Kent and David Kempe and Sam Tobin-Hochstadt", title = "Occurrence typing modulo theories", journal = j-SIGPLAN, volume = "51", number = "6", pages = "296--309", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908091", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new type system combining occurrence typing --- a technique previously used to type check programs in dynamically-typed languages such as Racket, Clojure, and JavaScript --- with dependent refinement types. We demonstrate that the addition of refinement types allows the integration of arbitrary solver-backed reasoning about logical propositions from external theories. By building on occurrence typing, we can add our enriched type system as a natural extension of Typed Racket, reusing its core while increasing its expressiveness. The result is a well-tested type system with a conservative, decidable core in which types may depend on a small but extensible set of program terms. In addition to describing our design, we present the following: a formal model and proof of correctness; a strategy for integrating new theories, with specific examples including linear arithmetic and bitvectors; and an evaluation in the context of the full Typed Racket implementation. Specifically, we take safe vector operations as a case study, examining all vector accesses in a 56,000 line corpus of Typed Racket programs. Our system is able to prove that 50\% of these are safe with no new annotations, and with a few annotations and modifications we capture more than 70\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Vekris:2016:RTT, author = "Panagiotis Vekris and Benjamin Cosman and Ranjit Jhala", title = "Refinement types for {TypeScript}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "310--325", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908110", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Refined TypeScript (RSC), a lightweight refinement type system for TypeScript, that enables static verification of higher-order, imperative programs. We develop a formal system for RSC that delineates the interaction between refinement types and mutability, and enables flow-sensitive reasoning by translating input programs to an equivalent intermediate SSA form. By establishing type safety for the intermediate form, we prove safety for the input programs. 
Next, we extend the core to account for imperative and dynamic features of TypeScript, including overloading, type reflection, ad hoc type hierarchies and object initialization. Finally, we evaluate RSC on a set of real-world benchmarks, including parts of the Octane benchmarks, D3, Transducers, and the TypeScript compiler. We show how RSC successfully establishes a number of value dependent properties, such as the safety of array accesses and downcasts, while incurring a modest overhead in type annotations and code restructuring.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Smith:2016:MPS, author = "Calvin Smith and Aws Albarghouthi", title = "{MapReduce} program synthesis", journal = j-SIGPLAN, volume = "51", number = "6", pages = "326--340", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908102", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "By abstracting away the complexity of distributed systems, large-scale data processing platforms-MapReduce, Hadoop, Spark, Dryad, etc.-have provided developers with simple means for harnessing the power of the cloud. In this paper, we ask whether we can automatically synthesize MapReduce-style distributed programs from input-output examples. Our ultimate goal is to enable end users to specify large-scale data analyses through the simple interface of examples. We thus present a new algorithm and tool for synthesizing programs composed of efficient data-parallel operations that can execute on cloud computing infrastructure. We evaluate our tool on a range of real-world big-data analysis tasks and general computations. Our results demonstrate the efficiency of our approach and the small number of examples it requires to synthesize correct, scalable programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Chugh:2016:PDM, author = "Ravi Chugh and Brian Hempel and Mitchell Spradlin and Jacob Albers", title = "Programmatic and direct manipulation, together at last", journal = j-SIGPLAN, volume = "51", number = "6", pages = "341--354", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908103", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Direct manipulation interfaces and programmatic systems have distinct and complementary strengths. The former provide intuitive, immediate visual feedback and enable rapid prototyping, whereas the latter enable complex, reusable abstractions. Unfortunately, existing systems typically force users into just one of these two interaction modes. We present a system called Sketch-n-Sketch that integrates programmatic and direct manipulation for the particular domain of Scalable Vector Graphics (SVG). In Sketch-n-Sketch, the user writes a program to generate an output SVG canvas. Then the user may directly manipulate the canvas while the system immediately infers a program update in order to match the changes to the output, a workflow we call live synchronization. 
To achieve this, we propose (i) a technique called trace-based program synthesis that takes program execution history into account in order to constrain the search space and (ii) heuristics for dealing with ambiguities. Based on our experience with examples spanning 2,000 lines of code and from the results of a preliminary user study, we believe that Sketch-n-Sketch provides a novel workflow that can augment traditional programming systems. Our approach may serve as the basis for live synchronization in other application domains, as well as a starting point for yet more ambitious ways of combining programmatic and direct manipulation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Loncaric:2016:FSF, author = "Calvin Loncaric and Emina Torlak and Michael D. Ernst", title = "Fast synthesis of fast collections", journal = j-SIGPLAN, volume = "51", number = "6", pages = "355--368", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908122", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many applications require specialized data structures not found in the standard libraries, but implementing new data structures by hand is tedious and error-prone. This paper presents a novel approach for synthesizing efficient implementations of complex collection data structures from high-level specifications that describe the desired retrieval operations. Our approach handles a wider range of data structures than previous work, including structures that maintain an order among their elements or have complex retrieval methods. We have prototyped our approach in a data structure synthesizer called Cozy. Four large, real-world case studies compare structures generated by Cozy against handwritten implementations in terms of correctness and performance. Structures synthesized by Cozy match the performance of handwritten data structures while avoiding human error.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{McClurg:2016:EDN, author = "Jedidiah McClurg and Hossein Hojjat and Nate Foster and Pavol Cern{\'y}", title = "Event-driven network programming", journal = j-SIGPLAN, volume = "51", number = "6", pages = "369--385", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908097", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-defined networking (SDN) programs must simultaneously describe static forwarding behavior and dynamic updates in response to events. Event-driven updates are critical to get right, but difficult to implement correctly due to the high degree of concurrency in networks. Existing SDN platforms offer weak guarantees that can break application invariants, leading to problems such as dropped packets, degraded performance, security violations, etc. This paper introduces EVENT-DRIVEN CONSISTENT UPDATES that are guaranteed to preserve well-defined behaviors when transitioning between configurations in response to events. 
We propose NETWORK EVENT STRUCTURES (NESs) to model constraints on updates, such as which events can be enabled simultaneously and causal dependencies between events. We define an extension of the NetKAT language with mutable state, give semantics to stateful programs using NESs, and discuss provably-correct strategies for implementing NESs in SDNs. Finally, we evaluate our approach empirically, demonstrating that it gives well-defined consistency guarantees while avoiding expensive synchronization and packet buffering.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Beckett:2016:TN, author = "Ryan Beckett and Michael Greenberg and David Walker", title = "Temporal {NetKAT}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "386--401", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908108", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past 5-10 years, the rise of software-defined networking (SDN) has inspired a wide range of new systems, libraries, hypervisors and languages for programming, monitoring, and debugging network behavior. Oftentimes, these systems are disjoint: one language for programming and another for verification, and yet another for run-time monitoring and debugging. In this paper, we present a new, unified framework, called Temporal NetKAT, capable of facilitating all of these tasks at once. As its name suggests, Temporal NetKAT is the synthesis of two formal theories: past-time (finite trace) linear temporal logic and (network) Kleene Algebra with Tests. Temporal predicates allow programmers to write down concise properties of a packet's path through the network and to make dynamic packet-forwarding, access control or debugging decisions on that basis. In addition to being useful for programming, the combined equational theory of LTL and NetKAT facilitates proofs of path-based correctness properties. Using new, general, proof techniques, we show that the equational semantics is sound with respect to the denotational semantics, and, for a class of programs we call network-wide programs, complete. We have also implemented a compiler for temporal NetKAT, evaluated its performance on a range of benchmarks, and studied the effectiveness of several optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{El-Hassany:2016:SCA, author = "Ahmed El-Hassany and Jeremie Miserez and Pavol Bielik and Laurent Vanbever and Martin Vechev", title = "{SDNRacer}: concurrency analysis for software-defined networks", journal = j-SIGPLAN, volume = "51", number = "6", pages = "402--415", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency violations are an important source of bugs in Software-Defined Networks (SDN), often leading to policy or invariant violations. Unfortunately, concurrency violations are also notoriously difficult to avoid, detect and debug.
This paper presents a novel approach and a tool, SDNRacer, for detecting concurrency violations of SDNs. Our approach is enabled by three key ingredients: (i) a precise happens-before model for SDNs that captures when events can happen concurrently; (ii) a set of sound, domain-specific filters that reduce reported violations by orders of magnitude; and (iii) a sound and complete dynamic analyzer, based on the above, that can ensure the network is free of harmful errors such as data races and per-packet incoherence. We evaluated SDNRacer on several real-world OpenFlow controllers, running both reactive and proactive applications in large networks. We show that SDNRacer is practically effective: it quickly pinpoints harmful concurrency violations without overwhelming the user with false positives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Shambaugh:2016:RCV, author = "Rian Shambaugh and Aaron Weiss and Arjun Guha", title = "{Rehearsal}: a configuration verification tool for {Puppet}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "416--430", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908083", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale data centers and cloud computing have turned system configuration into a challenging problem. Several widely-publicized outages have been blamed not on software bugs, but on configuration bugs. To cope, thousands of organizations use system configuration languages to manage their computing infrastructure. Of these, Puppet is the most widely used with thousands of paying customers and many more open-source users. The heart of Puppet is a domain-specific language that describes the state of a system. Puppet already performs some basic static checks, but they only prevent a narrow range of errors. Furthermore, testing is ineffective because many errors are only triggered under specific machine states that are difficult to predict and reproduce. With several examples, we show that a key problem with Puppet is that configurations can be non-deterministic. This paper presents Rehearsal, a verification tool for Puppet configurations. Rehearsal implements a sound, complete, and scalable determinacy analysis for Puppet. To develop it, we (1) present a formal semantics for Puppet, (2) use several analyses to shrink our models to a tractable size, and (3) frame determinism-checking as decidable formulas for an SMT solver. Rehearsal then leverages the determinacy analysis to check other important properties, such as idempotency.
Finally, we apply Rehearsal to several real-world Puppet configurations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Chen:2016:TCV, author = "Hao Chen and Xiongnan (Newman) Wu and Zhong Shao and Joshua Lockerman and Ronghui Gu", title = "Toward compositional verification of interruptible {OS} kernels and device drivers", journal = j-SIGPLAN, volume = "51", number = "6", pages = "431--447", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908101", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An operating system (OS) kernel forms the lowest level of any system software stack. The correctness of the OS kernel is the basis for the correctness of the entire system. Recent efforts have demonstrated the feasibility of building formally verified general-purpose kernels, but it is unclear how to extend their work to verify the functional correctness of device drivers, due to the non-local effects of interrupts. In this paper, we present a novel compositional framework for building certified interruptible OS kernels with device drivers. We provide a general device model that can be instantiated with various hardware devices, and a realistic formal model of interrupts, which can be used to reason about interruptible code. We have realized this framework in the Coq proof assistant. To demonstrate the effectiveness of our new approach, we have successfully extended an existing verified non-interruptible kernel with our framework and turned it into an interruptible kernel with verified device drivers. To the best of our knowledge, this is the first verified interruptible operating system with device drivers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Mullen:2016:VPO, author = "Eric Mullen and Daryl Zuniga and Zachary Tatlock and Dan Grossman", title = "Verified peephole optimizations for {CompCert}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "448--461", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908109", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transformations over assembly code are common in many compilers. These transformations are also some of the most bug-dense compiler components. Such bugs could be eliminated by formally verifying the compiler, but state-of-the-art formally verified compilers like CompCert do not support assembly-level program transformations. This paper presents Peek, a framework for expressing, verifying, and running meaning-preserving assembly-level program transformations in CompCert. Peek contributes four new components: a lower level semantics for CompCert x86 syntax, a liveness analysis, a library for expressing and verifying peephole optimizations, and a verified peephole optimization pass built into CompCert. Each of these is accompanied by a correctness proof in Coq against realistic assumptions about the calling convention and the system memory allocator. 
Verifying peephole optimizations in Peek requires proving only a set of local properties, which we have proved are sufficient to ensure global transformation correctness. We have proven these local properties for 28 peephole transformations from the literature. We discuss the development of our new assembly semantics, liveness analysis, representation of program transformations, and execution engine; describe the verification challenges of each component; and detail techniques we applied to mitigate the proof burden.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Ren:2016:JTS, author = "Brianna M. Ren and Jeffrey S. Foster", title = "Just-in-time static type checking for dynamic languages", journal = j-SIGPLAN, volume = "51", number = "6", pages = "462--476", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908127", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic languages such as Ruby, Python, and JavaScript have many compelling benefits, but the lack of static types means subtle errors can remain latent in code for a long time. While many researchers have developed various systems to bring some of the benefits of static types to dynamic languages, prior approaches have trouble dealing with metaprogramming, which generates code as the program executes. In this paper, we propose Hummingbird, a new system that uses a novel technique, just-in-time static type checking, to type check Ruby code even in the presence of metaprogramming. In Hummingbird, method type signatures are gathered dynamically at run-time, as those methods are created. When a method is called, Hummingbird statically type checks the method body against current type signatures. Thus, Hummingbird provides thorough static checks on a per-method basis, while also allowing arbitrarily complex metaprogramming. For performance, Hummingbird memoizes the static type checking pass, invalidating cached checks only if necessary. We formalize Hummingbird using a core, Ruby-like language and prove it sound. To evaluate Hummingbird, we applied it to six apps, including three that use Ruby on Rails, a powerful framework that relies heavily on metaprogramming. We found that all apps typecheck successfully using Hummingbird, and that Hummingbird's performance overhead is reasonable. We applied Hummingbird to earlier versions of one Rails app and found several type errors that had been introduced and then fixed. 
Lastly, we demonstrate using Hummingbird in Rails development mode to typecheck an app as live updates are applied to it.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Petricek:2016:TDM, author = "Tomas Petricek and Gustavo Guerra and Don Syme", title = "Types from data: making structured data first-class citizens in {F\#}", journal = j-SIGPLAN, volume = "51", number = "6", pages = "477--490", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908115", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most modern applications interact with external services and access data in structured formats such as XML, JSON and CSV. Static type systems do not understand such formats, often making data access more cumbersome. Should we give up and leave the messy world of external data to dynamic typing and runtime checks? Of course, not! We present F\# Data, a library that integrates external structured data into F\#. As most real-world data does not come with an explicit schema, we develop a shape inference algorithm that infers a shape from representative sample documents. We then integrate the inferred shape into the F\# type system using type providers. We formalize the process and prove a relative type soundness theorem. Our library significantly reduces the amount of data access code and it provides additional safety guarantees when contrasted with the widely used weakly typed techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Zhu:2016:ALS, author = "He Zhu and Gustavo Petri and Suresh Jagannathan", title = "Automatically learning shape specifications", journal = j-SIGPLAN, volume = "51", number = "6", pages = "491--507", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908125", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel automated procedure for discovering expressive shape specifications for sophisticated functional data structures. Our approach extracts potential shape predicates based on the definition of constructors of arbitrary user-defined inductive data types, and combines these predicates within an expressive first-order specification language using a lightweight data-driven learning procedure. Notably, this technique requires no programmer annotations, and is equipped with a type-based decision procedure to verify the correctness of discovered specifications. 
Experimental results indicate that our implementation is both efficient and effective, capable of automatically synthesizing sophisticated shape specifications over a range of complex data types, going well beyond the scope of existing solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Yaghmazadeh:2016:STH, author = "Navid Yaghmazadeh and Christian Klinger and Isil Dillig and Swarat Chaudhuri", title = "Synthesizing transformations on hierarchically structured data", journal = j-SIGPLAN, volume = "51", number = "6", pages = "508--521", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908088", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new approach for synthesizing transformations on tree-structured data, such as Unix directories and XML documents. We consider a general abstraction for such data, called hierarchical data trees (HDTs) and present a novel example-driven synthesis algorithm for HDT transformations. Our central insight is to reduce the problem of synthesizing tree transformers to the synthesis of list transformations that are applied to the paths of the tree. The synthesis problem over lists is solved using a new algorithm that combines SMT solving and decision tree learning. We have implemented our technique in a system called HADES and show that HADES can automatically synthesize a variety of interesting transformations collected from online forums.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Polikarpova:2016:PSP, author = "Nadia Polikarpova and Ivan Kuraj and Armando Solar-Lezama", title = "Program synthesis from polymorphic refinement types", journal = j-SIGPLAN, volume = "51", number = "6", pages = "522--538", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908093", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method for synthesizing recursive functions that provably satisfy a given specification in the form of a polymorphic refinement type. We observe that such specifications are particularly suitable for program synthesis for two reasons. First, they offer a unique combination of expressive power and decidability, which enables automatic verification-and hence synthesis-of nontrivial programs. Second, a type-based specification for a program can often be effectively decomposed into independent specifications for its components, causing the synthesizer to consider fewer component combinations and leading to a combinatorial reduction in the size of the search space. At the core of our synthesis procedure is a new algorithm for refinement type checking, which supports specification decomposition. We have evaluated our prototype implementation on a large set of synthesis problems and found that it exceeds the state of the art in terms of both scalability and usability. 
The tool was able to synthesize more complex programs than those reported in prior work (several sorting algorithms and operations on balanced search trees), as well as most of the benchmarks tackled by existing synthesizers, often starting from a more concise and intuitive user input.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Maleki:2016:HOT, author = "Sepideh Maleki and Annie Yang and Martin Burtscher", title = "Higher-order and tuple-based massively-parallel prefix sums", journal = j-SIGPLAN, volume = "51", number = "6", pages = "539--552", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908089", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Prefix sums are an important parallel primitive, especially in massively-parallel programs. This paper discusses two orthogonal generalizations thereof, which we call higher-order and tuple-based prefix sums. Moreover, it describes and evaluates SAM, a GPU-friendly algorithm for computing prefix sums and other scans that directly supports higher orders and tuple values. Its templated CUDA implementation unifies all of these computations in a single 100-statement kernel. SAM is communication-efficient in the sense that it minimizes main-memory accesses. When computing prefix sums of a million or more values, it outperforms Thrust and CUDPP on both a Titan X and a K40 GPU. On the Titan X, SAM reaches memory-copy speeds for large input sizes, which cannot be surpassed. SAM outperforms CUB, the currently fastest conventional prefix sum implementation, by up to a factor of 2.9 on eighth-order prefix sums and by up to a factor of 2.6 on eight-tuple prefix sums.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Kim:2016:DOF, author = "Junghyun Kim and Gangwon Jo and Jaehoon Jung and Jungwon Kim and Jaejin Lee", title = "A distributed {OpenCL} framework using redundant computation and data replication", journal = j-SIGPLAN, volume = "51", number = "6", pages = "553--569", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications written solely in OpenCL or CUDA cannot execute on a cluster as a whole. Most previous approaches that extend these programming models to clusters are based on a common idea: designating a centralized host node and coordinating the other nodes with the host for computation. However, the centralized host node is a serious performance bottleneck when the number of nodes is large. In this paper, we propose a scalable and distributed OpenCL framework called SnuCL-D for large-scale clusters. SnuCL-D's remote device virtualization provides an OpenCL application with an illusion that all compute devices in a cluster are confined in a single node. 
To reduce the amount of control-message and data communication between nodes, SnuCL-D replicates the OpenCL host program execution and data in each node. We also propose a new OpenCL host API function and a queueing optimization technique that significantly reduce the overhead incurred by the previous centralized approaches. To show the effectiveness of SnuCL-D, we evaluate SnuCL-D with a microbenchmark and eleven benchmark applications on a large-scale CPU cluster and a medium-scale GPU cluster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Degenbaev:2016:ITG, author = "Ulan Degenbaev and Jochen Eisinger and Manfred Ernst and Ross McIlroy and Hannes Payer", title = "Idle time garbage collection scheduling", journal = j-SIGPLAN, volume = "51", number = "6", pages = "570--583", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908106", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient garbage collection is increasingly important in today's managed language runtime systems that demand low latency, low memory consumption, and high throughput. Garbage collection may pause the application for many milliseconds to identify live memory, free unused memory, and compact fragmented regions of memory, even when employing concurrent garbage collection. In animation-based applications that require 60 frames per second, these pause times may be observable, degrading user experience. This paper introduces idle time garbage collection scheduling to increase the responsiveness of applications by hiding expensive garbage collection operations inside of small, otherwise unused idle portions of the application's execution, resulting in smoother animations. Additionally we take advantage of idleness to reduce memory consumption while allowing higher memory use when high throughput is required. We implemented idle time garbage collection scheduling in V8, an open-source, production JavaScript virtual machine running within Chrome. We present performance results on various benchmarks running popular webpages and show that idle time garbage collection scheduling can significantly improve latency and memory consumption. Furthermore, we introduce a new metric called frame time discrepancy to quantify the quality of the user experience and precisely measure the improvements that idle time garbage collection provides for a WebGL-based game benchmark. 
Idle time garbage collection is shipped and enabled by default in Chrome.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Jacek:2016:ALP, author = "Nicholas Jacek and Meng-Chieh Chiu and Benjamin Marlin and Eliot Moss", title = "Assessing the limits of program-specific garbage collection performance", journal = j-SIGPLAN, volume = "51", number = "6", pages = "584--598", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908120", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the ultimate limits of program-specific garbage collector performance for real programs. We first characterize the GC schedule optimization problem using Markov Decision Processes (MDPs). Based on this characterization, we develop a method of determining, for a given program run and heap size, an optimal schedule of collections for a non-generational collector. We further explore the limits of performance of a generational collector, where it is not feasible to search the space of schedules to prove optimality. Still, we show significant improvements with Least Squares Policy Iteration, a reinforcement learning technique for solving MDPs. We demonstrate that there is considerable promise to reduce garbage collection costs by developing program-specific collection policies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{vGleissenthall:2016:CUQ, author = "Klaus v. Gleissenthall and Nikolaj Bj{\o}rner and Andrey Rybalchenko", title = "Cardinalities and universal quantifiers for verifying parameterized systems", journal = j-SIGPLAN, volume = "51", number = "6", pages = "599--613", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908129", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallel and distributed systems rely on intricate protocols to manage shared resources and synchronize, i.e., to manage how many processes are in a particular state. Effective verification of such systems requires universal quantification to reason about parameterized state and cardinalities that track sets of processes, messages, and failures, in order to adequately capture protocol logic. In this paper we present Tool, an automatic invariant synthesis method that integrates cardinality-based reasoning and universal quantification. The resulting increase of expressiveness allows Tool to verify, for the first time, a representative collection of intricate parameterized protocols.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Padon:2016:ISV, author = "Oded Padon and Kenneth L.
McMillan and Aurojit Panda and Mooly Sagiv and Sharon Shoham", title = "{Ivy}: safety verification by interactive generalization", journal = j-SIGPLAN, volume = "51", number = "6", pages = "614--630", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908118", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite several decades of research, the problem of formal verification of infinite-state systems has resisted effective automation. We describe a system --- Ivy --- for interactively verifying safety of infinite-state systems. Ivy's key principle is that whenever verification fails, Ivy graphically displays a concrete counterexample to induction. The user then interactively guides generalization from this counterexample. This process continues until an inductive invariant is found. Ivy searches for universally quantified invariants, and uses a restricted modeling language. This ensures that all verification conditions can be checked algorithmically. All user interactions are performed using graphical models, easing the user's task. We describe our initial experience with verifying several distributed protocols.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Yang:2016:PDI, author = "Jean Yang and Travis Hance and Thomas H. Austin and Armando Solar-Lezama and Cormac Flanagan and Stephen Chong", title = "Precise, dynamic information flow for database-backed applications", journal = j-SIGPLAN, volume = "51", number = "6", pages = "631--647", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908098", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach for dynamic information flow control across the application and database. Our approach reduces the amount of policy code required, yields formal guarantees across the application and database, works with existing relational database implementations, and scales for realistic applications. In this paper, we present a programming model that factors out information flow policies from application code and database queries, a dynamic semantics for the underlying $\lambda^{JDB}$ core language, and proofs of termination-insensitive non-interference and policy compliance for the semantics. We implement these ideas in Jacqueline, a Python web framework, and demonstrate feasibility through three application case studies: a course manager, a health record system, and a conference management system used to run an academic workshop.
We show that in comparison to traditional applications with hand-coded policy checks, Jacqueline applications have (1) a smaller trusted computing base, (2) fewer lines of policy code, and (3) reasonable, often negligible, additional overheads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Costanzo:2016:EEV, author = "David Costanzo and Zhong Shao and Ronghui Gu", title = "End-to-end verification of information-flow security for {C} and assembly programs", journal = j-SIGPLAN, volume = "51", number = "6", pages = "648--664", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908100", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Protecting the confidentiality of information manipulated by a computing system is one of the most important challenges facing today's cybersecurity community. A promising step toward conquering this challenge is to formally verify that the end-to-end behavior of the computing system really satisfies various information-flow policies. Unfortunately, because today's system software still consists of both C and assembly programs, the end-to-end verification necessarily requires that we not only prove the security properties of individual components, but also carefully preserve these properties through compilation and cross-language linking. In this paper, we present a novel methodology for formally verifying end-to-end security of a software system that consists of both C and assembly programs. We introduce a general definition of observation function that unifies the concepts of policy specification, state indistinguishability, and whole-execution behaviors. We show how to use different observation functions for different levels of abstraction, and how to link different security proofs across abstraction levels using a special kind of simulation that is guaranteed to preserve state indistinguishability. To demonstrate the effectiveness of our new methodology, we have successfully constructed an end-to-end security proof, fully formalized in the Coq proof assistant, of a nontrivial operating system kernel (running on an extended CompCert x86 assembly machine model). Some parts of the kernel are written in C and some are written in assembly; we verify all of the code, regardless of language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Sinha:2016:DVM, author = "Rohit Sinha and Manuel Costa and Akash Lal and Nuno P. Lopes and Sriram Rajamani and Sanjit A. Seshia and Kapil Vaswani", title = "A design and verification methodology for secure isolated regions", journal = j-SIGPLAN, volume = "51", number = "6", pages = "665--681", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908113", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hardware support for isolated execution (such as Intel SGX) enables development of applications that keep their code and data confidential even while running in a hostile or compromised host. 
However, automatically verifying that such applications satisfy confidentiality remains challenging. We present a methodology for designing such applications in a way that enables certifying their confidentiality. Our methodology consists of forcing the application to communicate with the external world through a narrow interface, compiling it with runtime checks that aid verification, and linking it with a small runtime that implements the narrow interface. The runtime includes services such as secure communication channels and memory management. We formalize this restriction on the application as Information Release Confinement (IRC), and we show that it allows us to decompose the task of proving confidentiality into (a) one-time, human-assisted functional verification of the runtime to ensure that it does not leak secrets, (b) automatic verification of the application's machine code to ensure that it satisfies IRC and does not directly read or corrupt the runtime's internal state. We present /CONFIDENTIAL: a verifier for IRC that is modular, automatic, and keeps our compiler out of the trusted computing base. Our evaluation suggests that the methodology scales to real-world applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Spiegelman:2016:TDS, author = "Alexander Spiegelman and Guy Golan-Gueta and Idit Keidar", title = "Transactional data structure libraries", journal = j-SIGPLAN, volume = "51", number = "6", pages = "682--696", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908112", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce transactions into libraries of concurrent data structures; such transactions can be used to ensure atomicity of sequences of data structure operations. By focusing on transactional access to a well-defined set of data structure operations, we strike a balance between the ease-of-programming of transactions and the efficiency of custom-tailored data structures. We exemplify this concept by designing and implementing a library supporting transactions on any number of maps, sets (implemented as skiplists), and queues. Our library offers efficient and scalable transactions, which are an order of magnitude faster than state-of-the-art transactional memory toolkits. Moreover, our approach treats stand-alone data structure operations (like put and enqueue) as first class citizens, and allows them to execute with virtually no overhead, at the speed of the original data structure library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Baghsorkhi:2016:FAV, author = "Sara S. 
Baghsorkhi and Nalini Vasudevan and Youfeng Wu", title = "{FlexVec}: auto-vectorization for irregular loops", journal = j-SIGPLAN, volume = "51", number = "6", pages = "697--710", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908111", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional vectorization techniques build a dependence graph with distance and direction information to determine whether a loop is vectorizable. Since vectorization reorders the execution of instructions across iterations, in general instructions involved in a strongly connected component (SCC) are deemed not vectorizable unless the SCC can be eliminated using techniques such as scalar expansion or privatization. Therefore, traditional vectorization techniques are limited in their ability to efficiently handle loops with dynamic cross-iteration dependencies or complex control flow interweaved within the dependence cycles. When potential dependencies do not occur very often, the end result is underutilization of the SIMD hardware. In this paper, we propose the FlexVec architecture, which combines new vector instructions with novel code generation techniques to dynamically adjust vector length for loop statements affected by cross-iteration dependencies that happen at runtime. We have designed and implemented FlexVec's new ISA as extensions to the recently released AVX-512 ISA. We have evaluated the performance improvements enabled by FlexVec vectorization for 11 C/C++ SPEC 2006 benchmarks and 7 real applications with AVX-512 vectorization as baseline. We show that the FlexVec vectorization technique produces a Geomean speedup of 9\% for SPEC 2006 and a Geomean speedup of 11\% for 7 real applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Kamil:2016:VLS, author = "Shoaib Kamil and Alvin Cheung and Shachar Itzhaky and Armando Solar-Lezama", title = "Verified lifting of stencil computations", journal = j-SIGPLAN, volume = "51", number = "6", pages = "711--726", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908117", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper demonstrates a novel combination of program synthesis and verification to lift stencil computations from low-level Fortran code to a high-level summary expressed using a predicate language. The technique is sound and mostly automated, and leverages counter-example guided inductive synthesis (CEGIS) to find provably correct translations. Lifting existing code to a high-performance description language has a number of benefits, including maintainability and performance portability. For example, our experiments show that the lifted summaries can enable domain specific compilers to do a better job of parallelization as compared to an off-the-shelf compiler working on the original code, and can even support fully automatic migration to hardware accelerators such as GPUs. We have implemented verified lifting in a system called STNG and have evaluated it using microbenchmarks, mini-apps, and real-world applications.
We demonstrate the benefits of verified lifting by first automatically summarizing Fortran source code into a high-level predicate language, and subsequently translating the lifted summaries into Halide, with the translated code achieving median performance speedups of 4.1X and up to 24X for non-trivial stencils as compared to the original implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '16 conference proceedings.", } @Article{Chen:2017:BDA, author = "Yunji Chen", title = "Big Data Analytics and Intelligence at {Alibaba Cloud}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "1--1", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037699", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As China's largest cloud service provider, Alibaba Cloud has been one of the fastest growing cloud computing platforms in the world. In this talk, I'll present an overview of Big Data and AI computing platform at Alibaba Cloud, which consists of a wide range of products and services to enable fast and efficient big data development and intelligent analysis. The underlying computing infrastructure supports a variety of computation scenarios, including batch, interactive, stream, and graph computation, as well as large-scale machine learning on heterogeneous cloud-scale data centers. Several big data products, such as rule-based engine, recommendation system, BI tools, etc., are provided to address different business needs. The platform not only supports Alibaba's internal businesses but also provides solid services to enterprise customers. In addition, I'll describe key techniques and system internals, and outline outstanding research and engineering challenges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Cherupalli:2017:DAS, author = "Hari Cherupalli and Henry Duwe and Weidong Ye and Rakesh Kumar and John Sartori", title = "Determining Application-specific Peak Power and Energy Requirements for Ultra-low Power Processors", journal = j-SIGPLAN, volume = "52", number = "4", pages = "3--16", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037711", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many emerging applications such as IoT, wearables, implantables, and sensor networks are power- and energy-constrained. These applications rely on ultra-low-power processors that have rapidly become the most abundant type of processor manufactured today. In the ultra-low-power embedded systems used by these applications, peak power and energy requirements are the primary factors that determine critical system characteristics, such as size, weight, cost, and lifetime. While the power and energy requirements of these systems tend to be application-specific, conventional techniques for rating peak power and energy cannot accurately bound the power and energy requirements of an application running on a processor, leading to over-provisioning that increases system size and weight. 
In this paper, we present an automated technique that performs hardware-software co-analysis of the application and ultra-low-power processor in an embedded system to determine application-specific peak power and energy requirements. Our technique provides more accurate, tighter bounds than conventional techniques for determining peak power and energy requirements, reporting 15\% lower peak power and 17\% lower peak energy, on average, than a conventional approach based on profiling and guardbanding. Compared to an aggressive stressmark-based approach, our technique reports power and energy bounds that are 26\% and 26\% lower, respectively, on average. Also, unlike conventional approaches, our technique reports guaranteed bounds on peak power and energy independent of an application's input set. Tighter bounds on peak power and energy can be exploited to reduce system size, weight, and cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Chen:2017:PPQ, author = "Quan Chen and Hailong Yang and Minyi Guo and Ram Srivatsa Kannan and Jason Mars and Lingjia Tang", title = "{Prophet}: Precise {QoS} Prediction on Non-Preemptive Accelerators to Improve Utilization in Warehouse-Scale Computers", journal = j-SIGPLAN, volume = "52", number = "4", pages = "17--32", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037700", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Guaranteeing Quality-of-Service (QoS) of latency-sensitive applications while improving server utilization through application co-location is important yet challenging in modern datacenters. The key challenge is that when applications are co-located on a server, performance interference due to resource contention can be detrimental to the application QoS. Although prior work has proposed techniques to identify ``safe'' co-locations where application QoS is satisfied by predicting the performance interference on multicores, no such prediction technique exists for accelerators such as GPUs. In this work, we present Prophet, an approach to precisely predict the performance degradation of latency-sensitive applications on accelerators due to application co-location. We analyzed the performance interference on accelerators through a real system investigation and found that unlike on multicores where the key contentious resources are shared caches and main memory bandwidth, the key contentious resources on accelerators are instead processing elements, accelerator memory bandwidth and PCIe bandwidth. Based on this observation, we designed interference models that enable the precise prediction for processing element, accelerator memory bandwidth and PCIe bandwidth contention on real hardware. By using a novel technique to forecast solo-run execution traces of the co-located applications using interference models, Prophet can accurately predict the performance degradation of latency-sensitive applications on non-preemptive accelerators. Using Prophet, we can identify ``safe'' co-locations on accelerators to improve utilization without violating the QoS target. Our evaluation shows that Prophet can predict the performance degradation with an average prediction error of 5.47\% on real systems.
Meanwhile, based on the prediction, Prophet achieves accelerator utilization improvements of 49.9\% on average while maintaining the QoS target of latency-sensitive applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Kanev:2017:MAM, author = "Svilen Kanev and Sam Likun Xi and Gu-Yeon Wei and David Brooks", title = "{Mallacc}: Accelerating Memory Allocation", journal = j-SIGPLAN, volume = "52", number = "4", pages = "33--45", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037736", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent work shows that dynamic memory allocation consumes nearly 7\% of all cycles in Google datacenters. With the trend towards increased specialization of hardware, we propose Mallacc, an in-core hardware accelerator designed for broad use across a number of high-performance, modern memory allocators. The design of Mallacc is quite different from traditional throughput-oriented hardware accelerators. Because memory allocation requests tend to be very frequent, fast, and interspersed inside other application code, accelerators must be optimized for latency rather than throughput and area overheads must be kept to a bare minimum. Mallacc accelerates the three primary operations of a typical memory allocation request: size class computation, retrieval of a free memory block, and sampling of memory usage. Our results show that malloc latency can be reduced by up to 50\% with a hardware cost of less than 1500 $\mu$m$^2$ of silicon area, less than 0.006\% of a typical high-performance processor core.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Wen:2017:REV, author = "Shasha Wen and Milind Chabbi and Xu Liu", title = "{REDSPY}: Exploring Value Locality in Software", journal = j-SIGPLAN, volume = "52", number = "4", pages = "47--61", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037729", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Complex code bases with several layers of abstractions have abundant inefficiencies that affect the execution time. Value redundancy is a kind of inefficiency where the same values are repeatedly computed, stored, or retrieved over the course of execution. Not all redundancies can be easily detected or eliminated with compiler optimization passes due to the inherent limitations of the static analysis. Microscopic observation of whole executions at instruction- and operand-level granularity breaks down abstractions and helps recognize redundancies that masquerade in complex programs. We have developed REDSPY---a fine-grained profiler to pinpoint and quantify redundant operations in program executions. Value redundancy may happen over time at same locations or in adjacent locations, and thus it has temporal and spatial locality. REDSPY identifies both temporal and spatial value locality.
Furthermore, REDSPY is capable of identifying values that are approximately the same, enabling optimization opportunities in HPC codes that often use floating point computations. REDSPY provides intuitive optimization guidance by apportioning redundancies to their provenance---source lines and execution calling contexts. REDSPY pinpointed dramatically high volume of redundancies in programs that were optimization targets for decades, such as SPEC CPU2006 suite, Rodinia benchmark, and NWChem---a production computational chemistry code. Guided by REDSPY, we were able to eliminate redundancies that resulted in significant speedups.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Bhattacharjee:2017:TTP, author = "Abhishek Bhattacharjee", title = "Translation-Triggered Prefetching", journal = j-SIGPLAN, volume = "52", number = "4", pages = "63--76", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037705", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose translation-enabled memory prefetching optimizations or TEMPO, a low-overhead hardware mechanism to boost memory performance by exploiting the operating system's (OS) virtual memory subsystem. We are the first to make the following observations: (1) a substantial fraction (20-40\%) of DRAM references in modern big-data workloads are devoted to accessing page tables; and (2) when memory references require page table lookups in DRAM, the vast majority of them (98\%+) also look up DRAM for the subsequent data access. TEMPO exploits these observations to enable DRAM row-buffer and on-chip cache prefetching of the data that page tables point to. TEMPO requires trivial changes to the memory controller (under 3\% additional area), no OS or application changes, and improves performance by 10-30\% and energy by 1-14\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Kim:2017:TAA, author = "Channoh Kim and Jaehyeok Kim and Sungmin Kim and Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh and Hyeon Gyu Cho and Jae W. Lee", title = "Typed Architectures: Architectural Support for Lightweight Scripting", journal = j-SIGPLAN, volume = "52", number = "4", pages = "77--90", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037726", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic scripting languages are becoming more and more widely adopted not only for fast prototyping but also for developing production-grade applications. They provide high-productivity programming environments featuring high levels of abstraction with powerful built-in functions, automatic memory management, object-oriented programming paradigm and dynamic typing. However, their flexible, dynamic type systems easily become the source of inefficiency in terms of instruction count, memory footprint, and energy consumption. 
This overhead makes it challenging to deploy these high-productivity programming technologies on emerging single-board computers for IoT applications. Addressing this challenge, this paper introduces Typed Architectures, a high-efficiency, low-cost execution substrate for dynamic scripting languages, where each data variable retains high-level type information at an ISA level. Typed Architectures calculate and check the dynamic type of each variable implicitly in hardware, rather than explicitly in software, hence significantly reducing instruction count for dynamic type checking. Besides, Typed Architectures introduce polymorphic instructions (e.g., xadd), which are bound to the correct native instruction at runtime within the pipeline (e.g., add or fadd) to efficiently implement polymorphic operators. Finally, Typed Architectures provide hardware support for flexible yet efficient type tag extraction and insertion, capturing common data layout patterns of tag-value pairs. Our evaluation using a fully synthesizable RISC-V RTL design on FPGA shows that Typed Architectures achieve geomean speedups of 11.2\% and 9.9\% with maximum speedups of 32.6\% and 43.5\% for two production-grade scripting engines for JavaScript and Lua, respectively. Moreover, Typed Architectures improve the energy-delay product (EDP) by 19.3\% for JavaScript and 16.5\% for Lua with an area overhead of 1.6\% at a 40nm technology node.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Seo:2017:FAS, author = "Jihye Seo and Wook-Hee Kim and Woongki Baek and Beomseok Nam and Sam H. Noh", title = "Failure-Atomic Slotted Paging for Persistent Memory", journal = j-SIGPLAN, volume = "52", number = "4", pages = "91--104", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037737", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The slotted-page structure is a database page format commonly used for managing variable-length records. In this work, we develop a novel ``failure-atomic slotted page structure'' for persistent memory that leverages byte addressability and durability of persistent memory to minimize redundant write operations used to maintain consistency in traditional database systems. Failure-atomic slotted paging consists of two key elements: (i) in-place commit per page using hardware transactional memory and (ii) slot header logging that logs the commit mark of each page. The proposed scheme is implemented in SQLite and compared against NVWAL, the current state-of-the-art scheme. Our performance study shows that our failure-atomic slotted paging shows optimal performance for database transactions that insert a single record. For transactions that touch more than one database page, our proposed slot-header logging scheme minimizes the logging overhead by avoiding duplicating pages and logging only the metadata of the dirty pages. 
Overall, we find that our failure-atomic slotted-page management scheme reduces database logging overhead to 1/6 and improves query response time by up to 33\% compared to NVWAL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Nguyen:2017:WSP, author = "Donald Nguyen and Keshav Pingali", title = "What Scalable Programs Need from Transactional Memory", journal = j-SIGPLAN, volume = "52", number = "4", pages = "105--118", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037750", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory (TM) has been the focus of numerous studies, and it is supported in processors such as the IBM Blue Gene/Q and Intel Haswell. Many studies have used the STAMP benchmark suite to evaluate their designs. However, the speedups obtained for the STAMP benchmarks on all TM systems we know of are quite limited; for example, with 64 threads on the IBM Blue Gene/Q, we observe a median speedup of 1.4X using the Blue Gene/Q hardware transactional memory (HTM), and a median speedup of 4.1X using a software transactional memory (STM). What limits the performance of these benchmarks on TMs? In this paper, we argue that the problem lies with the programming model and data structures used to write them. To make this point, we articulate two principles that we believe must be embodied in any scalable program and argue that STAMP programs violate both of them. By modifying the STAMP programs to satisfy both principles, we produce a new set of programs that we call the Stampede suite. Its median speedup on the Blue Gene/Q is 8.0X when using an STM. The two principles also permit us to simplify the TM design. Using this new STM with the Stampede benchmarks, we obtain a median speedup of 17.7X with 64 threads on the Blue Gene/Q and 13.2X with 32 threads on an Intel Westmere system. These results suggest that HTM and STM designs will benefit if more attention is paid to the division of labor between application programs, systems software, and hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Trippel:2017:TMM, author = "Caroline Trippel and Yatin A. Manerkar and Daniel Lustig and Michael Pellauer and Margaret Martonosi", title = "{TriCheck}: Memory Model Verification at the Trisection of Software, Hardware, and {ISA}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "119--133", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037719", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory consistency models (MCMs) which govern inter-module interactions in a shared memory system, are a significant, yet often under-appreciated, aspect of system design. MCMs are defined at the various layers of the hardware-software stack, requiring thoroughly verified specifications, compilers, and implementations at the interfaces between layers. 
Current verification techniques evaluate segments of the system stack in isolation, such as proving compiler mappings from a high-level language (HLL) to an ISA or proving validity of a microarchitectural implementation of an ISA. This paper makes a case for full-stack MCM verification and provides a toolflow, TriCheck, capable of verifying that the HLL, compiler, ISA, and implementation collectively uphold MCM requirements. The work showcases TriCheck's ability to evaluate a proposed ISA MCM in order to ensure that each layer and each mapping is correct and complete. Specifically, we apply TriCheck to the open source RISC-V ISA [55], seeking to verify accurate, efficient, and legal compilations from C11. We uncover under-specifications and potential inefficiencies in the current RISC-V ISA documentation and identify possible solutions for each. As an example, we find that a RISC-V-compliant microarchitecture allows 144 outcomes forbidden by C11 to be observed out of 1,701 litmus tests examined. Overall, this paper demonstrates the necessity of full-stack verification for detecting MCM-related bugs in the hardware-software stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Nalli:2017:APM, author = "Sanketh Nalli and Swapnil Haria and Mark D. Hill and Michael M. Swift and Haris Volos and Kimberly Keeton", title = "An Analysis of Persistent Memory Use with {WHISPER}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "135--148", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037730", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging non-volatile memory (NVM) technologies promise durability with read and write latencies comparable to volatile memory (DRAM). We define Persistent Memory (PM) as NVM accessed with byte addressability at low latency via normal memory instructions. Persistent-memory applications ensure the consistency of persistent data by inserting ordering points between writes to PM allowing the construction of higher-level transaction mechanisms. An epoch is a set of writes to PM between ordering points. To put systems research in PM on a firmer footing, we developed and analyzed a PM benchmark suite called WHISPER (Wisconsin-HP Labs Suite for Persistence) that comprises ten PM applications we gathered to cover all current interfaces to PM. A quantitative analysis reveals several insights: (a) only 4\% of writes in PM-aware applications are to PM and the rest are to volatile memory, (b) software transactions are often implemented with 5 to 50 ordering points (c) 75\% of epochs update exactly one 64B cache line, (d) 80\% of epochs from the same thread depend on previous epochs from the same thread, while few epochs depend on epochs from other threads. Based on our analysis, we propose the Hands-off Persistence System (HOPS) to track updates to PM in hardware. Current hardware design requires applications to force data to PM as each epoch ends. 
HOPS provides high-level ISA primitives for applications to express durability and ordering constraints separately and enforces them automatically, while achieving 24.3\% better performance over current approaches to persistence.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Zhang:2017:PPD, author = "Tong Zhang and Changhee Jung and Dongyoon Lee", title = "{ProRace}: Practical Data Race Detection for Production Use", journal = j-SIGPLAN, volume = "52", number = "4", pages = "149--162", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037708", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents ProRace, a dynamic data race detector practical for production runs. It is lightweight, but still offers high race detection capability. To track memory accesses, ProRace leverages instruction sampling using the performance monitoring unit (PMU) in commodity processors. Our PMU driver enables ProRace to sample more memory accesses at a lower cost compared to the state-of-the-art Linux driver. Moreover, ProRace uses PMU-provided execution contexts including register states and program path, and reconstructs unsampled memory accesses offline. This technique allows ProRace to overcome inherent limitations of sampling and improve the detection coverage by performing data race detection on the trace with not only sampled but also reconstructed memory accesses. Experiments using racy production software including apache and mysql show that, with a reasonable offline cost, ProRace incurs only 2.6\% overhead at runtime with 27.5\% detection probability with a sampling period of 10,000.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Olson:2017:CGM, author = "Lena E. Olson and Mark D. Hill and David A. Wood", title = "Crossing Guard: Mediating Host-Accelerator Coherence Interactions", journal = j-SIGPLAN, volume = "52", number = "4", pages = "163--176", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037715", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Specialized hardware accelerators have performance and energy-efficiency advantages over general-purpose processors. To fully realize these benefits and aid programmability, accelerators may share a physical and virtual address space and full cache coherence with the host system. However, allowing accelerators --- particularly those designed by third parties --- to directly communicate with host coherence protocols poses several problems. Host coherence protocols are complex, vary between companies, and may be proprietary, increasing burden on accelerator designers. Bugs in the accelerator implementation may cause crashes and other serious consequences to the host system. We propose Crossing Guard, a coherence interface between the host coherence system and accelerators.
The Crossing Guard interface provides the accelerator designer with a standardized set of coherence messages that are simple enough to aid in design of bug-free coherent caches. At the same time, they are sufficiently complex to allow customized and optimized accelerator caches with performance comparable to using the host protocol. The Crossing Guard hardware is implemented as part of the trusted host, and provides complete safety to the host coherence system, even in the presence of a pathologically buggy accelerator cache.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{McMahan:2017:ASF, author = "Joseph McMahan and Michael Christensen and Lawton Nichols and Jared Roesch and Sung-Yee Guo and Ben Hardekopf and Timothy Sherwood", title = "An Architecture Supporting Formal and Compositional Binary Analysis", journal = j-SIGPLAN, volume = "52", number = "4", pages = "177--191", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037733", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Building a trustworthy life-critical embedded system requires deep reasoning about the potential effects that sequences of machine instructions can have on full system operation. Rather than trying to analyze complete binaries and the countless ways their instructions can interact with one another --- memory, side effects, control registers, implicit state, etc. --- we explore a new approach. We propose an architecture controlled by a thin computational layer designed to tightly correspond with the lambda calculus, drawing on principles of functional programming to bring the assembly much closer to myriad reasoning frameworks, such as the Coq proof assistant. This approach allows assembly-level verified versions of critical code to operate safely in tandem with arbitrary code, including imperative and unverified system components, without the need for large supporting trusted computing bases. We demonstrate that this computational layer can be built in such a way as to simultaneously provide full programmability and compact, precise, and complete semantics, while still using hardware resources comparable to normal embedded systems. To demonstrate the practicality of this approach, our FPGA-implemented prototype runs an embedded medical application which monitors and treats life-threatening arrhythmias. Though the system integrates untrusted and imperative components, our architecture allows for the formal verification of multiple properties of the end-to-end system, including a proof of correctness of the assembly-level implementation of the core algorithm, the integrity of trusted data via a non-interference proof, and a guarantee that our prototype meets critical timing requirements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Hsiao:2017:ASI, author = "Chun-Hung Hsiao and Satish Narayanasamy and Essam Muhammad Idris Khan and Cristiano L. Pereira and Gilles A. 
Pokam", title = "{AsyncClock}: Scalable Inference of Asynchronous Event Causality", journal = j-SIGPLAN, volume = "52", number = "4", pages = "193--205", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037712", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Asynchronous programming model is commonly used in mobile systems and Web 2.0 environments. Asynchronous race detectors use algorithms that are an order of magnitude performance and space inefficient compared to conventional data race detectors. We solve this problem by identifying and addressing two important problems in reasoning about causality between asynchronous events. Unlike conventional signal-wait operations, establishing causal order between two asynchronous events is fundamentally more challenging as there is no common handle they operate on. We propose a new primitive named AsyncClock that addresses this problem by explicitly tracking causally preceding events, and show that AsyncClock can handle a wide variety of asynchronous causality models. We also address the important scalability problem of efficiently identifying heirless events whose metadata can be reclaimed. We built the first single-pass, non-graph-based Android race detector using our algorithm and applied it to find errors in 20 popular applications. Our tool incurs about 6x performance overhead, which is several times more efficient than the state-of-the-art solution. It also scales well with the execution length. We used our tool to find 147 previously unknown harmful races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Calciu:2017:BBC, author = "Irina Calciu and Siddhartha Sen and Mahesh Balakrishnan and Marcos K. Aguilera", title = "Black-box Concurrent Data Structures for {NUMA} Architectures", journal = j-SIGPLAN, volume = "52", number = "4", pages = "207--221", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037721", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High-performance servers are Non-Uniform Memory Access (NUMA) machines. To fully leverage these machines, programmers need efficient concurrent data structures that are aware of the NUMA performance artifacts. We propose Node Replication (NR), a black-box approach to obtaining such data structures. NR takes an arbitrary sequential data structure and automatically transforms it into a NUMA-aware concurrent data structure satisfying linearizability. Using NR requires no expertise in concurrent data structure design, and the result is free of concurrency bugs. NR draws ideas from two disciplines: shared-memory algorithms and distributed systems. Briefly, NR implements a NUMA-aware shared log, and then uses the log to replicate data structures consistently across NUMA nodes. NR is best suited for contended data structures, where it can outperform lock-free algorithms by 3.1x, and lock-based solutions by 30x. To show the benefits of NR to a real application, we apply NR to the data structures of Redis, an in-memory storage system. 
The result outperforms other methods by up to 14x. The cost of NR is additional memory for its log and replicas.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Vora:2017:CCR, author = "Keval Vora and Chen Tian and Rajiv Gupta and Ziang Hu", title = "{CoRAL}: Confined Recovery in Distributed Asynchronous Graph Processing", journal = j-SIGPLAN, volume = "52", number = "4", pages = "223--236", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037747", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing distributed asynchronous graph processing systems employ checkpointing to capture globally consistent snapshots and rollback all machines to most recent checkpoint to recover from machine failures. In this paper we argue that recovery in distributed asynchronous graph processing does not require the entire execution state to be rolled back to a globally consistent state due to the relaxed asynchronous execution semantics. We define the properties required in the recovered state for it to be usable for correct asynchronous processing and develop CoRAL, a lightweight checkpointing and recovery algorithm. First, this algorithm carries out confined recovery that only rolls back graph execution states of the failed machines to affect recovery. Second, it relies upon lightweight checkpoints that capture locally consistent snapshots with a reduced peak network bandwidth requirement. Our experiments using real-world graphs show that our technique recovers from failures and finishes processing 1.5x to 3.2x faster compared to the traditional asynchronous checkpointing and recovery mechanism when failures impact 1 to 6 machines of a 16 machine cluster. Moreover, capturing locally consistent snapshots significantly reduces intermittent high peak bandwidth usage required to save the snapshots --- the average reduction in 99th percentile bandwidth ranges from 22\% to 51\% while 1 to 6 snapshot replicas are being maintained.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Vora:2017:KFA, author = "Keval Vora and Rajiv Gupta and Guoqing Xu", title = "{KickStarter}: Fast and Accurate Computations on Streaming Graphs via Trimmed Approximations", journal = j-SIGPLAN, volume = "52", number = "4", pages = "237--251", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037748", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Continuous processing of a streaming graph maintains an approximate result of the iterative computation on a recent version of the graph. Upon a user query, the accurate result on the current graph can be quickly computed by feeding the approximate results to the iterative computation --- a form of incremental computation that corrects the (small amount of) error in the approximate result. 
Despite the effectiveness of this approach in processing growing graphs, it is generally not applicable when edge deletions are present --- existing approximations can lead to either incorrect results (e.g., monotonic computations terminate at an incorrect minima/maxima) or poor performance (e.g., with approximations, convergence takes longer than performing the computation from scratch). This paper presents KickStarter, a runtime technique that can trim the approximate values for a subset of vertices impacted by the deleted edges. The trimmed approximation is both safe and profitable, enabling the computation to produce correct results and converge quickly. KickStarter works for a class of monotonic graph algorithms and can be readily incorporated in any existing streaming graph system. Our experiments with four streaming algorithms on five large graphs demonstrate that trimming not only produces correct results but also accelerates these algorithms by 8.5--23.7x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Powers:2017:BBG, author = "Bobby Powers and John Vilk and Emery D. Berger", title = "{Browsix}: Bridging the Gap Between {Unix} and the Browser", journal = j-SIGPLAN, volume = "52", number = "4", pages = "253--266", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037727", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Applications written to run on conventional operating systems typically depend on OS abstractions like processes, pipes, signals, sockets, and a shared file system. Porting these applications to the web currently requires extensive rewriting or hosting significant portions of code server-side because browsers present a nontraditional runtime environment that lacks OS functionality. This paper presents Browsix, a framework that bridges the considerable gap between conventional operating systems and the browser, enabling unmodified programs expecting a Unix-like environment to run directly in the browser. Browsix comprises two core parts: (1) a JavaScript-only system that makes core Unix features (including pipes, concurrent processes, signals, sockets, and a shared file system) available to web applications; and (2) extended JavaScript runtimes for C, C++, Go, and Node.js that support running programs written in these languages as processes in the browser. Browsix supports running a POSIX shell, making it straightforward to connect applications together via pipes. We illustrate Browsix's capabilities via case studies that demonstrate how it eases porting legacy applications to the browser and enables new functionality. We demonstrate a Browsix-enabled LaTeX editor that operates by executing unmodified versions of pdfLaTeX and BibTeX. This browser-only LaTeX editor can render documents in seconds, making it fast enough to be practical. We further demonstrate how Browsix lets us port a client-server application to run entirely in the browser for disconnected operation. 
Creating these applications required less than 50 lines of glue code and no code modifications, demonstrating how easily Browsix can be used to build sophisticated web applications from existing parts without modification.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Rajbhandari:2017:OCM, author = "Samyam Rajbhandari and Yuxiong He and Olatunji Ruwase and Michael Carbin and Trishul Chilimbi", title = "Optimizing {CNNs} on Multicores for Scalability, Performance and Goodput", journal = j-SIGPLAN, volume = "52", number = "4", pages = "267--280", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037745", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Convolutional Neural Networks (CNN) are a class of Artificial Neural Networks (ANN) that are highly efficient at the pattern recognition tasks that underlie difficult AI problems in a variety of domains, such as speech recognition, object recognition, and natural language processing. CNNs are, however, computationally intensive to train. This paper presents the first characterization of the performance optimization opportunities for training CNNs on CPUs. Our characterization includes insights based on the structure of the network itself (i.e., intrinsic arithmetic intensity of the convolution and its scalability under parallelism) as well as dynamic properties of its execution (i.e., sparsity of the computation). Given this characterization, we present an automatic framework called spg-CNN for optimizing CNN training on CPUs. It comprises of a computation scheduler for efficient parallel execution, and two code generators: one that optimizes for sparsity, and the other that optimizes for spatial reuse in convolutions. We evaluate spg-CNN using convolutions from a variety of real world benchmarks, and show that spg-CNN can train CNNs faster than state-of-the-art approaches by an order of magnitude.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Sundararajah:2017:LTN, author = "Kirshanthan Sundararajah and Laith Sakka and Milind Kulkarni", title = "Locality Transformations for Nested Recursive Iteration Spaces", journal = j-SIGPLAN, volume = "52", number = "4", pages = "281--295", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037720", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There has been a significant amount of effort invested in designing scheduling transformations such as loop tiling and loop fusion that rearrange the execution of dynamic instances of loop nests to place operations that access the same data close together temporally. In recent years, there has been interest in designing similar transformations that operate on recursive programs, but until now these transformations have only considered simple scenarios: multiple recursions to be fused, or a recursion nested inside a simple loop. 
This paper develops the first set of scheduling transformations for nested recursions: recursive methods that call other recursive methods. These are the recursive analog to nested loops. We present a transformation called recursion twisting that automatically improves locality at all levels of the memory hierarchy, and show that this transformation can yield substantial performance improvements across several benchmarks that exhibit nested recursion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Li:2017:LAC, author = "Ang Li and Shuaiwen Leon Song and Weifeng Liu and Xu Liu and Akash Kumar and Henk Corporaal", title = "Locality-Aware {CTA} Clustering for Modern {GPUs}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "297--311", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037709", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cache is designed to exploit locality; however, the role of on-chip L1 data caches on modern GPUs is often awkward. The locality among global memory requests from different SMs (Streaming Multiprocessors) is predominantly harvested by the commonly-shared L2 with long access latency; while the in-core locality, which is crucial for performance delivery, is handled explicitly by user-controlled scratchpad memory. In this work, we disclose another type of data locality that has been long ignored but with performance boosting potential --- the inter-CTA locality. Exploiting such locality is rather challenging due to unclear hardware feasibility, unknown and inaccessible underlying CTA scheduler, and small in-core cache capacity. To address these issues, we first conduct a thorough empirical exploration on various modern GPUs and demonstrate that inter-CTA locality can be harvested, both spatially and temporally, on L1 or L1/Tex unified cache. Through further quantification process, we prove the significance and commonality of such locality among GPU applications, and discuss whether such reuse is exploitable. By leveraging these insights, we propose the concept of CTA-Clustering and its associated software-based techniques to reshape the default CTA scheduling in order to group the CTAs with potential reuse together on the same SM. Our techniques require no hardware modification and can be directly deployed on existing GPUs. In addition, we incorporate these techniques into an integrated framework for automatic inter-CTA locality optimization. We evaluate our techniques using a wide range of popular GPU applications on all modern generations of NVIDIA GPU architectures. The results show that our proposed techniques significantly improve cache performance through reducing L2 cache transactions by 55\%, 65\%, 29\%, 28\% on average for Fermi, Kepler, Maxwell and Pascal, respectively, leading to an average of 1.46x, 1.48x, 1.45x, 1.41x (up to 3.8x, 3.6x, 3.1x, 3.3x) performance speedups for applications with algorithm-related inter-CTA reuse.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Churchill:2017:SLS, author = "Berkeley Churchill and Rahul Sharma and J. F. 
Bastien and Alex Aiken", title = "Sound Loop Superoptimization for {Google Native Client}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "313--326", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software fault isolation (SFI) is an important technique for the construction of secure operating systems, web browsers, and other extensible software. We demonstrate that superoptimization can dramatically improve the performance of Google Native Client, a SFI system that ships inside the Google Chrome Browser. Key to our results are new techniques for superoptimization of loops: we propose a new architecture for superoptimization tools that incorporates both a fully sound verification technique to ensure correctness and a bounded verification technique to guide the search to optimized code. In our evaluation we optimize 13 libc string functions, formally verify the correctness of the optimizations and report a median and average speedup of 25\% over the libraries shipped by Google.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Bianchini:2017:IDE, author = "Ricardo Bianchini", title = "Improving Datacenter Efficiency", journal = j-SIGPLAN, volume = "52", number = "4", pages = "327--327", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3046426", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Internet companies can improve datacenter efficiency and reduce costs, by minimizing resource waste while avoiding (or limiting) performance degradation. In this talk, I will first overview a few of the efficiency-related efforts we are undertaking at Microsoft, including leveraging workload history to improve resource management. I will then discuss some lessons from deploying these efforts in production and how they relate to academic research.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Liu:2017:DBD, author = "Mengxing Liu and Mingxing Zhang and Kang Chen and Xuehai Qian and Yongwei Wu and Weimin Zheng and Jinglei Ren", title = "{DudeTM}: Building Durable Transactions with Decoupling for Persistent Memory", journal = j-SIGPLAN, volume = "52", number = "4", pages = "329--343", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037714", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging non-volatile memory (NVM) offers non-volatility, byte-addressability and fast access at the same time. To make the best use of these properties, it has been shown by empirical evidence that programs should access NVM directly through CPU load and store instructions, so that the overhead of a traditional file system or database can be avoided. 
Thus, durable transactions become a common choice of applications for accessing persistent memory data in a crash consistent manner. However, existing durable transaction systems employ either undo logging, which requires a fence for every memory write, or redo logging, which requires intercepting all memory reads within transactions. This paper presents DUDETM, a crash-consistent durable transaction system that avoids the drawbacks of both undo logging and redo logging. DUDETM uses shadow DRAM to decouple the execution of a durable transaction into three fully asynchronous steps. The advantage is that only minimal fences and no memory read instrumentation are required. This design also enables an out-of-the-box transactional memory (TM) to be used as an independent component in our system. The evaluation results show that DUDETM adds durability to a TM system with only 7.4--24.6\% throughput degradation. Compared to the existing durable transaction systems, DUDETM provides 1.7x to 4.4x higher throughput. Moreover, DUDETM can be implemented with existing hardware TMs with minor hardware modifications, leading to a further 1.7x speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Klimovic:2017:RRF, author = "Ana Klimovic and Heiner Litz and Christos Kozyrakis", title = "{ReFlex}: Remote Flash $ \approx $ Local Flash", journal = j-SIGPLAN, volume = "52", number = "4", pages = "345--359", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037732", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Remote access to NVMe Flash enables flexible scaling and high utilization of Flash capacity and IOPS within a datacenter. However, existing systems for remote Flash access either introduce significant performance overheads or fail to isolate the multiple remote clients sharing each Flash device. We present ReFlex, a software-based system for remote Flash access, that provides nearly identical performance to accessing local Flash. ReFlex uses a dataplane kernel to closely integrate networking and storage processing to achieve low latency and high throughput at low resource requirements. Specifically, ReFlex can serve up to 850K IOPS per core over TCP/IP networking, while adding 21 $\mu$s over direct access to local Flash. ReFlex uses a QoS scheduler that can enforce tail latency and throughput service-level objectives (SLOs) for thousands of remote clients. We show that ReFlex allows applications to use remote Flash while maintaining their original performance with local Flash.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Jevdjic:2017:ASC, author = "Djordje Jevdjic and Karin Strauss and Luis Ceze and Henrique S.
Malvar", title = "Approximate Storage of Compressed and Encrypted Videos", journal = j-SIGPLAN, volume = "52", number = "4", pages = "361--373", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037718", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The popularization of video capture devices has created strong storage demand for encoded videos. Approximate storage can ease this demand by enabling denser storage at the expense of occasional errors. Unfortunately, even minor storage errors, such as bit flips, can result in major visual damage in encoded videos. Similarly, video encryption, widely employed for privacy and digital rights management, may create long dependencies between bits that show little or no tolerance to storage errors. In this paper we propose VideoApp, a novel and efficient methodology to compute bit-level reliability requirements for encoded videos by tracking visual and metadata dependencies within encoded bitstreams. We further show how VideoApp can be used to trade video quality for storage density in an optimal way. We integrate our methodology into a popular H.264 encoder to partition an encoded video stream into multiple streams that can receive different levels of error correction according to their reliability needs. When applied to a dense and highly error-prone multi-level cell storage substrate, our variable error correction mechanism reduces the error correction overhead by half under the most error-intolerant encoder settings, achieving quality/density points that neither compression nor approximation can achieve alone. Finally, we define the basic invariants needed to support encrypted approximate video storage. We present an analysis of block cipher modes of operation, showing that some are fully compatible with approximation, enabling approximate and secure video storage systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Elyasi:2017:EIR, author = "Nima Elyasi and Mohammad Arjomand and Anand Sivasubramaniam and Mahmut T. Kandemir and Chita R. Das and Myoungsoo Jung", title = "Exploiting Intra-Request Slack to Improve {SSD} Performance", journal = j-SIGPLAN, volume = "52", number = "4", pages = "375--388", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037728", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With Solid State Disks (SSDs) offering high degrees of parallelism, SSD controllers place data and direct requests to exploit the maximum offered hardware parallelism. In the quest to maximize parallelism and utilization, sub-requests of a request that are directed to different flash chips by the scheduler can experience differential wait times since their individual queues are not coordinated and load balanced at all times. Since the macro request is considered complete only when its last sub-request completes, some of its sub-requests that complete earlier have to necessarily wait for this last sub-request. 
This paper opens the door to a new class of schedulers to leverage such slack between sub-requests in order to improve response times. Specifically, the paper presents the design and implementation of a slack-enabled re-ordering scheduler, called Slacker, for sub-requests issued to each flash chip. Layered under a modern SSD request scheduler, Slacker estimates the slack of each incoming sub-request to a flash chip and allows them to jump ahead of existing sub-requests with sufficient slack so as to not detrimentally impact their response times. Slacker is simple to implement and imposes only marginal additions to the hardware. Using a spectrum of 21 workloads with diverse read-write characteristics, we show that Slacker provides as much as 19.5\%, 13\% and 14.5\% improvement in response times, with average improvements of 12\%, 6.5\% and 8.5\%, for write-intensive, read-intensive and read-write balanced workloads, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Wang:2017:GSM, author = "Kai Wang and Aftab Hussain and Zhiqiang Zuo and Guoqing Xu and Ardalan Amiri Sani", title = "{Graspan}: a Single-machine Disk-based Graph System for Interprocedural Static Analyses of Large-scale Systems Code", journal = j-SIGPLAN, volume = "52", number = "4", pages = "389--404", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037744", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "There is more than a decade-long history of using static analysis to find bugs in systems such as Linux. Most of the existing static analyses developed for these systems are simple checkers that find bugs based on pattern matching. Despite the presence of many sophisticated interprocedural analyses, few of them have been employed to improve checkers for systems code due to their complex implementations and poor scalability. In this paper, we revisit the scalability problem of interprocedural static analysis from a ``Big Data'' perspective. That is, we turn sophisticated code analysis into Big Data analytics and leverage novel data processing techniques to solve this traditional programming language problem. We develop Graspan, a disk-based parallel graph system that uses an edge-pair centric computation model to compute dynamic transitive closures on very large program graphs. We implement context-sensitive pointer/alias and dataflow analyses on Graspan. An evaluation of these analyses on large codebases such as Linux shows that their Graspan implementations scale to millions of lines of code and are much simpler than their original implementations. 
Moreover, we show that these analyses can be used to augment the existing checkers; these augmented checkers uncovered 132 new NULL pointer bugs and 1308 unnecessary NULL tests in Linux 4.4.0-rc5, PostgreSQL 8.3.9, and Apache httpd 2.2.18.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Ren:2017:SDH, author = "Ao Ren and Zhe Li and Caiwen Ding and Qinru Qiu and Yanzhi Wang and Ji Li and Xuehai Qian and Bo Yuan", title = "{SC-DCNN}: Highly-Scalable Deep Convolutional Neural Network using Stochastic Computing", journal = j-SIGPLAN, volume = "52", number = "4", pages = "405--418", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037746", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the recent advance of wearable devices and Internet of Things (IoTs), it becomes attractive to implement the Deep Convolutional Neural Networks (DCNNs) in embedded and portable systems. Currently, executing the software-based DCNNs requires high-performance servers, restricting the widespread deployment on embedded and mobile IoT devices. To overcome this obstacle, considerable research efforts have been made to develop highly-parallel and specialized DCNN accelerators using GPGPUs, FPGAs or ASICs. Stochastic Computing (SC), which uses a bit-stream to represent a number within [-1, 1] by counting the number of ones in the bit-stream, has high potential for implementing DCNNs with high scalability and ultra-low hardware footprint. Since multiplications and additions can be calculated using AND gates and multiplexers in SC, significant reductions in power (energy) and hardware footprint can be achieved compared to the conventional binary arithmetic implementations. The tremendous savings in power (energy) and hardware resources allow immense design space for enhancing scalability and robustness for hardware DCNNs. This paper presents SC-DCNN, the first comprehensive design and optimization framework of SC-based DCNNs, using a bottom-up approach. We first present the designs of function blocks that perform the basic operations in DCNN, including inner product, pooling, and activation function. Then we propose four designs of feature extraction blocks, which are in charge of extracting features from input feature maps, by connecting different basic function blocks with joint optimization. Moreover, the efficient weight storage methods are proposed to reduce the area and power (energy) consumption. Putting all together, with feature extraction blocks carefully selected, SC-DCNN is holistically optimized to minimize area and power (energy) consumption while maintaining high network accuracy. 
Experimental results demonstrate that the LeNet5 implemented in SC-DCNN consumes only 17 mm$^2$ area and 1.53 W power, achieves throughput of 781250 images/s, area efficiency of 45946 images/s/ mm$^2$, and energy efficiency of 510734 images/J.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Ajay:2017:GIL, author = "Jerry Ajay and Chen Song and Aditya Singh Rathore and Chi Zhou and Wenyao Xu", title = "{$3$DGates}: an Instruction-Level Energy Analysis and Optimization of {$3$D} Printers", journal = j-SIGPLAN, volume = "52", number = "4", pages = "419--433", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037752", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As the next-generation manufacturing driven force, 3D printing technology is having a transformative effect on various industrial domains and has been widely applied in a broad spectrum of applications. It also progresses towards other versatile fields with portable battery-powered 3D printers working on a limited energy budget. While reducing manufacturing energy is an essential challenge in industrial sustainability and national economics, this growing trend motivates us to explore the energy consumption of the 3D printer for the purpose of energy efficiency. To this end, we perform an in-depth analysis of energy consumption in commercial, off-the-shelf 3D printers from an instruction-level perspective. We build an instruction-level energy model and an energy profiler to analyze the energy cost during the fabrication process. From the insights obtained by the energy profiler, we propose and implement a cross-layer energy optimization solution, called 3DGates, which spans the instruction-set, the compiler and the firmware. We evaluate 3DGates over 338 benchmarks on a 3D printer and achieve an overall energy reduction of 25\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Cox:2017:EAT, author = "Guilherme Cox and Abhishek Bhattacharjee", title = "Efficient Address Translation for Architectures with Multiple Page Sizes", journal = j-SIGPLAN, volume = "52", number = "4", pages = "435--448", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037704", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Processors and operating systems (OSes) support multiple memory page sizes. Superpages increase Translation Lookaside Buffer (TLB) hits, while small pages provide fine-grained memory protection. Ideally, TLBs should perform well for any distribution of page sizes. In reality, set-associative TLBs --- used frequently for their energy efficiency compared to fully-associative TLBs --- cannot (easily) support multiple page sizes concurrently. Instead, commercial systems typically implement separate set-associative TLBs for different page sizes. 
This means that when superpages are allocated aggressively, TLB misses may, counterintuitively, increase even if entries for small pages remain unused (and vice-versa). We invent MIX TLBs, energy-frugal set-associative structures that concurrently support all page sizes by exploiting superpage allocation patterns. MIX TLBs boost the performance (often by 10--30\%) of big-memory applications on native CPUs, virtualized CPUs, and GPUs. MIX TLBs are simple and require no OS or program changes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Lesokhin:2017:PFS, author = "Ilya Lesokhin and Haggai Eran and Shachar Raindel and Guy Shapiro and Sagi Grimberg and Liran Liss and Muli Ben-Yehuda and Nadav Amit and Dan Tsafrir", title = "Page Fault Support for Network Controllers", journal = j-SIGPLAN, volume = "52", number = "4", pages = "449--466", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037710", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Direct network I/O allows network controllers (NICs) to expose multiple instances of themselves, to be used by untrusted software without a trusted intermediary. Direct I/O thus frees researchers from legacy software, fueling studies that innovate in multitenant setups. Such studies, however, overwhelmingly ignore one serious problem: direct memory accesses (DMAs) of NICs disallow page faults, forcing systems to either pin entire address spaces to physical memory and thereby hinder memory utilization, or resort to APIs that pin/unpin memory buffers before/after they are DMAed, which complicates the programming model and hampers performance. We solve this problem by designing and implementing page fault support for InfiniBand and Ethernet NICs. A main challenge we tackle---unique to NICs---is handling receive DMAs that trigger page faults, leaving the NIC without memory to store the incoming data. We demonstrate that our solution provides all the benefits associated with ``regular'' virtual memory, notably (1) a simpler programming model that rids users from the need to pin, and (2) the ability to employ all the canonical memory optimizations, such as memory overcommitment and demand-paging based on actual use. We show that, as a result, benchmark performance improves by up to 1.9x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Hu:2017:TFC, author = "Yang Hu and Mingcong Song and Tao Li", title = "Towards {``Full Containerization''} in Containerized Network Function Virtualization", journal = j-SIGPLAN, volume = "52", number = "4", pages = "467--481", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037713", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With exploding traffic stuffing existing network infrastructure, today's telecommunication and cloud service providers resort to Network Function Virtualization (NFV) for greater agility and economics.
Pioneer service provider such as AT{\&}T proposes to adopt container in NFV to achieve shorter Virtualized Network Function (VNF) provisioning time and better runtime performance. However, we characterize typical NFV workloads on the containers and find that the performance is unsatisfactory. We observe that the shared host OS network stack is the main bottleneck, where the traffic flow processing involves a large amount of intermediate memory buffers and results in significant last level cache pollution. Existing OS memory allocation policies fail to exploit the locality and data sharing information among buffers. In this paper, we propose NetContainer, a software framework that achieves fine-grained hardware resource management for containerized NFV platform. NetContainer employs a cache access overheads guided page coloring scheme to coordinately address the inter-flow cache access overheads and intra-flow cache access overheads. It maps the memory buffer pages that manifest low cache access overheads (across a flow or among the flows) to the same last level cache partition. NetContainer exploits a footprint theory based method to estimate the cache access overheads and a Min-Cost Max-Flow model to guide the memory buffer mappings. We implement the NetContainer in Linux kernel and extensively evaluate it with real NFV workloads. Experimental results show that NetContainer outperforms conventional page coloring-based memory allocator by 48\% in terms of successful call rate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Wu:2017:FEF, author = "Bo Wu and Xu Liu and Xiaobo Zhou and Changjun Jiang", title = "{FLEP}: Enabling Flexible and Efficient Preemption on {GPUs}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "483--496", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037742", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "GPUs are widely adopted in HPC and cloud computing platforms to accelerate general-purpose workloads. However, modern GPUs do not support flexible preemption, leading to performance and priority inversion problems in multi-tasking environments. In this paper, we propose and develop FLEP, the first software system that enables flexible kernel preemption and kernel scheduling on commodity GPUs. The FLEP compilation engine transforms the GPU program into preemptable forms, which can be interrupted during execution and yield all or part of the streaming multi-processors (SMs) in the GPU. The FLEP runtime engine intercepts all kernel invocations and determines which kernels and how those kernels should be preempted and scheduled. Experimental results on two-kernel co-runs demonstrate up to 24.2X speedup for high-priority kernels and up to 27X improvement on normalized average turnaround time for kernels with the same priority. FLEP reduces the preemption latency by up to 41\% compared to yielding the whole GPU when the waiting kernels only need several SMs.
With all the benefits, FLEP only introduces 2.5\% runtime overhead, which is substantially lower than the kernel slicing approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Li:2017:SSA, author = "Kaiwei Li and Jianfei Chen and Wenguang Chen and Jun Zhu", title = "{SaberLDA}: Sparsity-Aware Learning of Topic Models on {GPUs}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "497--509", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037740", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Latent Dirichlet Allocation (LDA) is a popular tool for analyzing discrete count data such as text and images. Applications require LDA to handle both large datasets and a large number of topics. Though distributed CPU systems have been used, GPU-based systems have emerged as a promising alternative because of the high computational power and memory bandwidth of GPUs. However, existing GPU-based LDA systems cannot support a large number of topics because they use algorithms on dense data structures whose time and space complexity is linear to the number of topics. In this paper, we propose SaberLDA, a GPU-based LDA system that implements a sparsity-aware algorithm to achieve sublinear time complexity and scales well to learn a large number of topics. To address the challenges introduced by sparsity, we propose a novel data layout, a new warp-based sampling kernel, and an efficient sparse count matrix updating algorithm that improves locality, makes efficient utilization of GPU warps, and reduces memory consumption. Experiments show that SaberLDA can learn from billions-token-scale data with up to 10,000 topics, which is almost two orders of magnitude larger than that of the previous GPU-based systems. With a single GPU card, SaberLDA is able to learn 10,000 topics from a dataset of billions of tokens in a few hours, which is only achievable with clusters with tens of machines before.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Khazraee:2017:MNO, author = "Moein Khazraee and Lu Zhang and Luis Vega and Michael Bedford Taylor", title = "{Moonwalk}: {NRE} Optimization in {ASIC} Clouds", journal = j-SIGPLAN, volume = "52", number = "4", pages = "511--526", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037749", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud services are becoming increasingly globalized and data-center workloads are expanding exponentially. GPU and FPGA-based clouds have illustrated improvements in power and performance by accelerating compute-intensive workloads. ASIC-based clouds are a promising way to optimize the Total Cost of Ownership (TCO) of a given datacenter computation (e.g. YouTube transcoding) by reducing both energy consumption and marginal computation cost. 
The feasibility of an ASIC Cloud for a particular application is directly gated by the ability to manage the Non-Recurring Engineering (NRE) costs of designing and fabricating the ASIC, so that it is significantly lower (e.g. 2X) than the TCO of the best available alternative. In this paper, we show that technology node selection is a major tool for managing ASIC Cloud NRE, and allows the designer to trade off an accelerator's excess energy efficiency and cost performance for lower total cost. We explore NRE and cross-technology optimization of ASIC Clouds for four different applications: Bitcoin mining, YouTube-style video transcoding, Litecoin, and Deep Learning. We address these challenges and show large reductions in the NRE, potentially enabling ASIC Clouds to address a wider variety of datacenter workloads. Our results suggest that advanced nodes like 16nm will lead to sub-optimal TCO for many workloads, and that use of older nodes like 65nm can enable a greater diversity of ASIC Clouds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Park:2017:DRM, author = "Jason Jong Kyu Park and Yongjun Park and Scott Mahlke", title = "Dynamic Resource Management for Efficient Utilization of Multitasking {GPUs}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "527--540", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037707", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As graphics processing units (GPUs) are broadly adopted, running multiple applications on a GPU at the same time is beginning to attract wide attention. Recent proposals on multitasking GPUs have focused on either spatial multitasking, which partitions GPU resource at a streaming multiprocessor (SM) granularity, or simultaneous multikernel (SMK), which runs multiple kernels on the same SM. However, multitasking performance varies heavily depending on the resource partitions within each scheme, and the application mixes. In this paper, we propose GPU Maestro that performs dynamic resource management for efficient utilization of multitasking GPUs. GPU Maestro can discover the best performing GPU resource partition exploiting both spatial multitasking and SMK. Furthermore, dynamism within a kernel and interference between the kernels are automatically considered because GPU Maestro finds the best performing partition through direct measurements. 
Evaluations show that GPU Maestro can improve average system throughput by 20.2\% and 13.9\% over the baseline spatial multitasking and SMK, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Zhang:2017:ISC, author = "Rui Zhang and Natalie Stanley and Christopher Griggs and Andrew Chi and Cynthia Sturton", title = "Identifying Security Critical Properties for the Dynamic Verification of a Processor", journal = j-SIGPLAN, volume = "52", number = "4", pages = "541--554", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037734", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a methodology for identifying security critical properties for use in the dynamic verification of a processor. Such verification has been shown to be an effective way to prevent exploits of vulnerabilities in the processor, given a meaningful set of security properties. We use known processor errata to establish an initial set of security-critical invariants of the processor. We then use machine learning to infer an additional set of invariants that are not tied to any particular, known vulnerability, yet are critical to security. We build a tool chain implementing the approach and evaluate it for the open-source OR1200 RISC processor. We find that our tool can identify 19 (86.4\%) of the 22 manually crafted security-critical properties from prior work and generates 3 new security properties not covered in prior work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Ferraiuolo:2017:VPH, author = "Andrew Ferraiuolo and Rui Xu and Danfeng Zhang and Andrew C. Myers and G. Edward Suh", title = "Verification of a Practical Hardware Security Architecture Through Static Information Flow Analysis", journal = j-SIGPLAN, volume = "52", number = "4", pages = "555--568", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037739", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hardware-based mechanisms for software isolation are becoming increasingly popular, but implementing these mechanisms correctly has proved difficult, undermining the root of security. This work introduces an effective way to formally verify important properties of such hardware security mechanisms. In our approach, hardware is developed using a lightweight security-typed hardware description language (HDL) that performs static information flow analysis. We show the practicality of our approach by implementing and verifying a simplified but realistic multi-core prototype of the ARM TrustZone architecture. To make the security-typed HDL expressive enough to verify a realistic processor, we develop new type system features. Our experiments suggest that information flow analysis is efficient, and programmer effort is modest. 
We also show that information flow constraints are an effective way to detect hardware vulnerabilities, including several found in commercial processors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Chisnall:2017:CJS, author = "David Chisnall and Brooks Davis and Khilan Gudka and David Brazdil and Alexandre Joannou and Jonathan Woodruff and A. Theodore Markettos and J. Edward Maste and Robert Norton and Stacey Son and Michael Roe and Simon W. Moore and Peter G. Neumann and Ben Laurie and Robert N. M. Watson", title = "{CHERI JNI}: Sinking the {Java} Security Model into the {C}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "569--583", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037725", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Java provides security and robustness by building a high-level security model atop the foundation of memory protection. Unfortunately, any native code linked into a Java program --- including the million lines used to implement the standard library --- is able to bypass both the memory protection and the higher-level policies. We present a hardware-assisted implementation of the Java native code interface, which extends the guarantees required for Java's security model to native code. Our design supports safe direct access to buffers owned by the JVM, including hardware-enforced read-only access where appropriate. We also present Java language syntax to declaratively describe isolated compartments for native code. We show that it is possible to preserve the memory safety and isolation requirements of the Java security model in C code, allowing native code to run in the same process as Java code with the same impact on security as running equivalent Java code. Our approach has a negligible impact on performance, compared with the existing unsafe native code interface. We demonstrate a prototype implementation running on the CHERI microprocessor synthesized in FPGA.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Ge:2017:GGC, author = "Xinyang Ge and Weidong Cui and Trent Jaeger", title = "{GRIFFIN}: Guarding Control Flows Using {Intel} Processor Trace", journal = j-SIGPLAN, volume = "52", number = "4", pages = "585--598", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037716", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Researchers are actively exploring techniques to enforce control-flow integrity (CFI), which restricts program execution to a predefined set of targets for each indirect control transfer to prevent code-reuse attacks. While hardware-assisted CFI enforcement may have the potential for advantages in performance and flexibility over software instrumentation, current hardware-assisted defenses are either incomplete (i.e., do not enforce all control transfers) or less efficient in comparison. 
We find that the recent introduction of hardware features to log complete control-flow traces, such as Intel Processor Trace (PT), provides an opportunity to explore how efficient and flexible a hardware-assisted CFI enforcement system may become. While Intel PT was designed to aid in offline debugging and failure diagnosis, we explore its effectiveness for online CFI enforcement over unmodified binaries by designing a parallelized method for enforcing various types of CFI policies. We have implemented a prototype called GRIFFIN in the Linux 4.2 kernel that enables complete CFI enforcement over a variety of software, including the Firefox browser and its jitted code. Our experiments show that GRIFFIN can enforce fine-grained CFI policies with shadow stack as recommended by researchers at a performance that is comparable to software-only instrumentation techniques. In addition, we find that alternative logging approaches yield significant performance improvements for trace processing, identifying opportunities for further hardware assistance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Delimitrou:2017:BKW, author = "Christina Delimitrou and Christos Kozyrakis", title = "{Bolt}: {I} Know What You Did Last Summer\ldots{} In The Cloud", journal = j-SIGPLAN, volume = "52", number = "4", pages = "599--613", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037703", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud providers routinely schedule multiple applications per physical host to increase efficiency. The resulting interference on shared resources often leads to performance degradation and, more importantly, security vulnerabilities. Interference can leak important information ranging from a service's placement to confidential data, like private keys. We present Bolt, a practical system that accurately detects the type and characteristics of applications sharing a cloud platform based on the interference an adversary sees on shared resources. Bolt leverages online data mining techniques that only require 2-5 seconds for detection. In a multi-user study on EC2, Bolt correctly identifies the characteristics of 385 out of 436 diverse workloads. Extracting this information enables a wide spectrum of previously-impractical cloud attacks, including denial of service attacks (DoS) that increase tail latency by 140x, as well as resource freeing (RFA) and co-residency attacks. Finally, we show that while advanced isolation mechanisms, such as cache partitioning lower detection accuracy, they are insufficient to eliminate these vulnerabilities altogether. 
To do so, one must either disallow core sharing, or only allow it between threads of the same application, leading to significant inefficiencies and performance penalties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Kang:2017:NCI, author = "Yiping Kang and Johann Hauswald and Cao Gao and Austin Rovinski and Trevor Mudge and Jason Mars and Lingjia Tang", title = "Neurosurgeon: Collaborative Intelligence Between the Cloud and Mobile Edge", journal = j-SIGPLAN, volume = "52", number = "4", pages = "615--629", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037698", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The computation for today's intelligent personal assistants such as Apple Siri, Google Now, and Microsoft Cortana, is performed in the cloud. This cloud-only approach requires significant amounts of data to be sent to the cloud over the wireless network and puts significant computational pressure on the datacenter. However, as the computational resources in mobile devices become more powerful and energy efficient, questions arise as to whether this cloud-only processing is desirable moving forward, and what are the implications of pushing some or all of this compute to the mobile devices on the edge. In this paper, we examine the status quo approach of cloud-only processing and investigate computation partitioning strategies that effectively leverage both the cycles in the cloud and on the mobile device to achieve low latency, low energy consumption, and high datacenter throughput for this class of intelligent applications. Our study uses 8 intelligent applications spanning computer vision, speech, and natural language domains, all employing state-of-the-art Deep Neural Networks (DNNs) as the core machine learning technique. We find that given the characteristics of DNN algorithms, a fine-grained, layer-level computation partitioning strategy based on the data and computation variations of each layer within a DNN has significant latency and energy advantages over the status quo approach. Using this insight, we design Neurosurgeon, a lightweight scheduler to automatically partition DNN computation between mobile devices and datacenters at the granularity of neural network layers. Neurosurgeon does not require per-application profiling. It adapts to various DNN architectures, hardware platforms, wireless networks, and server load levels, intelligently partitioning computation for best latency or best mobile energy. We evaluate Neurosurgeon on a state-of-the-art mobile development platform and show that it improves end-to-end latency by 3.1X on average and up to 40.7X, reduces mobile energy consumption by 59.5\% on average and up to 94.7\%, and improves datacenter throughput by 1.5X on average and up to 6.7X.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Agarwal:2017:TAT, author = "Neha Agarwal and Thomas F. 
Wenisch", title = "{Thermostat}: Application-transparent Page Management for Two-tiered Main Memory", journal = j-SIGPLAN, volume = "52", number = "4", pages = "631--644", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037706", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The advent of new memory technologies that are denser and cheaper than commodity DRAM has renewed interest in two-tiered main memory schemes. Infrequently accessed application data can be stored in such memories to achieve significant memory cost savings. Past research on two-tiered main memory has assumed a 4KB page size. However, 2MB huge pages are performance critical in cloud applications with large memory footprints, especially in virtualized cloud environments, where nested paging drastically increases the cost of 4KB page management. We present Thermostat, an application-transparent huge-page-aware mechanism to place pages in a dual-technology hybrid memory system while achieving both the cost advantages of two-tiered memory and performance advantages of transparent huge pages. We present an online page classification mechanism that accurately classifies both 4KB and 2MB pages as hot or cold while incurring no observable performance overhead across several representative cloud applications. We implement Thermostat in Linux kernel version 4.5 and evaluate its effectiveness on representative cloud computing workloads running under KVM virtualization. We emulate slow memory with performance characteristics approximating near-future high-density memory technology and show that Thermostat migrates up to 50\% of application footprint to slow memory while limiting performance degradation to 3\%, thereby reducing memory cost up to 30\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Barbalace:2017:BBH, author = "Antonio Barbalace and Robert Lyerly and Christopher Jelesnianski and Anthony Carno and Ho-Ren Chuang and Vincent Legout and Binoy Ravindran", title = "Breaking the Boundaries in Heterogeneous-{ISA} Datacenters", journal = j-SIGPLAN, volume = "52", number = "4", pages = "645--659", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037738", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy efficiency is one of the most important design considerations in running modern datacenters. Datacenter operating systems rely on software techniques such as execution migration to achieve energy efficiency across pools of machines. Execution migration is possible in datacenters today because they consist mainly of homogeneous-ISA machines. However, recent market trends indicate that alternate ISAs such as ARM and PowerPC are pushing into the datacenter, meaning current execution migration techniques are no longer applicable. How can execution migration be applied in future heterogeneous-ISA datacenters? In this work we present a compiler, runtime, and an operating system extension for enabling execution migration between heterogeneous-ISA servers. 
We present a new multi-ISA binary architecture and heterogeneous-OS containers for facilitating efficient migration of natively-compiled applications. We build and evaluate a prototype of our design and demonstrate energy savings of up to 66\% for a workload running on an ARM and an x86 server interconnected by a high-speed network.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Lustig:2017:ASC, author = "Daniel Lustig and Andrew Wright and Alexandros Papakonstantinou and Olivier Giroux", title = "Automated Synthesis of Comprehensive Memory Model Litmus Test Suites", journal = j-SIGPLAN, volume = "52", number = "4", pages = "661--675", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037723", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The memory consistency model is a fundamental part of any shared memory architecture or programming model. Modern weak memory models are notoriously difficult to define and to implement correctly. Most real-world programming languages, compilers, and (micro)architectures therefore rely heavily on black-box testing methodologies. The success of such techniques requires that the suite of litmus tests used to perform the testing be comprehensive--it should ideally stress all obscure corner cases of the model and of its implementation. Most litmus test suites today are generated from some combination of manual effort and randomization; however, the complex and subtle nature of contemporary memory models means that manual effort is both error-prone and subject to incomplete coverage. This paper presents a methodology for synthesizing comprehensive litmus test suites directly from a memory model specification. By construction, these suites contain all tests satisfying a minimality criterion: that no synchronization mechanism in the test can be weakened without causing new behaviors to become observable. We formalize this notion using the Alloy modeling language, and we apply it to a number of existing and newly-proposed memory models. Our results show not only that this synthesis technique can automatically reproduce all manually-generated tests from existing suites, but also that it discovers new tests that are not as well studied.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Liu:2017:DAD, author = "Haopeng Liu and Guangpu Li and Jeffrey F. Lukman and Jiaxin Li and Shan Lu and Haryadi S. Gunawi and Chen Tian", title = "{DCatch}: Automatically Detecting Distributed Concurrency Bugs in Cloud Systems", journal = j-SIGPLAN, volume = "52", number = "4", pages = "677--691", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037735", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In big data and cloud computing era, reliability of distributed systems is extremely important. Unfortunately, distributed concurrency bugs, referred to as DCbugs, widely exist. 
They hide in the large state space of distributed cloud systems and manifest non-deterministically depending on the timing of distributed computation and communication. Effective techniques to detect DCbugs are desired. This paper presents a pilot solution, DCatch, in the world of DCbug detection. DCatch predicts DCbugs by analyzing correct execution of distributed systems. To build DCatch, we design a set of happens-before rules that model a wide variety of communication and concurrency mechanisms in real-world distributed cloud systems. We then build runtime tracing and trace analysis tools to effectively identify concurrent conflicting memory accesses in these systems. Finally, we design tools to help prune false positives and trigger DCbugs. We have evaluated DCatch on four representative open-source distributed cloud systems, Cassandra, Hadoop MapReduce, HBase, and ZooKeeper. By monitoring correct execution of seven workloads on these systems, DCatch reports 32 DCbugs, with 20 of them being truly harmful.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Mashtizadeh:2017:TPD, author = "Ali Jos{\'e} Mashtizadeh and Tal Garfinkel and David Terei and David Mazieres and Mendel Rosenblum", title = "Towards Practical Default-On Multi-Core Record\slash Replay", journal = j-SIGPLAN, volume = "52", number = "4", pages = "693--708", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037751", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Castor, a record/replay system for multi-core applications that provides consistently low and predictable overheads. With Castor, developers can leave record and replay on by default, making it practical to record and reproduce production bugs, or employ fault tolerance to recover from hardware failures. Castor is inspired by several observations: First, an efficient mechanism for logging non-deterministic events is critical for recording demanding workloads with low overhead. Through careful use of hardware we were able to increase log throughput by 10x or more, e.g., we could record a server handling 10x more requests per second for the same record overhead. Second, most applications can be recorded without modifying source code by using the compiler to instrument language level sources of non-determinism, in conjunction with more familiar techniques like shared library interposition. Third, while Castor cannot deterministically replay all data races, this limitation is generally unimportant in practice, contrary to what prior work has assumed. Castor currently supports applications written in C, C++, and Go on FreeBSD. 
We have evaluated Castor on parallel and server workloads, including a commercial implementation of memcached in Go, which runs Castor in production.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Huang:2017:PSA, author = "Jian Huang and Michael Allen-Bond and Xuechen Zhang", title = "{Pallas}: Semantic-Aware Checking for Finding Deep Bugs in Fast Path", journal = j-SIGPLAN, volume = "52", number = "4", pages = "709--722", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037743", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software optimization is constantly a serious concern for developing high-performance systems. To accelerate the workflow execution of a specific functionality, software developers usually define and implement a fast path to speed up the critical and commonly executed functions in the workflow. However, producing a bug-free fast path is nontrivial. Our study on the Linux kernel discloses that a committed fast path can have up to 19 follow-up patches for bug fixing, and most of them are deep semantic bugs, which are difficult to be pinpointed by existing bug-finding tools. In this paper, we present such a new category of software bugs based on our fast-path bug study across various system software including virtual memory manager, file systems, network, and device drivers. We investigate their root causes and identify five error-prone aspects in a fast path: path state, trigger condition, path output, fault handling, and assistant data structure. We find that many of the deep bugs can be prevented by applying static analysis incorporating simple semantic information. We extract a set of rules based on our findings and build a toolkit PALLAS to check fast-path bugs. The evaluation results show that PALLAS can effectively reveal fast-path bugs in a variety of systems including Linux kernel, mobile operating system, software-defined networking system, and web browser.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Kotra:2017:HSC, author = "Jagadish B. Kotra and Narges Shahidi and Zeshan A. Chishti and Mahmut T. Kandemir", title = "Hardware-Software Co-design to Mitigate {DRAM} Refresh Overheads: a Case for Refresh-Aware Process Scheduling", journal = j-SIGPLAN, volume = "52", number = "4", pages = "723--736", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037724", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "DRAM cells need periodic refresh to maintain data integrity. With high capacity DRAMs, DRAM refresh poses a significant performance bottleneck as the number of rows to be refreshed (and hence the refresh cycle time, tRFC) with each refresh command increases. Modern day DRAMs perform refresh at a rank-level, while LPDDRs used in mobile environments support refresh at a per-bank level. Rank-level refresh degrades the performance significantly since none of the banks in a rank can serve the on-demand requests. 
Per-bank refresh alleviates some of the performance bottlenecks as the other banks in a rank are available for on-demand requests. Typical DRAM retention time is in the order of several milliseconds, viz., 64msec for environments operating in temperatures below 85 deg C and 32msec for environments operating above 85 deg C. With systems moving towards increased consolidation (ex: virtualized environments), DRAM refresh becomes a significant bottleneck as it reduces the available overall DRAM bandwidth per task. In this work, we propose a hardware-software co-design to mitigate DRAM refresh overheads by exposing the hardware address mapping and DRAM refresh schedule to the Operating System. We propose a novel DRAM refresh-aware process scheduling algorithm in OS which schedules applications on cores such that none of the on-demand requests from the application are stalled by refreshes. Extensive evaluation of our proposed co-design on multi-programmed SPEC CPU2006 workloads show significant performance improvement compared to the previously proposed hardware only approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Kim:2017:KPC, author = "Jinchun Kim and Elvira Teran and Paul V. Gratz and Daniel A. Jim{\'e}nez and Seth H. Pugsley and Chris Wilkerson", title = "Kill the Program Counter: Reconstructing Program Behavior in the Processor Cache Hierarchy", journal = j-SIGPLAN, volume = "52", number = "4", pages = "737--749", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037701", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data prefetching and cache replacement algorithms have been intensively studied in the design of high performance microprocessors. Typically, the data prefetcher operates in the private caches and does not interact with the replacement policy in the shared Last-Level Cache (LLC). Similarly, most replacement policies do not consider demand and prefetch requests as different types of requests. In particular, program counter (PC)-based replacement policies cannot learn from prefetch requests since the data prefetcher does not generate a PC value. PC-based policies can also be negatively affected by compiler optimizations. In this paper, we propose a holistic cache management technique called Kill-the-PC (KPC) that overcomes the weaknesses of traditional prefetching and replacement policy algorithms. KPC cache management has three novel contributions. First, a prefetcher which approximates the future use distance of prefetch requests based on its prediction confidence. Second, a simple replacement policy provides similar or better performance than current state-of-the-art PC-based prediction using global hysteresis. Third, KPC integrates prefetching and replacement policy into a whole system which is greater than the sum of its parts. Information from the prefetcher is used to improve the performance of the replacement policy and vice-versa. Finally, KPC removes the need to propagate the PC through entire on-chip cache hierarchy while providing a holistic cache management approach with better performance than state-of-the-art PC-, and non-PC-based schemes.
Our evaluation shows that KPC provides 8\% better performance than the best combination of existing prefetcher and replacement policy for multi-core workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Gao:2017:TSE, author = "Mingyu Gao and Jing Pu and Xuan Yang and Mark Horowitz and Christos Kozyrakis", title = "{TETRIS}: Scalable and Efficient Neural Network Acceleration with {$3$D} Memory", journal = j-SIGPLAN, volume = "52", number = "4", pages = "751--764", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037702", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The high accuracy of deep neural networks (NNs) has led to the development of NN accelerators that improve performance by two orders of magnitude. However, scaling these accelerators for higher performance with increasingly larger NNs exacerbates the cost and energy overheads of their memory systems, including the on-chip SRAM buffers and the off-chip DRAM channels. This paper presents the hardware architecture and software scheduling and partitioning techniques for TETRIS, a scalable NN accelerator using 3D memory. First, we show that the high throughput and low energy characteristics of 3D memory allow us to rebalance the NN accelerator design, using more area for processing elements and less area for SRAM buffers. Second, we move portions of the NN computations close to the DRAM banks to decrease bandwidth pressure and increase performance and energy efficiency. Third, we show that despite the use of small SRAM buffers, the presence of 3D memory simplifies dataflow scheduling for NN computations. We present an analytical scheduling scheme that matches the efficiency of schedules derived through exhaustive search. Finally, we develop a hybrid partitioning scheme that parallelizes the NN computations over multiple accelerators. Overall, we show that TETRIS improves the performance by 4.1x and reduces the energy by 1.5x over NN accelerators with conventional, low-power DRAM memory systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Song:2017:HBA, author = "Wonjun Song and Gwangsun Kim and Hyungjoon Jung and Jongwook Chung and Jung Ho Ahn and Jae W. Lee and John Kim", title = "History-Based Arbitration for Fairness in Processor-Interconnect of {NUMA} Servers", journal = j-SIGPLAN, volume = "52", number = "4", pages = "765--777", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "NUMA (non-uniform memory access) servers are commonly used in high-performance computing and datacenters. Within each server, a processor-interconnect (e.g., Intel QPI, AMD HyperTransport) is used to communicate between the different sockets or nodes. In this work, we explore the impact of the processor-interconnect on overall performance --- in particular, the performance unfairness caused by processor-interconnect arbitration.
It is well known that locally-fair arbitration does not guarantee globally-fair bandwidth sharing as closer nodes receive more bandwidth in a multi-hop network. However, this work demonstrates that the opposite can occur in a commodity NUMA server where remote nodes receive higher bandwidth (and perform better). We analyze this problem and identify that this occurs because of external concentration used in router micro-architectures for processor-interconnects without globally-aware arbitration. While accessing remote memory can occur in any NUMA system, performance unfairness (or performance variation) is more critical in cloud computing and virtual machines with shared resources. We demonstrate how this unfairness creates significant performance variation when a workload is executed on the Xen virtualization platform. We then provide analysis using synthetic workloads to better understand the source of unfairness and eliminate the impact of other shared resources, including the shared last-level cache and main memory. To provide fairness, we propose a novel, history-based arbitration that tracks the history of arbitration grants made in the previous history window. A weighted arbitration is done based on the history to provide global fairness. Through simulations, we show our proposed history-based arbitration can provide global fairness and minimize the processor-interconnect performance unfairness at low cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Misra:2017:ELT, author = "Pulkit A. Misra and Jeffrey S. Chase and Johannes Gehrke and Alvin R. Lebeck", title = "Enabling Lightweight Transactions with Precision Time", journal = j-SIGPLAN, volume = "52", number = "4", pages = "779--794", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037722", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Distributed transactional storage is an important service in today's data centers. Achieving high performance without high complexity is often a challenge for these systems due to sophisticated consistency protocols and multiple layers of abstraction. In this paper we show how to combine two emerging technologies---Software-Defined Flash (SDF) and precise synchronized clocks---to improve performance and reduce complexity for transactional storage within the data center. We present a distributed transactional system (called MILANA) as a layer above a durable multi-version key-value store (called SEMEL) for read-heavy workloads within a data center. SEMEL exploits write behavior of SSDs to maintain a time-ordered sequence of versions for each key efficiently and durably. MILANA adds a variant of optimistic concurrency control above SEMEL's API to service read requests from a consistent snapshot and to enable clients to make fast local commit or abort decisions for read-only transactions. Experiments with the prototype reveal up to 43\% lower transaction abort rates using IEEE Precision Time Protocol (PTP) vs. the standard Network Time Protocol (NTP). 
Under the Retwis benchmark, client-local validation of read-only transactions yields a 35\% reduction in latency and 55\% increase in transaction throughput.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Liu:2017:ITN, author = "Ming Liu and Liang Luo and Jacob Nelson and Luis Ceze and Arvind Krishnamurthy and Kishore Atreya", title = "{IncBricks}: Toward In-Network Computation with an In-Network Cache", journal = j-SIGPLAN, volume = "52", number = "4", pages = "795--809", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037731", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The emergence of programmable network devices and the increasing data traffic of datacenters motivate the idea of in-network computation. By offloading compute operations onto intermediate networking devices (e.g., switches, network accelerators, middleboxes), one can (1) serve network requests on the fly with low latency; (2) reduce datacenter traffic and mitigate network congestion; and (3) save energy by running servers in a low-power mode. However, since (1) existing switch technology doesn't provide general computing capabilities, and (2) commodity datacenter networks are complex (e.g., hierarchical fat-tree topologies, multipath communication), enabling in-network computation inside a datacenter is challenging. In this paper, as a step towards in-network computing, we present IncBricks, an in-network caching fabric with basic computing primitives. IncBricks is a hardware-software co-designed system that supports caching in the network using a programmable network middlebox. As a key-value store accelerator, our prototype lowers request latency by over 30\% and doubles throughput for 1024 byte values in a common cluster configuration. Our results demonstrate the effectiveness of in-network computing and that efficient datacenter network request processing is possible if we carefully split the computation across the different programmable computing elements in a datacenter, including programmable switches, network accelerators, and end hosts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Akturk:2017:AAA, author = "Ismail Akturk and Ulya R. Karpuzcu", title = "{AMNESIAC}: Amnesic Automatic Computer", journal = j-SIGPLAN, volume = "52", number = "4", pages = "811--824", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037741", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Due to imbalances in technology scaling, the energy consumption of data storage and communication by far exceeds the energy consumption of actual data production, i.e., computation. As a consequence, recomputing data can become more energy efficient than storing and retrieving precomputed data. At the same time, recomputation can relax the pressure on the memory hierarchy and the communication bandwidth. 
This study hence assesses the energy efficiency prospects of trading computation for communication. We introduce an illustrative proof-of-concept design, identify practical limitations, and provide design guidelines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Bai:2017:VRE, author = "Yuxin Bai and Victor W. Lee and Engin Ipek", title = "Voltage Regulator Efficiency Aware Power Management", journal = j-SIGPLAN, volume = "52", number = "4", pages = "825--838", month = apr, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093336.3037717", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Conventional off-chip voltage regulators are typically bulky and slow, and are inefficient at exploiting system and workload variability using Dynamic Voltage and Frequency Scaling (DVFS). On-die integration of voltage regulators has the potential to increase the energy efficiency of computer systems by enabling power control at a fine granularity in both space and time. The energy conversion efficiency of on-chip regulators, however, is typically much lower than off-chip regulators, which results in significant energy losses. Fine-grained power control and high voltage regulator efficiency are difficult to achieve simultaneously, with either emerging on-chip or conventional off-chip regulators. A voltage conversion framework that relies on a hierarchy of off-chip switching regulators and on-chip linear regulators is proposed to enable fine-grained power control with a regulator efficiency greater than 90\%. A DVFS control policy that is based on a reinforcement learning (RL) approach is developed to exploit the proposed framework. Per-core RL agents learn and improve their control policies independently, while retaining the ability to coordinate their actions to accomplish system level power management objectives. When evaluated on a mix of 14 parallel and 13 multiprogrammed workloads, the proposed voltage conversion framework achieves 18\% greater energy efficiency than a conventional framework that uses on-chip switching regulators. Moreover, when the RL based DVFS control policy is used to control the proposed voltage conversion framework, the system achieves a 21\% higher energy efficiency over a baseline oracle policy with coarse-grained power control capability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '17 conference proceedings.", } @Article{Heinze:2017:TSA, author = "Thomas S. Heinze and Anders M{\o}ller and Fabio Strocco", title = "Type safety analysis for {Dart}", journal = j-SIGPLAN, volume = "52", number = "2", pages = "1--12", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989226", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Optional typing is traditionally viewed as a compromise between static and dynamic type checking, where code without type annotations is not checked until runtime. 
We demonstrate that optional type annotations in Dart programs can be integrated into a flow analysis to provide static type safety guarantees both for annotated and non-annotated parts of the code. We explore two approaches: one that uses type annotations for filtering, and one that uses them as specifications. What makes this particularly challenging for Dart is that its type system is unsound even for fully annotated code. Experimental results show that the technique is remarkably effective, even without context sensitivity: 99.3\% of all property lookup operations are reported type safe in a collection of benchmark programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Mezzetti:2017:TUP, author = "Gianluca Mezzetti and Anders M{\o}ller and Fabio Strocco", title = "Type unsoundness in practice: an empirical study of {Dart}", journal = j-SIGPLAN, volume = "52", number = "2", pages = "13--24", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989227", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The type system in the Dart programming language is deliberately designed to be unsound: for a number of reasons, it may happen that a program encounters type errors at runtime although the static type checker reports no warnings. According to the language designers, this ensures a pragmatic balance between the ability to catch bugs statically and allowing a flexible programming style without burdening the programmer with a lot of spurious type warnings. In this work, we attempt to experimentally validate these design choices. Through an empirical evaluation based on open source programs written in Dart totaling 2.4 M LOC, we explore how alternative, more sound choices affect the type warnings being produced. Our results show that some, but not all, sources of unsoundness can be justified. In particular, we find that unsoundness caused by bivariant function subtyping and method overriding does not seem to help programmers. Such information may be useful when designing future versions of the language or entirely new languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Park:2017:PSS, author = "Changhee Park and Hyeonseung Im and Sukyoung Ryu", title = "Precise and scalable static analysis of {jQuery} using a regular expression domain", journal = j-SIGPLAN, volume = "52", number = "2", pages = "25--36", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989228", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "jQuery is the most popular JavaScript library but the state-of-the-art static analyzers for JavaScript applications fail to analyze simple programs that use jQuery. In this paper, we present a novel abstract string domain whose elements are simple regular expressions that can represent prefix, infix, and postfix substrings of a string and even their sets. 
We formalize the new domain in the abstract interpretation framework with abstract models of strings and objects commonly used in the existing JavaScript analyzers. For practical use of the domain, we present polynomial-time inclusion decision rules between the regular expressions and prove that the rules exactly capture the actual inclusion relation. We have implemented the domain as an extension of the open-source JavaScript analyzer, SAFE, and we show that the extension significantly improves the scalability and precision of the baseline analyzer in analyzing programs that use jQuery.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{DeWael:2017:JTI, author = "Mattias {De Wael} and Janwillem Swalens and Wolfgang {De Meuter}", title = "Just-in-time inheritance: a dynamic and implicit multiple inheritance mechanism", journal = j-SIGPLAN, volume = "52", number = "2", pages = "37--47", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989229", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multiple inheritance is often criticised for the ambiguity that arises when multiple parents want to pass on a feature with the same name to their offspring. A survey of programming languages reveals that no programming language has an inherently implicit and dynamic approach to resolve this ambiguity. This paper identifies just-in-time inheritance as the first implicit and dynamic inheritance mechanism. The key idea of just-in-time inheritance is that one of the parents is favoured over the others, which resolves the ambiguity, and that the favoured parent can change at runtime. However, just-in-time inheritance is not the silver bullet to solve all ambiguity problems heir to multiple inheritance, because it is not applicable in all scenarios. We conclude that the applicability of just-in-time inheritance is to be found in systems where multiple inheritance is used to model an ``is-a OR is-a''-relation, rather than the more traditional ``is-a AND is-a''-relation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Meier:2017:PVM, author = "Remigius Meier and Armin Rigo and Thomas R. Gross", title = "Parallel virtual machines with {RPython}", journal = j-SIGPLAN, volume = "52", number = "2", pages = "48--59", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989233", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The RPython framework takes an interpreter for a dynamic language as its input and produces a Virtual Machine (VM) for that language. RPython is being used to develop PyPy, a high-performance Python interpreter. However, the produced VM does not support parallel execution since the framework relies on a Global Interpreter Lock (GIL): PyPy serialises the execution of multi-threaded Python programs.
We describe the rationale and design of a new parallel execution model for RPython that allows the generation of parallel virtual machines while leaving the language semantics unchanged. This model then allows different implementations of concurrency control, and we discuss an implementation based on a GIL and an implementation based on Software Transactional Memory (STM). To evaluate the benefits of either choice, we adapt PyPy to work with both implementations (GIL and STM). The evaluation shows that PyPy with STM improves the runtime of a set of multi-threaded Python programs over PyPy with a GIL by factors in the range of 1.87 $ \times $ up to 5.96 $ \times $ when executing on a processor with 8 cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Chari:2017:BEH, author = "Guido Chari and Diego Garbervetsky and Stefan Marr", title = "Building efficient and highly run-time adaptable virtual machines", journal = j-SIGPLAN, volume = "52", number = "2", pages = "60--71", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989234", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming language virtual machines (VMs) realize language semantics, enforce security properties, and execute applications efficiently. Fully Reflective Execution Environments (EEs) are VMs that additionally expose their whole structure and behavior to applications. This enables developers to observe and adapt VMs at run time. However, there is a belief that reflective EEs are not viable for practical usages because such flexibility would incur a high performance overhead. To refute this belief, we built a reflective EE on top of a highly optimizing dynamic compiler. We introduced a new optimization model that, based on the conjecture that variability of low-level (EE-level) reflective behavior is low in many scenarios, mitigates the most significant sources of the performance overheads related to the reflective capabilities in the EE. Our experiments indicate that reflective EEs can reach peak performance in the order of standard VMs.
Concretely, that (a) if reflective mechanisms are not used the execution overhead is negligible compared to standard VMs, (b) VM operations can be redefined at language-level without incurring significant overheads, (c) for several software adaptation tasks, applying the reflection at the VM level is not only lightweight in terms of engineering effort, but also competitive in terms of performance in comparison to other ad-hoc solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Foley-Bourgon:2017:EIC, author = "Vincent Foley-Bourgon and Laurie Hendren", title = "Efficiently implementing the copy semantics of {MATLAB}'s arrays in {JavaScript}", journal = j-SIGPLAN, volume = "52", number = "2", pages = "72--83", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989235", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiling MATLAB---a dynamic, array-based language---to JavaScript is an attractive proposal: the output code can be deployed on a platform used by billions and can leverage the countless hours that have gone into making JavaScript JIT engines fast. But before that can happen, the original MATLAB code must be properly translated, making sure to bridge the semantic gaps of the two languages. An important area where MATLAB and JavaScript differ is in their handling of arrays: for example, in MATLAB, arrays are one-indexed and writing at an index beyond the end of an array extends it; in JavaScript, typed arrays are zero-indexed and writing out of bounds is a no-op. A MATLAB-to-JavaScript compiler must address these mismatches. Another salient and pervasive difference between the two languages is the assignment of arrays to variables: in MATLAB, this operation has value semantics, while in JavaScript it has reference semantics. In this paper, we present MatJuice --- a source-to-source, ahead-of-time compiler back-end for MATLAB --- and how it deals efficiently with this last issue. We present an intra-procedural data-flow analysis to track where each array variable may point to and which variables are possibly aliased. We also present the associated copy insertion transformation that uses the points-to information to insert explicit copies when necessary.
The resulting JavaScript program respects the MATLAB value semantics and we show that it performs fewer run-time copies than some alternative approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Stadler:2017:ORL, author = "Lukas Stadler and Adam Welc and Christian Humer and Mick Jordan", title = "Optimizing {R} language execution via aggressive speculation", journal = j-SIGPLAN, volume = "52", number = "2", pages = "84--95", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989236", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The R language, from the point of view of language design and implementation, is a unique combination of various programming language concepts. It has functional characteristics like lazy evaluation of arguments, but also allows expressions to have arbitrary side effects. Many runtime data structures, for example variable scopes and functions, are accessible and can be modified while a program executes. Several different object models allow for structured programming, but the object models can interact in surprising ways with each other and with the base operations of R. R works well in practice, but it is complex, and it is a challenge for language developers trying to improve on the current state-of-the-art, which is the reference implementation --- GNU R. The goal of this work is to demonstrate that, given the right approach and the right set of tools, it is possible to create an implementation of the R language that provides significantly better performance while keeping compatibility with the original implementation. In this paper we describe novel optimizations backed up by aggressive speculation techniques and implemented within FastR, an alternative R language implementation, utilizing Truffle --- a JVM-based language development framework developed at Oracle Labs. We also provide experimental evidence demonstrating effectiveness of these optimizations in comparison with GNU R, as well as Renjin and TERR implementations of the R language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Hemann:2017:SEL, author = "Jason Hemann and Daniel P. Friedman and William E. Byrd and Matthew Might", title = "A small embedding of logic programming with a simple complete search", journal = j-SIGPLAN, volume = "52", number = "2", pages = "96--107", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989230", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a straightforward, call-by-value embedding of a small logic programming language with a simple complete search. We construct the entire language in 54 lines of Racket --- half of which implement unification. We then layer over it, in 43 lines, a reconstruction of an existing logic programming language, miniKanren, and attest to our implementation's pedagogical value. 
Evidence suggests our combination of expressiveness, concision, and elegance is compelling: since microKanren's release, it has spawned over 50 embeddings in over two dozen host languages, including Go, Haskell, Prolog and Smalltalk.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Warth:2017:MSA, author = "Alessandro Warth and Patrick Dubroy and Tony Garnock-Jones", title = "Modular semantic actions", journal = j-SIGPLAN, volume = "52", number = "2", pages = "108--119", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989231", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parser generators give programmers a convenient and declarative way to write parsers and other language-processing applications, but their mechanisms for extension and code reuse often leave something to be desired. We introduce Ohm, a parser generator in which both grammars and their interpretations can be extended in safe and modular ways. Unlike many similar tools, Ohm completely separates grammars and semantic actions, avoiding the problems that arise when these two concerns are mixed. This paper describes the particular way in which Ohm achieves this separation, and discusses the resulting benefits to modularity and extensibility.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Marr:2017:CLC, author = "Stefan Marr and Benoit Daloze and Hanspeter M{\"o}ssenb{\"o}ck", title = "Cross-language compiler benchmarking: are we fast yet?", journal = j-SIGPLAN, volume = "52", number = "2", pages = "120--131", month = feb, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093334.2989232", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Comparing the performance of programming languages is difficult because they differ in many aspects including preferred programming abstractions, available frameworks, and their runtime systems. Nonetheless, the question about relative performance comes up repeatedly in the research community, industry, and wider audience of enthusiasts. This paper presents 14 benchmarks and a novel methodology to assess the compiler effectiveness across language implementations. Using a set of common language abstractions, the benchmarks are implemented in Java, JavaScript, Ruby, Crystal, Newspeak, and Smalltalk. We show that the benchmarks exhibit a wide range of characteristics using language-agnostic metrics. Using four different languages on top of the same compiler, we show that the benchmarks perform similarly and therefore allow for a comparison of compiler effectiveness across languages. 
Based on anecdotes, we argue that these benchmarks help language implementers to identify performance bugs and optimization potential by comparing to other language implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '16 conference proceedings.", } @Article{Rompf:2017:LMS, author = "Tiark Rompf", title = "Lightweight modular staging {(LMS)}: generate all the things! (keynote)", journal = j-SIGPLAN, volume = "52", number = "3", pages = "1--1", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993237", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent years have seen a surge of interest in staging and generative programming, driven by the increasing difficulty of making high-level code run fast on modern hardware. While the mechanics of program generation are relatively well understood, we have only begun to understand how to develop systems in a generative way. The Lightweight Modular Staging (LMS) platform forms the core of a research agenda to make generative programming more widely accessible, through powerful libraries and a growing selection of case studies that illuminate design patterns and crystallize best practices for high-level and effective generative programming. This talk will reflect on the foundations of LMS, on applications, achievements, challenges, as well as ongoing and future work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Biboudis:2017:RJD, author = "Aggelos Biboudis and Pablo Inostroza and Tijs van der Storm", title = "{Recaf}: {Java} dialects as libraries", journal = j-SIGPLAN, volume = "52", number = "3", pages = "2--13", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993239", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mainstream programming languages like Java have limited support for language extensibility. Without mechanisms for syntactic abstraction, new programming styles can only be embedded in the form of libraries, limiting expressiveness. In this paper, we present Recaf, a lightweight tool for creating Java dialects; effectively extending Java with new language constructs and user defined semantics. The Recaf compiler generically transforms designated method bodies to code that is parameterized by a semantic factory (Object Algebra), defined in plain Java. The implementation of such a factory defines the desired runtime semantics. We applied our design to produce several examples from a diverse set of programming styles and two case studies: we define (i) extensions for generators, asynchronous computations and asynchronous streams and (ii) a Domain-Specific Language (DSL) for Parsing Expression Grammars (PEGs), in a few lines of code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Wang:2017:CJ, author = "Yanlin Wang and Haoyuan Zhang and Bruno C. d. S. 
Oliveira and Marco Servetto", title = "Classless {Java}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "14--24", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993238", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an OO style without classes, which we call interface-based object-oriented programming (IB). IB is a natural extension of closely related ideas such as traits. Abstract state operations provide a new way to deal with state, which allows for flexibility not available in class-based languages. In IB state can be type-refined in subtypes. The combination of a purely IB style and type-refinement enables powerful idioms using multiple inheritance and state. To introduce IB to programmers we created Classless Java: an embedding of IB directly into Java. Classless Java uses annotation processing for code generation and relies on new features of Java 8 for interfaces. The code generation techniques used in Classless Java have interesting properties, including guarantees that the generated code is type-safe and good integration with IDEs. Usefulness of IB and Classless Java is shown with examples and case studies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Zacharopoulos:2017:EMM, author = "Theologos Zacharopoulos and Pablo Inostroza and Tijs van der Storm", title = "Extensible modeling with managed data in {Java}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "25--35", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993240", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many model-driven development (MDD) tools employ specialized frameworks and modeling languages, and assume that the semantics of models is provided by some form of code generation. As a result, programming against models is cumbersome and does not integrate well with ordinary programming languages and IDEs. In this paper we present MD4J, a modeling approach for embedding metamodels directly in Java, using plain interfaces and annotations. The semantics is provided by data managers that create and manipulate models. This architecture enables two kinds of extensibility. First, the data managers can be changed or extended to obtain different base semantics of a model. This allows a kind of aspect-oriented programming. Second, the metamodels themselves can be extended with additional fields and methods to modularly enrich a modeling language. We illustrate our approach using the example of state machines, discuss the implementation, and evaluate it with two case-studies: the execution of UML activity diagrams and an aspect-oriented refactoring of JHotDraw.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Rosa:2017:APV, author = "Andrea Ros{\`a} and Lydia Y. 
Chen and Walter Binder", title = "Actor profiling in virtual execution environments", journal = j-SIGPLAN, volume = "52", number = "3", pages = "36--46", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993241", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nowadays, many virtual execution environments benefit from concurrency offered by the actor model. Unfortunately, while actors are used in many applications, existing profiling tools are not much effective in analyzing the performance of applications using actors. In this paper, we present a new instrumentation-based technique to profile actors in virtual execution environments. Our technique adopts platform-independent profiling metrics that minimize the perturbations induced by the instrumentation logic and allow comparing profiling results across different platforms. In particular, our technique measures the initialization cost, the amount of executed computations, and the messages sent and received by each actor. We implement our technique within a profiling tool for Akka actors on the Java platform. Evaluation results show that our profiling technique helps performance analysis of actor utilization and communication between actors in large-scale computing frameworks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Konat:2017:BDS, author = "Gabri{\"e}l Konat and Sebastian Erdweg and Eelco Visser", title = "Bootstrapping domain-specific meta-languages in language workbenches", journal = j-SIGPLAN, volume = "52", number = "3", pages = "47--58", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993242", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is common practice to bootstrap compilers of programming languages. By using the compiled language to implement the compiler, compiler developers can code in their own high-level language and gain a large-scale test case. In this paper, we investigate bootstrapping of compiler-compilers as they occur in language workbenches. Language workbenches support the development of compilers through the application of multiple collaborating domain-specific meta-languages for defining a language's syntax, analysis, code generation, and editor support. We analyze the bootstrapping problem of language workbenches in detail, propose a method for sound bootstrapping based on fixpoint compilation, and show how to conduct breaking meta-language changes in a bootstrapped language workbench. 
We have applied sound bootstrapping to the Spoofax language workbench and report on our experience.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Philips:2017:DDD, author = "Laure Philips and Joeri {De Koster} and Wolfgang {De Meuter} and Coen {De Roover}", title = "Dependence-driven delimited {CPS} transformation for {JavaScript}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "59--69", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993243", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In today's web applications asynchronous requests to remote services using callbacks or futures are omnipresent. The continuation of such a non-blocking task is represented as a callback function that will later be called with the result of the request. This style of programming where the remainder of a computation is captured in a continuation function is called continuation-passing style (CPS). This style of programming can quickly lead to a phenomenon called ``callback hell'', which has a negative impact on the maintainability of applications that employ this style. Several alternatives to callbacks are therefore gaining traction within the web domain. For example, there are a number of frameworks that rely on automatically transforming sequential style code into the continuation-passing style. However, these frameworks often employ a conservative approach in which each function call is transformed into CPS. This conservative approach can sequentialise requests that could otherwise be run in parallel. So-called delimited continuations can remedy this, but require special marks that have to be manually inserted in the code for marking the beginning and end of the continuation. In this paper we propose an alternative strategy in which we apply a delimited CPS transformation that operates on a Program Dependence Graph instead to find the limits of each continuation. We implement this strategy in JavaScript and demonstrate its applicability to various web programming scenarios.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Lee:2017:SRE, author = "Mina Lee and Sunbeom So and Hakjoo Oh", title = "Synthesizing regular expressions from examples for introductory automata assignments", journal = j-SIGPLAN, volume = "52", number = "3", pages = "70--80", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993244", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a method for synthesizing regular expressions for introductory automata assignments. Given a set of positive and negative examples, the method automatically synthesizes the simplest possible regular expression that accepts all the positive examples while rejecting all the negative examples. The key novelty is the search-based synthesis algorithm that leverages ideas from over- and under-approximations to effectively prune out a large search space.
We have implemented our technique in a tool and evaluated it with non-trivial benchmark problems that students often struggle with. The results show that our system can synthesize desired regular expressions in 6.7 seconds on the average, so that it can be interactively used by students to enhance their understanding of regular expressions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Omar:2017:PSF, author = "Cyrus Omar and Jonathan Aldrich", title = "Programmable semantic fragments: the design and implementation of {\tt typy}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "81--92", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993245", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces typy, a statically typed programming language embedded by reflection into Python. typy features a fragmentary semantics, i.e. it delegates semantic control over each term, drawn from Python's fixed concrete and abstract syntax, to some contextually relevant user-defined semantic fragment. The delegated fragment programmatically (1) typechecks the term (following a bidirectional protocol); and (2) assigns dynamic meaning to the term by computing a translation to Python. We argue that this design is expressive with examples of fragments that express the static and dynamic semantics of (1) functional records; (2) labeled sums (with nested pattern matching a la ML); (3) a variation on JavaScript's prototypal object system; and (4) typed foreign interfaces to Python and OpenCL. These semantic structures are, or would need to be, defined primitively in conventionally structured languages. We further argue that this design is compositionally well-behaved. It avoids the expression problem and the problems of grammar composition because the syntax is fixed. Moreover, programs are semantically stable under fragment composition (i.e. defining a new fragment will not change the meaning of existing program components.)", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Kienzle:2017:DDV, author = "J{\"o}rg Kienzle and Gunter Mussbacher and Philippe Collet and Omar Alam", title = "Delaying decisions in variable concern hierarchies", journal = j-SIGPLAN, volume = "52", number = "3", pages = "93--103", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993246", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concern-Oriented Reuse (CORE) proposes a new way of structuring model-driven software development, where models of the system are modularized by domains of abstraction within units of reuse called concerns. Within a CORE concern, models are further decomposed and modularized by features. This paper extends CORE with a technique that enables developers of high-level concerns to reuse lower-level concerns without unnecessarily committing to a specific feature selection. 
The developer can select the functionality that is minimally needed to continue development, and reexpose relevant alternative lower-level features of the reused concern in the reusing concern's interface. This effectively delays decision making about alternative functionality until the higher-level reuse context, where more detailed requirements are known and further decisions can be made. The paper describes the algorithms for composing the variation (i.e., feature and impact models), customization, and usage interfaces of a concern, as well as the concern's realization models and finally an entire concern hierarchy, as is necessary to support delayed decision making in CORE.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Adam:2017:ACG, author = "Sorin Adam and Marco Kuhrmann and Ulrik Pagh Schultz", title = "Automatic code generation in practice: experiences with embedded robot controllers", journal = j-SIGPLAN, volume = "52", number = "3", pages = "104--108", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993247", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mobile robots often use a distributed architecture in which software components are deployed to heterogeneous hardware modules. Ensuring the consistency with the designed architecture is a complex task, notably if functional safety requirements have to be fulfilled. We propose to use a domain-specific language to specify those requirements and to allow for generating a safety-enforcing layer of code, which is deployed to the robot. The paper at hand reports experiences in practically applying code generation to mobile robots. For two cases, we discuss how we addressed challenges, e.g., regarding weaving code generation into proprietary development environments and testing of manually written code. We find that a DSL based on the same conceptual model can be used across different kinds of hardware modules, but a significant adaptation effort is required in practical scenarios involving different kinds of hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Braz:2017:CCA, author = "Larissa Braz and Rohit Gheyi and Melina Mongiovi and M{\'a}rcio Ribeiro and Fl{\'a}vio Medeiros and Leopoldo Teixeira", title = "A change-centric approach to compile configurable systems with {\tt \#ifdef}s", journal = j-SIGPLAN, volume = "52", number = "3", pages = "109--119", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993250", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Configurable systems typically use \#ifdefs to denote variability. Generating and compiling all configurations may be time-consuming. An alternative consists of using variability-aware parsers, such as TypeChef. However, they may not scale. In practice, compiling the complete systems may be costly. Therefore, developers can use sampling strategies to compile only a subset of the configurations. 
We propose a change-centric approach to compile configurable systems with \#ifdefs by analyzing only configurations impacted by a code change (transformation). We implement it in a tool called CHECKCONFIGMX, which reports the new compilation errors introduced by the transformation. We perform an empirical study to evaluate 3,913 transformations applied to the 14 largest files of BusyBox, Apache HTTPD, and Expat configurable systems. CHECKCONFIGMX finds 595 compilation errors of 20 types introduced by 41 developers in 214 commits (5.46\% of the analyzed transformations). In our study, it reduces by at least 50\% (an average of 99\%) the effort of evaluating the analyzed transformations by comparing with the exhaustive approach without considering a feature model. CHECKCONFIGMX may help developers to reduce compilation effort to evaluate fine-grained transformations applied to configurable systems with \#ifdefs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Pereira:2017:FBP, author = "Juliana Alves Pereira and Pawel Matuszyk and Sebastian Krieter and Myra Spiliopoulou and Gunter Saake", title = "A feature-based personalized recommender system for product-line configuration", journal = j-SIGPLAN, volume = "52", number = "3", pages = "120--131", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993249", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's competitive marketplace requires the industry to understand unique and particular needs of their customers. Product line practices enable companies to create individual products for every customer by providing an interdependent set of features. Users configure personalized products by consecutively selecting desired features based on their individual needs. However, as most features are interdependent, users must understand the impact of their gradual selections in order to make valid decisions. Thus, especially when dealing with large feature models, specialized assistance is needed to guide the users in configuring their product. Recently, recommender systems have proved to be an appropriate mean to assist users in finding information and making decisions. In this paper, we propose an advanced feature recommender system that provides personalized recommendations to users. In detail, we offer four main contributions: (i) We provide a recommender system that suggests relevant features to ease the decision-making process. (ii) Based on this system, we provide visual support to users that guides them through the decision-making process and allows them to focus on valid and relevant parts of the configuration space. (iii) We provide an interactive open-source configurator tool encompassing all those features. 
(iv) In order to demonstrate the performance of our approach, we compare three different recommender algorithms in two real case studies derived from business experience.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Kowal:2017:EAF, author = "Matthias Kowal and Sofia Ananieva and Thomas Th{\"u}m", title = "Explaining anomalies in feature models", journal = j-SIGPLAN, volume = "52", number = "3", pages = "132--143", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993248", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The development of variable software, in general, and feature models, in particular, is an error-prone and time-consuming task. It gets increasingly more challenging with industrial-size models containing hundreds or thousands of features and constraints. Each change may lead to anomalies in the feature model such as making some features impossible to select. While the detection of anomalies is well-researched, giving explanations is still a challenge. Explanations must be as accurate and understandable as possible to support the developer in repairing the source of an error. We propose an efficient and generic algorithm for explaining different anomalies in feature models. Additionally, we achieve a benefit for the developer by computing short explanations expressed in a user-friendly manner and by emphasizing specific parts in explanations that are more likely to be the cause of an anomaly. We provide an open-source implementation in FeatureIDE and show its scalability for industrial-size feature models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Al-Hajjaji:2017:IEP, author = "Mustafa Al-Hajjaji and Sebastian Krieter and Thomas Th{\"u}m and Malte Lochau and Gunter Saake", title = "{IncLing}: efficient product-line testing using incremental pairwise sampling", journal = j-SIGPLAN, volume = "52", number = "3", pages = "144--155", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993253", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A software product line comprises a family of software products that share a common set of features. It enables customers to compose software systems from a managed set of features. Testing every product of a product line individually is often infeasible due to the exponential number of possible products in the number of features. Several approaches have been proposed to restrict the number of products to be tested by sampling a subset of products achieving sufficient combinatorial interaction coverage. However, existing sampling algorithms do not scale well to large product lines, as they require a considerable amount of time to generate the samples. Moreover, samples are not available until a sampling algorithm completely terminates. 
As testing time is usually limited, we propose an incremental approach of product sampling for pairwise interaction testing (called IncLing), which enables developers to generate samples on demand in a step-wise manner. Furthermore, IncLing uses heuristics to efficiently achieve pairwise interaction coverage with a reasonable number of products. We evaluated IncLing by comparing it against existing sampling algorithms using feature models of different sizes. The results of our approach indicate efficiency improvements for product-line testing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Rothberg:2017:TSC, author = "Valentin Rothberg and Christian Dietrich and Andreas Ziegler and Daniel Lohmann", title = "Towards scalable configuration testing in variable software", journal = j-SIGPLAN, volume = "52", number = "3", pages = "156--167", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993252", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Testing a software product line such as Linux implies building the source with different configurations. Manual approaches to generate configurations that enable code of interest are doomed to fail due to the high amount of variation points distributed over the feature model, the build system and the source code. Research has proposed various approaches to generate covering configurations, but the algorithms show many drawbacks related to run-time, exhaustiveness and the amount of generated configurations. Hence, analyzing an entire Linux source can yield more than 30 thousand configurations and thereby exceeds the limited budget and resources for build testing. In this paper, we present an approach to fill the gap between a systematic generation of configurations and the necessity to fully build software in order to test it. By merging previously generated configurations, we reduce the number of necessary builds and enable global variability-aware testing. We reduce the problem of merging configurations to finding maximum cliques in a graph. We evaluate the approach on the Linux kernel, compare the results to common practices in industry, and show that our implementation scales even when facing graphs with millions of edges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Steindorfer:2017:TSP, author = "Michael J. Steindorfer and Jurgen J. Vinju", title = "Towards a software product line of trie-based collections", journal = j-SIGPLAN, volume = "52", number = "3", pages = "168--172", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993251", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Collection data structures in standard libraries of programming languages are designed to excel for the average case by carefully balancing memory footprint and runtime performance. These implicit design decisions and hard-coded trade-offs do constrain users from using an optimal variant for a given problem. 
Although a wide range of specialized collections is available for the Java Virtual Machine (JVM), they introduce yet another dependency and complicate user adoption by requiring specific Application Program Interfaces (APIs) incompatible with the standard library. A product line for collection data structures would relieve library designers from optimizing for the general case. Furthermore, a product line allows evolving the potentially large code base of a collection family efficiently. The challenge is to find a small core framework for collection data structures which covers all variations without exhaustively listing them, while supporting good performance at the same time. We claim that the concept of Array Mapped Tries (AMTs) embodies a high degree of commonality in the sub-domain of immutable collection data structures. AMTs are flexible enough to cover most of the variability, while minimizing code bloat in the generator and the generated code. We implemented a Data Structure Code Generator (DSCG) that emits immutable collections based on an AMT skeleton foundation. The generated data structures outperform competitive hand-optimized implementations, and the generator still allows for customization towards specific workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Al-Hajjaji:2017:TDT, author = "Mustafa Al-Hajjaji and Jens Meinicke and Sebastian Krieter and Reimar Schr{\"o}ter and Thomas Th{\"u}m and Thomas Leich and Gunter Saake", title = "Tool demo: testing configurable systems with {FeatureIDE}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "173--177", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993254", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most software systems are designed to provide custom functionality using configuration options. Testing such systems is challenging as running tests of a single configuration is often not sufficient, because defects may appear in other configurations. Ideally, all configurations of a software system should be tested, which is usually not applicable in practice due to the combinatorial explosion with respect to the configuration options. Multiple sampling strategies aim to reduce the set of tested configurations to a feasible amount, such as T-wise sampling, random configurations, and user-defined configurations. However, these strategies are often not applied in practice as they require manual effort or a specialized testing framework. Within our tool FeatureIDE, we integrate all aforementioned strategies and reduce the manual effort by automating the process of generating and testing configurations. Furthermore, we provide support for unit testing to avoid redundant test executions and for variability-aware testing. 
With this extension of FeatureIDE, we aim to make recent testing techniques for configurable systems applicable in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Makki:2017:ART, author = "Majid Makki and Dimitri {Van Landuyt} and Wouter Joosen", title = "Automated regression testing of {BPMN 2.0} processes: a capture and replay framework for continuous delivery", journal = j-SIGPLAN, volume = "52", number = "3", pages = "178--189", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993257", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Regression testing is a form of software quality assurance (QA) that involves comparing the behavior of a newer version of a software artifact to its earlier correct behavior, and signaling the QA engineer when deviations are detected. Given the large potential in automated generation and execution of regression test cases for business process models in the context of running systems, powerful tools are required to make this practically feasible, more specifically to limit the potential impact on production systems, and to reduce the manual effort required from QA engineers. In this paper, we present a regression testing automation framework that implements the capture {\&} replay paradigm in the context of BPMN 2.0, a domain-specific language for modeling and executing business processes. The framework employs parallelization techniques and efficient communication patterns to reduce the performance overhead of capturing. Based on inputs from the QA engineer, it manipulates the BPMN2 model before executing tests for isolating the latter from external dependencies (e.g. human actors or expensive web services) and for avoiding undesired side-effects. Finally, it performs a regression detection algorithm and reports the results to the QA engineer. We have implemented our framework on top of a BPMN2-compliant execution engine, namely jBPM, and performed functional validations and evaluations of its performance and fault-tolerance. The results, indicating 3.9\% average capturing performance overhead, demonstrate that the implemented framework can be the foundation of a practical regression testing tool for BPMN 2.0, and a key enabler for continuous delivery of business process-driven applications and services.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Hammer:2017:VOV, author = "Matthew A. Hammer and Bor-Yuh Evan Chang and David {Van Horn}", title = "A vision for online verification-validation", journal = j-SIGPLAN, volume = "52", number = "3", pages = "190--201", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993255", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's programmers face a false choice between creating software that is extensible and software that is correct. 
Specifically, dynamic languages permit software that is richly extensible (via dynamic code loading, dynamic object extension, and various forms of reflection), and today's programmers exploit this flexibility to ``bring their own language features'' to enrich extensible languages (e.g., by using common JavaScript libraries). Meanwhile, such library-based language extensions generally lack enforcement of their abstractions, leading to programming errors that are complex to avoid and predict. To offer verification for this extensible world, we propose online verification-validation (OVV), which consists of language and VM design that enables a ``phaseless'' approach to program analysis, in contrast to the standard static-dynamic phase distinction. Phaseless analysis freely interposes abstract interpretation with concrete execution, allowing analyses to use dynamic (concrete) information to prove universal (abstract) properties about future execution. In this paper, we present a conceptual overview of OVV through a motivating example program that uses a hypothetical database library. We present a generic semantics for OVV, and an extension to this semantics that offers a simple gradual type system for the database library primitives. The result of instantiating this gradual type system in an OVV setting is a checker that can progressively type successive continuations of the program until a continuation is fully verified. To evaluate the proposed vision of OVV for this example, we implement the VM semantics (in Rust), and show that this design permits progressive typing in this manner.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Boussaa:2017:ANF, author = "Mohamed Boussaa and Olivier Barais and Benoit Baudry and Gerson Suny{\'e}", title = "Automatic non-functional testing of code generators families", journal = j-SIGPLAN, volume = "52", number = "3", pages = "202--212", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993256", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The intensive use of generative programming techniques provides an elegant engineering solution to deal with the heterogeneity of platforms and technological stacks. The use of domain-specific languages, for example, leads to the creation of numerous code generators that automatically translate high-level system specifications into multi-target executable code. Producing correct and efficient code generators is complex and error-prone. Although software designers generally provide high-level test suites to verify the functional outcome of generated code, it remains challenging and tedious to verify the behavior of produced code in terms of non-functional properties. This paper describes a practical approach based on a runtime monitoring infrastructure to automatically check for potentially inefficient code generators. This infrastructure, based on system containers as execution platforms, allows code-generator developers to evaluate the generated code performance. We evaluate our approach by analyzing the performance of Haxe, a popular high-level programming language that involves a set of cross-platform code generators. 
Experimental results show that our approach is able to detect some performance inconsistencies that reveal real issues in Haxe code generators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '16 conference proceedings.", } @Article{Abadi:2016:TLF, author = "Mart{\'\i}n Abadi", title = "{TensorFlow}: learning functions at scale", journal = j-SIGPLAN, volume = "51", number = "9", pages = "1--1", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2976746", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. Its computational model is based on dataflow graphs with mutable state. Graph nodes may be mapped to different machines in a cluster, and within each machine to CPUs, GPUs, and other devices. TensorFlow supports a variety of applications, but it particularly targets training and inference with deep neural networks. It serves as a platform for research and for deploying machine learning systems across many areas, such as speech recognition, computer vision, robotics, information retrieval, and natural language processing. In this talk, we describe TensorFlow and outline some of its applications. We also discuss the question of what TensorFlow and deep learning may have to do with functional programming. Although TensorFlow is not purely functional, many of its uses are concerned with optimizing functions (during training), then with applying those functions (during inference). These functions are defined as compositions of simple primitives (as is common in functional programming), with internal data representations that are learned rather than manually designed. TensorFlow is joint work with many other people in the Google Brain team and elsewhere. More information is available at tensorflow.org.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Ryu:2016:JFB, author = "Sukyoung Ryu", title = "Journey to find bugs in {JavaScript} web applications in the wild", journal = j-SIGPLAN, volume = "51", number = "9", pages = "2--2", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2976747", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Analyzing real-world JavaScript web applications is a challenging task. On top of understanding the semantics of JavaScript, it requires modeling of web documents, platform objects, and interactions between them. Not only the JavaScript language itself but also its usage patterns are extremely dynamic. JavaScript can generate code and run it during evaluation, and most web applications load JavaScript code dynamically. Such dynamic characteristics of JavaScript web applications make pure static analysis approaches inapplicable. In this talk, we present our attempts to analyze JavaScript web applications in the wild mostly statically using various approaches. 
From pure JavaScript programs to JavaScript web applications using platform-specific libraries and dynamic code loading, we explain technical challenges in analyzing each of them and how we built an open-source analysis framework for JavaScript, SAFE, that addresses the challenges incrementally. In spite of active research accomplishments in analysis of JavaScript web applications, many issues still remain to be resolved such as events, callback functions, and hybrid web applications. We discuss possible future research directions and open challenges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Licata:2016:FPG, author = "Dan Licata", title = "A functional programmer's guide to homotopy type theory", journal = j-SIGPLAN, volume = "51", number = "9", pages = "3--3", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2976748", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dependent type theories are functional programming languages with types rich enough to do computer-checked mathematics and software verification. Homotopy type theory is a recent area of work that connects dependent type theory to the mathematical disciplines of homotopy theory and higher-dimensional category theory. From a programming point of view, these connections have revealed that all types in dependent type theory support a certain generic program that had not previously been exploited. Specifically, each type can be equipped with computationally relevant witnesses of equality of elements of that type, and all types support a generic program that transports elements along these equalities. One mechanism for equipping types with non-trivial witnesses of equality is Voevodsky's univalence axiom, which implies that equality of types themselves is witnessed by type isomorphism. Another is higher inductive types, an extended datatype schema that allows identifications between different datatype constructors. While these new mechanisms were originally formulated as axiomatic extensions of type theory, recent work has investigated their computational meaning, leading to the development of new programming languages that better support them. In this talk, I will illustrate what univalence and higher inductive types mean in programming terms. I will also discuss how studying some related semantic settings can reveal additional structure on types; for example, moving from groupoids (categories where all maps are invertible) to general categories yields an account of coercions instead of equalities. 
Overall, I hope to convey some of the beauty and richness of these connections between disciplines, which we are just beginning to understand.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Castro:2016:FPS, author = "David Castro and Kevin Hammond and Susmit Sarkar", title = "Farms, pipes, streams and reforestation: reasoning about structured parallel processes using types and hylomorphisms", journal = j-SIGPLAN, volume = "51", number = "9", pages = "4--17", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951920", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The increasing importance of parallelism has motivated the creation of better abstractions for writing parallel software, including structured parallelism using nested algorithmic skeletons. Such approaches provide high-level abstractions that avoid common problems, such as race conditions, and often allow strong cost models to be defined. However, choosing a combination of algorithmic skeletons that yields good parallel speedups for a program on some specific parallel architecture remains a difficult task. In order to achieve this, it is necessary to simultaneously reason both about the costs of different parallel structures and about the semantic equivalences between them. This paper presents a new type-based mechanism that enables strong static reasoning about these properties. We exploit well-known properties of a very general recursion pattern, hylomorphisms, and give a denotational semantics for structured parallel processes in terms of these hylomorphisms. Using our approach, it is possible to determine formally whether it is possible to introduce a desired parallel structure into a program without altering its functional behaviour, and also to choose a version of that parallel structure that minimises some given cost model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Acar:2016:DCC, author = "Umut A. Acar and Arthur Chargu{\'e}raud and Mike Rainey and Filip Sieczkowski", title = "Dag-calculus: a calculus for parallel computation", journal = j-SIGPLAN, volume = "51", number = "9", pages = "18--32", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951946", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increasing availability of multicore systems has led to greater focus on the design and implementation of languages for writing parallel programs. Such languages support various abstractions for parallelism, such as fork-join, async-finish, futures. While they may seem similar, these abstractions lead to different semantics, language design and implementation decisions, and can significantly impact the performance of end-user applications. In this paper, we consider the question of whether it would be possible to unify various paradigms of parallel computing. 
To this end, we propose a calculus, called dag calculus, that can encode fork-join, async-finish, and futures, and possibly others. We describe dag calculus and its semantics, and establish translations from the aforementioned paradigms into dag calculus. These translations establish that dag calculus is sufficiently powerful for encoding programs written in prevailing paradigms of parallelism. We present concurrent algorithms and data structures for realizing dag calculus on multicore hardware and prove that the proposed techniques are consistent with the semantics. Finally, we present an implementation of the calculus and evaluate it empirically by comparing its performance to highly optimized code from prior work. The results show that the calculus is expressive and that it competes well with, and sometimes outperforms, the state of the art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Borgstrom:2016:LCF, author = "Johannes Borgstr{\"o}m and Ugo {Dal Lago} and Andrew D. Gordon and Marcin Szymczak", title = "A lambda-calculus foundation for universal probabilistic programming", journal = j-SIGPLAN, volume = "51", number = "9", pages = "33--46", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951942", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop the operational semantics of an untyped probabilistic \lambda -calculus with continuous distributions, and both hard and soft constraints, as a foundation for universal probabilistic programming languages such as Church, Anglican, and Venture. Our first contribution is to adapt the classic operational semantics of \lambda -calculus to a continuous setting via creating a measure space on terms and defining step-indexed approximations. We prove equivalence of big-step and small-step formulations of this distribution-based semantics. To move closer to inference techniques, we also define the sampling-based semantics of a term as a function from a trace of random samples to a value. We show that the distribution induced by integration over the space of traces equals the distribution-based semantics. Our second contribution is to formalize the implementation technique of trace Markov chain Monte Carlo (MCMC) for our calculus and to show its correctness. A key step is defining sufficient conditions for the distribution induced by trace MCMC to converge to the distribution-based semantics. 
To the best of our knowledge, this is the first rigorous correctness proof for trace MCMC for a higher-order functional language, or for a language with soft constraints.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Ismail:2016:DPD, author = "Wazim Mohammed Ismail and Chung-chieh Shan", title = "Deriving a probability density calculator (functional pearl)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "47--59", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951922", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Given an expression that denotes a probability distribution, often we want a corresponding density function, to use in probabilistic inference. Fortunately, the task of finding a density has been automated. It turns out that we can derive a compositional procedure for finding a density, by equational reasoning about integrals, starting with the mathematical specification of what a density is. Moreover, the density found can be run as an estimation algorithm, as well as simplified as an exact formula to improve the estimate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Tan:2016:NVC, author = "Yong Kiam Tan and Magnus O. Myreen and Ramana Kumar and Anthony Fox and Scott Owens and Michael Norrish", title = "A new verified compiler backend for {CakeML}", journal = j-SIGPLAN, volume = "51", number = "9", pages = "60--73", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951924", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We have developed and mechanically verified a new compiler backend for CakeML. Our new compiler features a sequence of intermediate languages that allows it to incrementally compile away high-level features and enables verification at the right levels of semantic detail. In this way, it resembles mainstream (unverified) compilers for strict functional languages. The compiler supports efficient curried multi-argument functions, configurable data representations, exceptions that unwind the call stack, register allocation, and more. The compiler targets several architectures: x86-64, ARMv6, ARMv8, MIPS-64, and RISC-V. In this paper, we present the overall structure of the compiler, including its 12 intermediate languages, and explain how everything fits together. We focus particularly on the interaction between the verification of the register allocator and the garbage collector, and memory representations. The entire development has been carried out within the HOL4 theorem prover.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Downen:2016:SCC, author = "Paul Downen and Luke Maurer and Zena M. 
Ariola and Simon Peyton Jones", title = "Sequent calculus as a compiler intermediate language", journal = j-SIGPLAN, volume = "51", number = "9", pages = "74--88", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951931", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The $ \lambda $ -calculus is popular as an intermediate language for practical compilers. But in the world of logic it has a lesser-known twin, born at the same time, called the sequent calculus. Perhaps that would make for a good intermediate language, too? To explore this question we designed Sequent Core, a practically-oriented core calculus based on the sequent calculus, and used it to re-implement a substantial chunk of the Glasgow Haskell Compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{OConnor:2016:RTR, author = "Liam O'Connor and Zilin Chen and Christine Rizkallah and Sidney Amani and Japheth Lim and Toby Murray and Yutaka Nagashima and Thomas Sewell and Gerwin Klein", title = "Refinement through restraint: bringing down the cost of verification", journal = j-SIGPLAN, volume = "51", number = "9", pages = "89--102", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951940", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a framework aimed at significantly reducing the cost of verifying certain classes of systems software, such as file systems. Our framework allows for equational reasoning about systems code written in our new language, Cogent. Cogent is a restricted, polymorphic, higher-order, and purely functional language with linear types and without the need for a trusted runtime or garbage collector. Linear types allow us to assign two semantics to the language: one imperative, suitable for efficient C code generation; and one functional, suitable for equational reasoning and verification. As Cogent is a restricted language, it is designed to easily interoperate with existing C functions and to connect to existing C verification frameworks. Our framework is based on certifying compilation: For a well-typed Cogent program, our compiler produces C code, a high-level shallow embedding of its semantics in Isabelle/HOL, and a proof that the C code correctly refines this embedding. Thus one can reason about the full semantics of real-world systems code productively and equationally, while retaining the interoperability and leanness of C. The compiler certificate is a series of language-level proofs and per-program translation validation phases, combined into one coherent top-level theorem in Isabelle/HOL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{New:2016:FAC, author = "Max S. New and William J. 
Bowman and Amal Ahmed", title = "Fully abstract compilation via universal embedding", journal = j-SIGPLAN, volume = "51", number = "9", pages = "103--116", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951941", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A fully abstract compiler guarantees that two source components are observationally equivalent in the source language if and only if their translations are observationally equivalent in the target. Full abstraction implies the translation is secure: target-language attackers can make no more observations of a compiled component than a source-language attacker interacting with the original source component. Proving full abstraction for realistic compilers is challenging because realistic target languages contain features (such as control effects) unavailable in the source, while proofs of full abstraction require showing that every target context to which a compiled component may be linked can be back-translated to a behaviorally equivalent source context. We prove the first full abstraction result for a translation whose target language contains exceptions, but the source does not. Our translation---specifically, closure conversion of simply typed $ \lambda $-calculus with recursive types---uses types at the target level to ensure that a compiled component is never linked with attackers that have more distinguishing power than source-level attackers. We present a new back-translation technique based on a shallow embedding of the target language into the source language at a dynamic type. Then boundaries are inserted that mediate terms between the untyped embedding and the strongly-typed source. This technique allows back-translating non-terminating programs, target features that are untypeable in the source, and well-bracketed effects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Dimoulas:2016:OLP, author = "Christos Dimoulas and Max S. New and Robert Bruce Findler and Matthias Felleisen", title = "{Oh Lord}, please don't let contracts be misunderstood (functional pearl)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "117--131", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951930", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Contracts feel misunderstood, especially those with a higher-order soul. While software engineers appreciate contracts as tools for articulating the interface between components, functional programmers desperately search for their types and meaning, completely forgetting about their pragmatics. This gem presents a novel analysis of contract systems. Applied to the higher-order kind, this analysis reveals their large and clearly unappreciated software engineering potential. 
Three sample applications illustrate where this kind of exploration may lead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Cicek:2016:TTI, author = "Ezgi {\c{C}}i{\c{c}}ek and Zoe Paraskevopoulou and Deepak Garg", title = "A type theory for incremental computational complexity with control flow changes", journal = j-SIGPLAN, volume = "51", number = "9", pages = "132--145", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951950", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Incremental computation aims to speed up re-runs of a program after its inputs have been modified slightly. It works by recording a trace of the program's first run and propagating changes through the trace in incremental runs, trying to re-use as much of the original trace as possible. The recent work CostIt is a type and effect system to establish the time complexity of incremental runs of a program, as a function of input changes. However, CostIt is limited in two ways. First, it prohibits input changes that influence control flow. This makes it impossible to type programs that, for instance, branch on inputs that may change. Second, the soundness of CostIt is proved relative to an abstract cost semantics, but it is unclear how the semantics can be realized. In this paper, we address both these limitations. We present DuCostIt, a re-design of CostIt, that combines reasoning about costs of change propagation and costs of from-scratch evaluation. The latter lifts the restriction on control flow changes. To obtain the type system, we refine Flow Caml, a type system for information flow analysis, with cost effects. Additionally, we inherit from CostIt index refinements to track data structure sizes and a co-monadic type. Using a combination of binary and unary step-indexed logical relations, we prove DuCostIt's cost analysis sound relative to not only an abstract cost semantics, but also a concrete semantics, which is obtained by translation to an ML-like language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Takeda:2016:CBE, author = "Kotaro Takeda and Naoki Kobayashi and Kazuya Yaguchi and Ayumi Shinohara", title = "Compact bit encoding schemes for simply-typed lambda-terms", journal = j-SIGPLAN, volume = "51", number = "9", pages = "146--157", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951918", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We consider the problem of how to compactly encode simply-typed \lambda -terms into bit strings. The work has been motivated by Kobayashi et al.'s recent work on higher-order data compression, where data are encoded as functional programs (or, \lambda -terms) that generate them. To exploit its good compression power, the compression scheme has to come with a method for compactly encoding the \lambda -terms into bit strings. 
To this end, we propose two type-based bit-encoding schemes; the first one encodes a \lambda -term into a sequence of symbols by using type information, and then applies arithmetic coding to convert the sequence to a bit string. The second one is more sophisticated; we prepare a context-free grammar (CFG) that describes only well-typed terms, and then use a variation of arithmetic coding specialized for the CFG. We have implemented both schemes and confirmed that they often output more compact codes than previous bit encoding schemes for \lambda -terms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Mu:2016:QGO, author = "Shin-Cheng Mu and Yu-Hsi Chiang and Yu-Han Lyu", title = "Queueing and glueing for optimal partitioning (functional pearl)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "158--167", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951923", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The queueing-glueing algorithm is the nickname we give to an algorithmic pattern that provides amortised linear time solutions to a number of optimal list partition problems that have a peculiar property: at various moments we know that two of three candidate solutions could be optimal. The algorithm works by keeping a queue of lists, glueing them from one end, while chopping from the other end, hence the name. We give a formal derivation of the algorithm, and demonstrate it with several non-trivial examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Christiansen:2016:ASP, author = "Jan Christiansen and Nikita Danilenko and Sandra Dylus", title = "All sorts of permutations (functional pearl)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "168--179", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951949", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The combination of non-determinism and sorting is mostly associated with permutation sort, a sorting algorithm that is not very useful for sorting and has an awful running time. In this paper we look at the combination of non-determinism and sorting in a different light: given a sorting function, we apply it to a non-deterministic predicate to gain a function that enumerates permutations of the input list. We get to the bottom of necessary properties of the sorting algorithms and predicates in play as well as discuss variations of the modelled non-determinism. On top of that, we formulate and prove a theorem stating that no matter which sorting function we use, the corresponding permutation function enumerates all permutations of the input list. 
We use free theorems, which are derived from the type of a function alone, to prove the statement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Serrano:2016:GH, author = "Manuel Serrano and Vincent Prunet", title = "A glimpse of {Hopjs}", journal = j-SIGPLAN, volume = "51", number = "9", pages = "180--192", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951916", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hop.js is a multitier programming environment for JavaScript. It allows a single JavaScript program to describe the client-side and the server-side components of a web application. Its runtime environment ensures consistent executions of the application on the server and on the client. This paper overviews the Hop.js design. It shows the JavaScript extensions that makes it possible to conceive web applications globally. It presents how Hop.js interacts with the outside world. It also briefly presents the Hop.js implementation. It presents the Hop.js web server implementation, the handling of server-side parallelism, and the JavaScript and HTML compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Sergey:2016:ERG, author = "Ilya Sergey", title = "Experience report: growing and shrinking polygons for random testing of computational geometry algorithms", journal = j-SIGPLAN, volume = "51", number = "9", pages = "193--199", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951927", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper documents our experience of adapting and using the QuickCheck-style approach for extensive randomised property-based testing of computational geometry algorithms. The need in rigorous evaluation of computational geometry procedures has naturally arisen in our quest of organising a medium-size programming contest for second year university students-an experiment we conducted as an attempt to introduce them to computational geometry. The main effort in organising the event was implementation of a solid infrastructure for testing and ranking solutions. For this, we employed functional programming techniques. The choice of the language and the paradigm made it possible for us to engineer, from scratch and in a very short period of time, a series of robust geometric primitives and algorithms, as well as implement a scalable framework for their randomised testing. 
We describe the main insights, enabling efficient random testing of geometric procedures, and report on our experience of using the testing framework, which helped us to detect and fix a number of issues not just in our programming artefacts, but also in the published algorithms we had implemented.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Emoto:2016:TLV, author = "Kento Emoto and Kiminori Matsuzaki and Zhenjiang Hu and Akimasa Morihata and Hideya Iwasaki", title = "Think like a vertex, behave like a function! {A} functional {DSL} for vertex-centric big graph processing", journal = j-SIGPLAN, volume = "51", number = "9", pages = "200--213", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951938", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The vertex-centric programming model, known as ``think like a vertex'', is being used more and more to support various big graph processing methods through iterative supersteps that execute in parallel a user-defined vertex program over each vertex of a graph. However, the imperative and message-passing style of existing systems makes defining a vertex program unintuitive. In this paper, we show that one can benefit more from ``Thinking like a vertex'' by ``Behaving like a function'' rather than ``Acting like a procedure'' with full use of side effects and explicit control of message passing, state, and termination. We propose a functional approach to vertex-centric graph processing in which the computation at every vertex is abstracted as a higher-order function and present Fregel, a new domain-specific language. Fregel has clear functional semantics, supports declarative description of vertex computation, and can be automatically translated into Pregel, an emerging imperative-style distributed graph processing framework, and thereby achieve promising performance. Experimental results for several typical examples show the promise of this functional approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Arntzenius:2016:DFD, author = "Michael Arntzenius and Neelakantan R. Krishnaswami", title = "{Datafun}: a functional {Datalog}", journal = j-SIGPLAN, volume = "51", number = "9", pages = "214--227", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951948", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Datalog may be considered either an unusually powerful query language or a carefully limited logic programming language. Datalog is declarative, expressive, and optimizable, and has been applied successfully in a wide variety of problem domains. However, most use-cases require extending Datalog in an application-specific manner. In this paper we define Datafun, an analogue of Datalog supporting higher-order functional programming. 
The key idea is to track monotonicity with types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Seidel:2016:DWS, author = "Eric L. Seidel and Ranjit Jhala and Westley Weimer", title = "Dynamic witnesses for static type errors (or, ill-typed programs usually go wrong)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "228--242", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951915", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Static type errors are a common stumbling block for newcomers to typed functional languages. We present a dynamic approach to explaining type errors by generating counterexample witness inputs that illustrate how an ill-typed program goes wrong. First, given an ill-typed function, we symbolically execute the body to synthesize witness values that make the program go wrong. We prove that our procedure synthesizes general witnesses in that if a witness is found, then for all inhabited input types, there exist values that can make the function go wrong. Second, we show how to extend the above procedure to produce a reduction graph that can be used to interactively visualize and debug witness executions. Third, we evaluate the coverage of our approach on two data sets comprising over 4,500 ill-typed student programs. Our technique is able to generate witnesses for 88\% of the programs, and our reduction graph yields small counterexamples for 81\% of the witnesses. Finally, we evaluate whether our witnesses help students understand and fix type errors, and find that students presented with our witnesses show a greater understanding of type errors than those presented with a standard error message.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Watanabe:2016:ADF, author = "Keiichi Watanabe and Ryosuke Sato and Takeshi Tsukada and Naoki Kobayashi", title = "Automatically disproving fair termination of higher-order functional programs", journal = j-SIGPLAN, volume = "51", number = "9", pages = "243--255", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951919", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose an automated method for disproving fair termination of higher-order functional programs, which is complementary to Murase et al.'s recent method for proving fair termination. A program is said to be fair terminating if it has no infinite execution trace that satisfies a given fairness constraint. Fair termination is an important property because program verification problems for arbitrary \omega -regular temporal properties can be transformed to those of fair termination. Our method reduces the problem of disproving fair termination to higher-order model checking by using predicate abstraction and CEGAR. 
Given a program, we convert it to an abstract program that generates an approximation of the (possibly infinite) execution traces of the original program, so that the original program has a fair infinite execution trace if the tree generated by the abstract program satisfies a certain property. The method is a non-trivial extension of Kuwahara et al.'s method for disproving plain termination.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Jung:2016:HOG, author = "Ralf Jung and Robbert Krebbers and Lars Birkedal and Derek Dreyer", title = "Higher-order ghost state", journal = j-SIGPLAN, volume = "51", number = "9", pages = "256--269", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951943", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The development of concurrent separation logic (CSL) has sparked a long line of work on modular verification of sophisticated concurrent programs. Two of the most important features supported by several existing extensions to CSL are higher-order quantification and custom ghost state. However, none of the logics that support both of these features reap the full potential of their combination. In particular, none of them provide general support for a feature we dub ``higher-order ghost state'': the ability to store arbitrary higher-order separation-logic predicates in ghost variables. In this paper, we propose higher-order ghost state as an interesting and useful extension to CSL, which we formalize in the framework of Jung et al.'s recently developed Iris logic. To justify its soundness, we develop a novel algebraic structure called CMRAs (``cameras''), which can be thought of as ``step-indexed partial commutative monoids''. Finally, we show that Iris proofs utilizing higher-order ghost state can be effectively formalized in Coq, and discuss the challenges we faced in formalizing them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Cockx:2016:UEP, author = "Jesper Cockx and Dominique Devriese and Frank Piessens", title = "Unifiers as equivalences: proof-relevant unification of dependently typed data", journal = j-SIGPLAN, volume = "51", number = "9", pages = "270--283", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951917", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dependently typed languages such as Agda, Coq and Idris use a syntactic first-order unification algorithm to check definitions by dependent pattern matching. However, these algorithms don't adequately consider the types of the terms being unified, leading to various unintended results. As a consequence, they require ad hoc restrictions to preserve soundness, but this makes them very hard to prove correct, modify, or extend. This paper proposes a framework for reasoning formally about unification in a dependently typed setting. 
In this framework, unification rules compute not just a unifier but also a corresponding correctness proof in the form of an equivalence between two sets of equations. By rephrasing the standard unification rules in a proof-relevant manner, they are guaranteed to preserve soundness of the theory. In addition, it enables us to safely add new rules that can exploit the dependencies between the types of equations. Using our framework, we reimplemented the unification algorithm used by Agda. As a result, we were able to replace previous ad hoc restrictions with formally verified unification rules, fixing a number of bugs in the process. We are convinced this will also enable the addition of new and interesting unification rules in the future, without compromising soundness along the way.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Christiansen:2016:ERE, author = "David Christiansen and Edwin Brady", title = "Elaborator reflection: extending {Idris} in {Idris}", journal = j-SIGPLAN, volume = "51", number = "9", pages = "284--297", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951932", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many programming languages and proof assistants are defined by elaboration from a high-level language with a great deal of implicit information to a highly explicit core language. In many advanced languages, these elaboration facilities contain powerful tools for program construction, but these tools are rarely designed to be repurposed by users. We describe elaborator reflection, a paradigm for metaprogramming in which the elaboration machinery is made directly available to metaprograms, as well as a concrete realization of elaborator reflection in Idris, a functional language with full dependent types. We demonstrate the applicability of Idris's reflected elaboration framework to a number of realistic problems, we discuss the motivation for the specific features of its design, and we explore the broader meaning of elaborator reflection as it can relate to other languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Dagand:2016:PTE, author = "Pierre-Evariste Dagand and Nicolas Tabareau and {\'E}ric Tanter", title = "Partial type equivalences for verified dependent interoperability", journal = j-SIGPLAN, volume = "51", number = "9", pages = "298--310", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951933", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Full-spectrum dependent types promise to enable the development of correct-by-construction software. However, even certified software needs to interact with simply-typed or untyped programs, be it to perform system calls, or to use legacy libraries. 
Trading static guarantees for runtime checks, the dependent interoperability framework provides a mechanism by which simply-typed values can safely be coerced to dependent types and, conversely, dependently-typed programs can defensively be exported to a simply-typed application. In this paper, we give a semantic account of dependent interoperability. Our presentation relies on and is guided by a pervading notion of type equivalence, whose importance has been emphasized in recent work on homotopy type theory. Specifically, we develop the notion of partial type equivalences as a key foundation for dependent interoperability. Our framework is developed in Coq; it is thus constructive and verified in the strictest sense of the terms. Using our library, users can specify domain-specific partial equivalences between data structures. Our library then takes care of the (sometimes, heavy) lifting that leads to interoperable programs. It thus becomes possible, as we shall illustrate, to internalize and hand-tune the extraction of dependently-typed programs to interoperable OCaml programs within Coq itself.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Darais:2016:CGC, author = "David Darais and David {Van Horn}", title = "Constructive {Galois} connections: taming the {Galois} connection framework for mechanized metatheory", journal = j-SIGPLAN, volume = "51", number = "9", pages = "311--324", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951934", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Galois connections are a foundational tool for structuring abstraction in semantics and their use lies at the heart of the theory of abstract interpretation. Yet, mechanization of Galois connections remains limited to restricted modes of use, preventing their general application in mechanized metatheory and certified programming. This paper presents constructive Galois connections, a variant of Galois connections that is effective both on paper and in proof assistants; is complete with respect to a large subset of classical Galois connections; and enables more general reasoning principles, including the ``calculational'' style advocated by Cousot. To design constructive Galois connection we identify a restricted mode of use of classical ones which is both general and amenable to mechanization in dependently-typed functional programming languages. Crucial to our metatheory is the addition of monadic structure to Galois connections to control a ``specification effect''. Effectful calculations may reason classically, while pure calculations have extractable computational content. Explicitly moving between the worlds of specification and implementation is enabled by our metatheory. To validate our approach, we provide two case studies in mechanizing existing proofs from the literature: one uses calculational abstract interpretation to design a static analyzer, the other forms a semantic basis for gradual typing. 
Both mechanized proofs closely follow their original paper-and-pencil counterparts, employ reasoning principles not captured by previous mechanization approaches, support the extraction of verified algorithms, and are novel.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Blazy:2016:AMF, author = "Sandrine Blazy and Vincent Laporte and David Pichardie", title = "An abstract memory functor for verified {C} static analyzers", journal = j-SIGPLAN, volume = "51", number = "9", pages = "325--337", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951937", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Abstract interpretation provides advanced techniques to infer numerical invariants on programs. There is an abundant literature about numerical abstract domains that operate on scalar variables. This work deals with lifting these techniques to a realistic C memory model. We present an abstract memory functor that takes as argument any standard numerical abstract domain, and builds a memory abstract domain that finely tracks properties about memory contents, taking into account union types, pointer arithmetic and type casts. This functor is implemented and verified inside the Coq proof assistant with respect to the CompCert compiler memory model. Using the Coq extraction mechanism, it is fully executable and used by the Verasco C static analyzer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{McDonell:2016:GTS, author = "Trevor L. McDonell and Timothy A. K. Zakian and Matteo Cimini and Ryan R. Newton", title = "Ghostbuster: a tool for simplifying and converting {GADTs}", journal = j-SIGPLAN, volume = "51", number = "9", pages = "338--350", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951914", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Generalized Algebraic Datatypes, or simply GADTs, can encode non-trivial properties in the types of the constructors. Once such properties are encoded in a datatype, however, all code manipulating that datatype must provide proof that it maintains these properties in order to typecheck. In this paper, we take a step towards gradualizing these obligations. We introduce a tool, Ghostbuster, that produces simplified versions of GADTs which elide selected type parameters, thereby weakening the guarantees of the simplified datatype in exchange for reducing the obligations necessary to manipulate it. Like ornaments, these simplified datatypes preserve the recursive structure of the original, but unlike ornaments we focus on information-preserving bidirectional transformations. Ghostbuster generates type-safe conversion functions between the original and simplified datatypes, which we prove are the identity function when composed. 
We evaluate a prototype tool for Haskell against thousands of GADTs found on the Hackage package database, generating simpler Haskell'98 datatypes and round-trip conversion functions between the two.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Thibodeau:2016:ICT, author = "David Thibodeau and Andrew Cave and Brigitte Pientka", title = "Indexed codata types", journal = j-SIGPLAN, volume = "51", number = "9", pages = "351--363", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951929", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Indexed data types allow us to specify and verify many interesting invariants about finite data in a general-purpose programming language. In this paper we investigate the dual idea: indexed codata types, which allow us to describe data-dependencies about infinite data structures. Unlike finite data which is defined by constructors, we define infinite data by observations. Dual to pattern matching on indexed data which may refine the type indices, we define copattern matching on indexed codata where type indices guard observations we can make. Our key technical contributions are three-fold: first, we extend Levy's call-by-push-value language with support for indexed (co)data and deep (co)pattern matching; second, we provide a clean foundation for dependent (co)pattern matching using equality constraints; third, we describe a small-step semantics using a continuation-based abstract machine, define coverage for indexed (co)patterns, and prove type safety. This is an important step towards building a foundation where (co)data type definitions and dependent types can coexist.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Oliveira:2016:DIT, author = "Bruno C. d. S. Oliveira and Zhiyuan Shi and Jo{\~a}o Alpuim", title = "Disjoint intersection types", journal = j-SIGPLAN, volume = "51", number = "9", pages = "364--377", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951945", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dunfield showed that a simply typed core calculus with intersection types and a merge operator is able to capture various programming language features. While his calculus is type-safe, it is not coherent: different derivations for the same expression can elaborate to expressions that evaluate to different values. The lack of coherence is an important disadvantage for adoption of his core calculus in implementations of programming languages, as the semantics of the programming language becomes implementation-dependent. This paper presents \lambda _i: a coherent and type-safe calculus with a form of intersection types and a merge operator. Coherence is achieved by ensuring that intersection types are disjoint and programs are sufficiently annotated to avoid type ambiguity.
We propose a definition of disjointness where two types A and B are disjoint only if a certain set of types are common supertypes of A and B. We investigate three different variants of \lambda _i, with three variants of disjointness. In the simplest variant, which does not allow \top types, two types are disjoint if they do not share any common supertypes at all. The other two variants introduce \top types and refine the notion of disjointness to allow two types to be disjoint when their only common supertypes are top-like. The difference between the two variants with \top types is in the definition of top-like types, which has an impact on which types are allowed in intersections. We present a type system that prevents intersection types that are not disjoint, as well as algorithmic specifications to determine whether two types are disjoint for all three variants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Castagna:2016:STT, author = "Giuseppe Castagna and Tommaso Petrucciani and Kim Nguy{\~{\^e}}n", title = "Set-theoretic types for polymorphic variants", journal = j-SIGPLAN, volume = "51", number = "9", pages = "378--391", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951928", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Polymorphic variants are a useful feature of the OCaml language whose current definition and implementation rely on kinding constraints to simulate a subtyping relation via unification. This yields an awkward formalization and results in a type system whose behaviour is in some cases unintuitive and/or unduly restrictive. In this work, we present an alternative formalization of polymorphic variants, based on set-theoretic types and subtyping, that yields a cleaner and more streamlined system. Our formalization is more expressive than the current one (it types more programs while preserving type safety), it can internalize some meta-theoretic properties, and it removes some pathological cases of the current implementation resulting in a more intuitive and, thus, predictable type system. More generally, this work shows how to add full-fledged union types to functional languages of the ML family that usually rely on the Hindley-Milner type system. As an aside, our system also improves the theory of semantic subtyping, notably by proving completeness for the type reconstruction algorithm.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Raghunathan:2016:HMM, author = "Ram Raghunathan and Stefan K. Muller and Umut A. Acar and Guy Blelloch", title = "Hierarchical memory management for parallel programs", journal = j-SIGPLAN, volume = "51", number = "9", pages = "392--406", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951935", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An important feature of functional programs is that they are parallel by default.
Implementing an efficient parallel functional language, however, is a major challenge, in part because the high rate of allocation and freeing associated with functional programs requires an efficient and scalable memory manager. In this paper, we present a technique for parallel memory management for strict functional languages with nested parallelism. At the highest level of abstraction, the approach consists of a technique to organize memory as a hierarchy of heaps, and an algorithm for performing automatic memory reclamation by taking advantage of a disentanglement property of parallel functional programs. More specifically, the idea is to assign to each parallel task its own heap in memory and organize the heaps in a hierarchy/tree that mirrors the hierarchy of tasks. We present a nested-parallel calculus that specifies hierarchical heaps and prove in this calculus a disentanglement property, which prohibits a task from accessing objects allocated by another task that might execute in parallel. Leveraging the disentanglement property, we present a garbage collection technique that can operate on any subtree in the memory hierarchy concurrently as other tasks (and/or other collections) proceed in parallel. We prove the safety of this collector by formalizing it in the context of our parallel calculus. In addition, we describe how the proposed techniques can be implemented on modern shared-memory machines and present a prototype implementation as an extension to MLton, a high-performance compiler for the Standard ML language. Finally, we evaluate the performance of this implementation on a number of parallel benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Gilray:2016:ACP, author = "Thomas Gilray and Michael D. Adams and Matthew Might", title = "Allocation characterizes polyvariance: a unified methodology for polyvariant control-flow analysis", journal = j-SIGPLAN, volume = "51", number = "9", pages = "407--420", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951936", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The polyvariance of a static analysis is the degree to which it structurally differentiates approximations of program values. Polyvariant techniques come in a number of different flavors that represent alternative heuristics for managing the trade-off an analysis strikes between precision and complexity. For example, call sensitivity supposes that values will tend to correlate with recent call sites, object sensitivity supposes that values will correlate with the allocation points of related objects, the Cartesian product algorithm supposes correlations between the values of arguments to the same function, and so forth. In this paper, we describe a unified methodology for implementing and understanding polyvariance in a higher-order setting (i.e., for control-flow analyses). We do this by extending the method of abstracting abstract machines (AAM), a systematic approach to producing an abstract interpretation of abstract-machine semantics. 
AAM eliminates recursion within a language's semantics by passing around an explicit store, and thus places importance on the strategy an analysis uses for allocating abstract addresses within the abstract heap or store. We build on AAM by showing that the design space of possible abstract allocators exactly and uniquely corresponds to the design space of polyvariant strategies. This allows us to both unify and generalize polyvariance as tunings of a single function. Changes to the behavior of this function easily recapitulate classic styles of analysis and produce novel variations, combinations of techniques, and fundamentally new techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Ueno:2016:FCG, author = "Katsuhiro Ueno and Atsushi Ohori", title = "A fully concurrent garbage collector for functional programs on multicore processors", journal = j-SIGPLAN, volume = "51", number = "9", pages = "421--433", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951944", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a concurrent garbage collection method for functional programs running on a multicore processor. It is a concurrent extension of our bitmap-marking non-moving collector with Yuasa's snapshot-at-the-beginning strategy. Our collector is unobtrusive in the sense of the Doligez-Leroy-Gonthier collector; the collector does not stop any mutator thread nor does it force them to synchronize globally. The only critical sections between a mutator and the collector are the code to enqueue/dequeue a 32 kB allocation segment to/from a global segment list and the write barrier code to push an object pointer onto the collector's stack. Most of these data structures can be implemented in standard lock-free data structures. This achieves both efficient allocation and unobtrusive collection in a multicore system. The proposed method has been implemented in SML\#, a full-scale Standard ML compiler supporting multiple native threads on multicore CPUs. Our benchmark tests show a drastically short pause time with reasonably low overhead compared to the sequential bitmap-marking collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Lindley:2016:TBS, author = "Sam Lindley and J. Garrett Morris", title = "Talking bananas: structural recursion for session types", journal = j-SIGPLAN, volume = "51", number = "9", pages = "434--447", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951921", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Session types provide static guarantees that concurrent programs respect communication protocols. We give a novel account of recursive session types in the context of GV, a small concurrent extension of the linear \lambda -calculus. We extend GV with recursive types and catamorphisms, following the initial algebra semantics of recursion, and show that doing so naturally gives rise to recursive session types. 
We show that this principled approach to recursion resolves long-standing problems in the treatment of duality for recursive session types. We characterize the expressiveness of GV concurrency by giving a CPS translation to (non-concurrent) \lambda -calculus and proving that reduction in GV is simulated by full reduction in \lambda -calculus. This shows that GV remains terminating in the presence of positive recursive types, and that such arguments extend to other extensions of GV, such as polymorphism or non-linear types, by appeal to normalization results for sequential \lambda -calculi. We also show that GV remains deadlock free and deterministic in the presence of recursive types. Finally, we extend CP, a session-typed process calculus based on linear logic, with recursive types, and show that doing so preserves the connection between reduction in GV and cut elimination in CP.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Morris:2016:BBW, author = "J. Garrett Morris", title = "The best of both worlds: linear functional programming without compromise", journal = j-SIGPLAN, volume = "51", number = "9", pages = "448--461", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951925", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a linear functional calculus with both the safety guarantees expressible with linear types and the rich language of combinators and composition provided by functional programming. Unlike previous combinations of linear typing and functional programming, we compromise neither the linear side (for example, our linear values are first-class citizens of the language) nor the functional side (for example, we do not require duplicate definitions of compositions for linear and unrestricted functions). To do so, we must generalize abstraction and application to encompass both linear and unrestricted functions. We capture the typing of the generalized constructs with a novel use of qualified types. Our system maintains the metatheoretic properties of the theory of qualified types, including principal types and decidable type inference. Finally, we give a formal basis for our claims of expressiveness, by showing that evaluation respects linearity, and that our language is a conservative extension of existing functional calculi.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Thiemann:2016:CFS, author = "Peter Thiemann and Vasco T. Vasconcelos", title = "Context-free session types", journal = j-SIGPLAN, volume = "51", number = "9", pages = "462--475", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951926", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Session types describe structured communication on heterogeneously typed channels at a high level. Their tail-recursive structure imposes a protocol that can be described by a regular language. 
The types of transmitted values are drawn from the underlying functional language, abstracting from the details of serializing values of structured data types. Context-free session types extend session types by allowing nested protocols that are not restricted to tail recursion. Nested protocols correspond to deterministic context-free languages. Such protocols are interesting in their own right, but they are particularly suited to describe the low-level serialization of tree-structured data in a type-safe way. We establish the metatheory of context-free session types, prove that they properly generalize standard (two-party) session types, and take first steps towards type checking by showing that type equivalence is decidable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Gaboardi:2016:CEC, author = "Marco Gaboardi and Shin-ya Katsumata and Dominic Orchard and Flavien Breuvart and Tarmo Uustalu", title = "Combining effects and coeffects via grading", journal = j-SIGPLAN, volume = "51", number = "9", pages = "476--489", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951939", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effects and coeffects are two general, complementary aspects of program behaviour. They roughly correspond to computations which change the execution context (effects) versus computations which make demands on the context (coeffects). Effectful features include partiality, non-determinism, input-output, state, and exceptions. Coeffectful features include resource demands, variable access, notions of linearity, and data input requirements. The effectful or coeffectful behaviour of a program can be captured and described via type-based analyses, with fine grained information provided by monoidal effect annotations and semiring coeffects. Various recent work has proposed models for such typed calculi in terms of graded (strong) monads for effects and graded (monoidal) comonads for coeffects. Effects and coeffects have been studied separately so far, but in practice many computations are both effectful and coeffectful, e.g., possibly throwing exceptions but with resource requirements. To remedy this, we introduce a new general calculus with a combined effect-coeffect system. This can describe both the changes and requirements that a program has on its context, as well as interactions between these effectful and coeffectful features of computation. The effect-coeffect system has a denotational model in terms of effect-graded monads and coeffect-graded comonads where interaction is expressed via the novel concept of graded distributive laws. This graded semantics unifies the syntactic type theory with the denotational model. 
We show that our calculus can be instantiated to describe in a natural way various different kinds of interaction between a program and its evaluation context.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Pirog:2016:SDF, author = "Maciej Pir{\'o}g and Nicolas Wu", title = "String diagrams for free monads (functional pearl)", journal = j-SIGPLAN, volume = "51", number = "9", pages = "490--501", month = sep, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022670.2951947", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show how one can reason about free monads using their universal properties rather than any concrete implementation. We introduce a graphical, two-dimensional calculus tailor-made to accommodate these properties.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ICFP '16 conference proceedings.", } @Article{Wade:2017:AVJ, author = "April W. Wade and Prasad A. Kulkarni and Michael R. Jantz", title = "{AOT} vs. {JIT}: impact of profile data on code quality", journal = j-SIGPLAN, volume = "52", number = "4", pages = "1--10", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081037", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Just-in-time (JIT) compilation during program execution and ahead-of-time (AOT) compilation during software installation are alternate techniques used by managed language virtual machines (VM) to generate optimized native code while simultaneously achieving binary code portability and high execution performance. Profile data collected by JIT compilers at run-time can enable profile-guided optimizations (PGO) to customize the generated native code to different program inputs. AOT compilation removes the speed and energy overhead of online profile collection and dynamic compilation, but may not be able to achieve the quality and performance of customized native code. The goal of this work is to investigate and quantify the implications of the AOT compilation model on the quality of the generated native code for current VMs. First, we quantify the quality of native code generated by the two compilation models for a state-of-the-art (HotSpot) Java VM. Second, we determine how the amount of profile data collected affects the quality of generated code. Third, we develop a mechanism to determine the accuracy or similarity for different profile data for a given program run, and investigate how the accuracy of profile data affects its ability to effectively guide PGOs. 
Finally, we categorize the profile data types in our VM and explore the contribution of each such category to performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Taylor:2017:AOO, author = "Ben Taylor and Vicent Sanz Marco and Zheng Wang", title = "Adaptive optimization for {OpenCL} programs on embedded heterogeneous systems", journal = j-SIGPLAN, volume = "52", number = "4", pages = "11--20", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous multi-core architectures consisting of CPUs and GPUs are commonplace in today's embedded systems. These architectures offer potential for energy-efficient computing if the application task is mapped to the right core. Realizing such potential is challenging due to the complex and evolving nature of hardware and applications. This paper presents an automatic approach to map OpenCL kernels onto heterogeneous multi-cores for a given optimization criterion --- whether it is faster runtime, lower energy consumption or a trade-off between them. This is achieved by developing a machine learning based approach to predict which processor to use to run the OpenCL kernel and the host program, and at what frequency the processor should operate. Instead of hand-tuning a model for each optimization metric, we use machine learning to develop a unified framework that first automatically learns the optimization heuristic for each metric off-line, then uses the learned knowledge to schedule OpenCL kernels at runtime based on code and runtime information of the program. We apply our approach to a set of representative OpenCL benchmarks and evaluate it on an ARM big.LITTLE mobile platform. Our approach achieves over 93\% of the performance delivered by a perfect predictor. We obtain, on average, 1.2x, 1.6x, and 1.8x improvements, respectively, for runtime, energy consumption, and the energy delay product when compared to a comparative heterogeneous-aware OpenCL task mapping scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Reiche:2017:AVI, author = "Oliver Reiche and Christof Kobylko and Frank Hannig and J{\"u}rgen Teich", title = "Auto-vectorization for image processing {DSLs}", journal = j-SIGPLAN, volume = "52", number = "4", pages = "21--30", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081039", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallelizing programs and distributing their workloads to multiple threads can be a challenging task. In addition to multi-threading, harnessing vector units in CPUs proves highly desirable. However, employing vector units to speed up programs can be quite tedious. Either a program developer solely relies on the auto-vectorization capabilities of the compiler or he manually applies vector intrinsics, which is extremely error-prone, difficult to maintain, and not portable at all.
Based on whole-function vectorization, a method to replace control flow with data flow, we propose auto-vectorization techniques for image processing DSLs in the context of source-to-source compilation. The approach does not require the input to be available in SSA form. Moreover, we formulate constraints under which the vectorization analysis and code transformations may be greatly simplified in the context of image processing DSLs. As part of our methodology, we present control flow to data flow transformation as a source-to-source translation. Moreover, we propose a method to efficiently analyze algorithms with mixed bit-width data types to determine the optimal SIMD width, independently of the target instruction set. The techniques are integrated into an open source DSL framework. Subsequently, the vectorization capabilities are compared to a variety of existing state-of-the-art C/C++ compilers. A geometric mean speedup of up to 3.14 is observed for benchmarks taken from ISPC and image processing, compared to non-vectorized executions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Fu:2017:DTS, author = "Sheng-Yu Fu and Ding-Yong Hong and Yu-Ping Liu and Jan-Jan Wu and Wei-Chung Hsu", title = "Dynamic translation of structured Loads\slash Stores and register mapping for architectures with {SIMD} extensions", journal = j-SIGPLAN, volume = "52", number = "4", pages = "31--40", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081029", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "More and more modern processors have been supporting non-contiguous SIMD data accesses. However, translating such instructions has been overlooked in the Dynamic Binary Translation (DBT) area. For example, in the popular QEMU dynamic binary translator, guest memory instructions with strides are emulated by a sequence of scalar instructions, leaving significant room for performance improvement when the host machines have SIMD instructions available. Structured loads/stores, such as VLDn/VSTn in ARM NEON, are one type of strided SIMD data access instructions. They are widely used in signal processing, multimedia, mathematical and 2D matrix transposition applications. Efficient translation of such structured loads/stores is a critical issue when migrating ARM executables to other ISAs. However, it is quite challenging since not only is the translation of structured loads/stores non-trivial, but the difference between guest and host register configurations must also be taken into consideration. In this work, we present the design and implementation of translating structured loads/stores in DBT, including target code generation as well as efficient SIMD register mapping. Our proposed register mapping mechanisms are not limited to handling structured loads/stores; they can be extended to deal with normal SIMD instructions. On a set of OpenCV benchmarks, our QEMU-based system has achieved a maximum speedup of 5.41x, with an average improvement of 2.93x.
On a set of BLAS benchmarks, our system has also obtained a maximum speedup of 2.19x and an average improvement of 1.63x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Jiang:2017:OFU, author = "Weiwen Jiang and Edwin H.-M. Sha and Qingfeng Zhuge and Hailiang Dong and Xianzhang Chen", title = "Optimal functional unit assignment and voltage selection for pipelined {MPSoC} with guaranteed probability on time performance", journal = j-SIGPLAN, volume = "52", number = "4", pages = "41--50", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081036", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Pipelined heterogeneous multiprocessor system-on-chip (MPSoC) can provide high throughput for streaming applications. In the design of such systems, time performance and system cost are the most concerning issues. By analyzing runtime behaviors of benchmarks in real-world platforms, we find that execution times of tasks are not fixed but spread with probabilities. In terms of this feature, we model execution times of tasks as random variables. In this paper, we study how to design high-performance and low-cost MPSoC systems to execute a set of such tasks with data dependencies in a pipelined fashion. Our objective is to obtain the optimal functional unit assignment and voltage selection for the pipelined MPSoC systems, such that the system cost is minimized while timing constraints can be met with a given guaranteed probability. For each required probability, our proposed algorithm can efficiently obtain the optimal solution. Experiments show that other existing algorithms cannot find feasible solutions in most cases, but ours can. Even for those solutions that other algorithms can obtain, ours can reach 30\% reductions in total cost compared with others.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Lee:2017:IIP, author = "Gyeongmin Lee and Seonyeong Heo and Bongjun Kim and Jong Kim and Hanjun Kim", title = "Integrated {IoT} programming with selective abstraction", journal = j-SIGPLAN, volume = "52", number = "4", pages = "51--60", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081031", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The explosion of networked devices has driven a new computing environment called the Internet of Things (IoT), enabling various services such as home automation and health monitoring. Despite the promising applicability of the IoT, developing an IoT service is challenging for programmers, because the programmers should integrate multiple programmable devices and heterogeneous third-party devices. Recent works have proposed integrated programming platforms, but they either require device-specific implementation for third-party devices without any device abstraction, or abstract all the devices to the standard interfaces requiring unnecessary abstraction of programmable devices. 
To integrate IoT devices with selective abstraction, this work revisits the object-oriented programming (OOP) model, and proposes a new language extension and its compiler-runtime framework, called Esperanto. With three annotations that map each object to its corresponding IoT device, the Esperanto language allows programmers to integrate multiple programmable devices into one OOP program and to abstract similar third-party devices into their common ancestor classes. Given the annotations, the Esperanto compiler automatically partitions the integrated program into multiple sub-programs for each programmable IoT device, and inserts communication and synchronization code. Moreover, for the ancestor classes, the Esperanto runtime dynamically identifies connected third-party devices, and links their corresponding descendant objects. Compared to an existing approach to integrated IoT programming, Esperanto requires 33.3\% fewer lines of code to implement 5 IoT services, and reduces their response time by 44.8\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Zhang:2017:TSB, author = "Min Zhang and Yunhui Ying", title = "Towards {SMT-based} {LTL} model checking of clock constraint specification language for real-time and embedded systems", journal = j-SIGPLAN, volume = "52", number = "4", pages = "61--70", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081035", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Clock Constraint Specification Language (CCSL) is a formal language companion to MARTE (shorthand for Modeling and Analysis of Real-Time and Embedded systems), a UML profile used to facilitate the design and analysis of real-time and embedded systems. CCSL is proposed to specify constraints on the occurrences of events in systems. However, the language lacks efficient verification support to formally analyze temporal properties, which are important properties for real-time and embedded systems. In this paper, we propose an SMT-based approach to model checking of the temporal properties specified in Linear Temporal Logic (LTL) for CCSL by transforming CCSL constraints and LTL formulas into SMT formulas. We implement a prototype tool for the proposed approach and use the state-of-the-art tool Z3 as its underlying SMT solver. We model two practical real-time and embedded systems, i.e., a traffic light controller and a power window system, in CCSL, and model check LTL properties of them using the proposed approach.
Experimental results demonstrate the effectiveness and efficiency of our approach.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Zheng:2017:ITS, author = "Wenguang Zheng and Hui Wu and Chuanyao Nie", title = "Integrating task scheduling and cache locking for multicore real-time embedded systems", journal = j-SIGPLAN, volume = "52", number = "4", pages = "71--80", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081033", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern embedded processors provide hardware support for cache locking, a mechanism used to facilitate the WCET (Worst-Case Execution Time) calculation of a task. We investigate the problem of integrating task scheduling and cache locking for a set of preemptible tasks with individual release times and deadlines on a multi-core processor with two-level caches. We propose a novel integrated approach that schedules the task set and allocates the locked cache contents of each task to the local caches (L1 caches) and the level-two cache (L2 cache). Our approach consists of three major components: the task scheduler, the L1 cache allocator, and the L2 cache allocator. The task scheduler aims at minimizing the number of task preemptions. The L1 cache allocator converts the interference graph of all the tasks scheduled on each core into a DAG by considering the preemptions between tasks and allocates the L1 cache space to each task. The L2 cache allocator converts the interference graph of all the tasks into a DAG by using a k-longest-path-based graph orientation algorithm and allocates the L2 cache space to each task. Both cache allocators significantly improve the cache utilization for all the caches due to the efficient use of the interference graphs of tasks. We have implemented our approach and compared it with the extended version of the preemption tree-based approach and the static analysis approach without cache locking by using a set of benchmarks from the MRTC WCET benchmark suite and SNU real-time benchmarks. Compared to the extended version of the preemption tree-based approach, the maximum WCRT (Worst Case Response Time) improvement of our approach is 15\%. Compared to the static analysis approach, the maximum WCRT improvement of our approach is 37\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Wang:2017:TME, author = "Yi Wang and Mingxu Zhang and Jing Yang", title = "Towards memory-efficient processing-in-memory architecture for convolutional neural networks", journal = j-SIGPLAN, volume = "52", number = "4", pages = "81--90", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081032", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Convolutional neural networks (CNNs) are widely adopted in artificial intelligence systems.
In contrast to conventional computing-centric applications, the computational and memory resources of CNN applications are mixed together in the network weights. This incurs a significant amount of data movement, especially for high-dimensional convolutions. Although recent embedded 3D-stacked Processing-in-Memory (PIM) architectures alleviate this memory bottleneck to provide fast near-data processing, memory is still a limiting factor of the entire system. An unsolved key challenge is how to efficiently allocate convolutions to 3D-stacked PIM to combine the advantages of both neural and computational processing. This paper presents Memolution, a compiler-based memory-efficient data allocation strategy for convolutional neural networks on PIM architecture. Memolution offers thread-level parallelism that can fully exploit the computational power of PIM architecture. The objective is to capture the characteristics of neural network applications and present a hardware-independent design to transparently allocate CNN applications onto the underlying hardware resources provided by PIM. We demonstrate the viability of the proposed technique using a variety of realistic convolutional neural network applications. Our extensive evaluations show that Memolution significantly improves performance and cache utilization compared to the baseline scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Ding:2017:UNS, author = "Xianzhong Ding and Zhiyong Zhang and Zhiping Jia and Lei Ju and Mengying Zhao and Huawei Huang", title = "Unified {nvTCAM} and {sTCAM} architecture for improving packet matching performance", journal = j-SIGPLAN, volume = "52", number = "4", pages = "91--100", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081034", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software-Defined Networking (SDN) allows controlling applications to install fine-grained forwarding policies in the underlying switches. Ternary Content Addressable Memory (TCAM) enables fast lookups in hardware switches with flexible wildcard rule patterns. However, the performance of packet processing is severely constrained by the capacity of TCAM, which aggravates the processing burden and latency issues. In this paper, we propose a hybrid TCAM architecture which consists of NVM-based TCAM (nvTCAM) and SRAM-based TCAM (sTCAM), utilizing nvTCAM to cache the most popular rules to improve the cache hit ratio while relying on a very small-size sTCAM to handle cache-miss traffic to effectively decrease update latency. Considering the special rule dependency, we present an efficient Rule Migration Replacement (RMR) policy to make full use of both nvTCAM and sTCAM to obtain better performance.
Experimental results show that the proposed architecture outperforms current TCAM architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Pan:2017:LPM, author = "Chen Pan and Mimi Xie and Yongpan Liu and Yanzhi Wang and Chun Jason Xue and Yuangang Wang and Yiran Chen and Jingtong Hu", title = "A lightweight progress maximization scheduler for non-volatile processor under unstable energy harvesting", journal = j-SIGPLAN, volume = "52", number = "4", pages = "101--110", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081038", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy harvesting techniques have become increasingly popular as power supplies for embedded systems. However, the harvested energy is intrinsically unstable. Thus, the program execution may be interrupted frequently. Although the development of non-volatile processors (NVP) can save and restore execution states, both hardware and software challenges exist for energy harvesting powered embedded systems. On the hardware side, existing power detectors only signal the ``poor'' quality of the harvested power based on a preset threshold voltage. The inappropriate setting of this threshold will make the NVP-based embedded system suffer from either unnecessary checkpointing or checkpointing failures. On the software side, not all tasks can be checkpointed. Once the power is off, these tasks will have to restart from the beginning. In this paper, a task scheduler is proposed to maximize task progress by prioritizing tasks which cannot be checkpointed when power is weak so that they can finish before the power outage. To assist task scheduling, three additional modules, including a voltage monitor, a checkpointing handler, and a routine handler, are proposed. Experimental results show increased overall task progress and reduced energy consumption.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Dietrich:2017:OVA, author = "Christian Dietrich and Daniel Lohmann", title = "{OSEK-V}: application-specific {RTOS} instantiation in hardware", journal = j-SIGPLAN, volume = "52", number = "4", pages = "111--120", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081030", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The employment of a real-time operating system (RTOS) in an embedded control system is often an all-or-nothing decision: While the RTOS abstractions provide for easier software composition and development, the price in terms of event latencies and memory costs is high. Especially in HW/SW codesign settings, system developers try to avoid the employment of a full-blown RTOS as far as possible. In OSEK-V, we mitigate this trade-off by a very aggressive tailoring of the concrete RTOS instance into the hardware.
Instead of implementing generic OS components as custom hardware devices, we capture the actually possible application-kernel interactions as a finite-state machine and integrate the tailored RTOS semantics directly into the processor pipeline. In our experimental results with an OSEK-based implementation of a quadrotor flight controller into the Rocket/RISC-V softcore, we can thereby significantly reduce event latencies, interrupt lock times, and memory footprint at moderate costs in terms of FPGA resources.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '17 conference proceedings.", } @Article{Pai:2016:CTO, author = "Sreepathi Pai and Keshav Pingali", title = "A compiler for throughput optimization of graph algorithms on {GPUs}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "1--19", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984015", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing high-performance GPU implementations of graph algorithms can be challenging. In this paper, we argue that three optimizations called throughput optimizations are key to high performance for this application class. These optimizations describe a large implementation space, making it unrealistic for programmers to implement them by hand. To address this problem, we have implemented these optimizations in a compiler that produces CUDA code from an intermediate-level program representation called IrGL. Compared to state-of-the-art handwritten CUDA implementations of eight graph applications, code generated by the IrGL compiler is up to 5.95x faster (median 1.4x) for five applications and never more than 30\% slower for the others. Throughput optimizations contribute an improvement of up to 4.16x (median 1.4x) to the performance of unoptimized IrGL code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Surendran:2016:APP, author = "Rishi Surendran and Vivek Sarkar", title = "Automatic parallelization of pure method calls via conditional future synthesis", journal = j-SIGPLAN, volume = "51", number = "10", pages = "20--38", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984035", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a novel approach for using futures to automatically parallelize the execution of pure method calls. Our approach is built on three new techniques to address the challenge of automatic parallelization via future synthesis: candidate future synthesis, parallelism benefit analysis, and threshold expression synthesis. During candidate future synthesis, our system annotates pure method calls as async expressions and synthesizes a parallel program with future objects and their type declarations. Next, the system performs a parallel benefit analysis to determine which async expressions may need to be executed sequentially due to overhead reasons, based on execution profile information collected from multiple test inputs.
Finally, threshold expression synthesis uses the output from parallelism benefit analysis to synthesize predicate expressions that can be used to determine at runtime if a specific pure method call should be executed sequentially or in parallel. We have implemented our approach, and the results obtained from an experimental evaluation of the complete system on a range of sequential Java benchmarks are very encouraging. Our evaluation shows that our approach can provide significant parallel speedups of up to 7.4 $ \times $ (geometric mean of 3.69 $ \times $) relative to the sequential programs when using 8 processor cores, with zero programmer effort beyond providing the sequential program and test cases for parallelism benefit analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Sorensen:2016:PIW, author = "Tyler Sorensen and Alastair F. Donaldson and Mark Batty and Ganesh Gopalakrishnan and Zvonimir Rakamari{\'c}", title = "Portable inter-workgroup barrier synchronisation for {GPUs}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "39--58", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984032", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the growing popularity of GPGPU programming, there is not yet a portable and formally-specified barrier that one can use to synchronise across workgroups. Moreover, the occupancy-bound execution model of GPUs breaks assumptions inherent in traditional software execution barriers, exposing them to deadlock. We present an occupancy discovery protocol that dynamically discovers a safe estimate of the occupancy for a given GPU and kernel, allowing for a starvation-free (and hence, deadlock-free) inter-workgroup barrier by restricting the number of workgroups according to this estimate. We implement this idea by adapting an existing, previously non-portable, GPU inter-workgroup barrier to use OpenCL 2.0 atomic operations, and prove that the barrier meets its natural specification in terms of synchronisation. We assess the portability of our approach over eight GPUs spanning four vendors, comparing the performance of our method against alternative methods. 
Our key findings include: (1) the recall of our discovery protocol is nearly 100\%; (2) runtime comparisons vary substantially across GPUs and applications; and (3) our method provides portable and safe inter-workgroup synchronisation across the applications we study.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Doeraene:2016:PIW, author = "S{\'e}bastien Doeraene and Tobias Schlatter", title = "Parallel incremental whole-program optimizations for {Scala.js}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "59--73", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984013", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Whole-program optimizations are powerful tools that can dramatically improve performance, size and other aspects of programs. Because they depend on global knowledge, they must typically be reapplied to the whole program when small changes are made, which makes them too slow for the development cycle. This is an issue for some environments that require, or benefit a lot from, whole-program optimizations, such as compilation to JavaScript or to the Dalvik VM, because their development cycle is slowed down either by the lack of optimizations, or by the time spent on applying them. We present a new approach to designing incremental whole-program optimizers for object-oriented and functional languages: when part of a program changes, only the portions affected by the changes are reoptimized. An incremental optimizer using this approach for Scala.js, the Scala to JavaScript compiler, demonstrates speedups from 10x to 100x compared to its batch version. As a result, the optimizer's running time becomes insignificant compared to separate compilation, making it fit for use on every compilation run during the development cycle. We also show how to parallelize the incremental algorithm to take advantage of multicore hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Stefanescu:2016:SBP, author = "Andrei Stefanescu and Daejun Park and Shijiao Yuwen and Yilong Li and Grigore Rosu", title = "Semantics-based program verifiers for all languages", journal = j-SIGPLAN, volume = "51", number = "10", pages = "74--91", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984027", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a language-independent verification framework that can be instantiated with an operational semantics to automatically generate a program verifier. The framework treats both the operational semantics and the program correctness specifications as reachability rules between matching logic patterns, and uses the sound and relatively complete reachability logic proof system to prove the specifications using the semantics.
We instantiate the framework with the semantics of one academic language, KernelC, as well as with three recent semantics of real-world languages, C, Java, and JavaScript, developed independently of our verification infrastructure. We evaluate our approach empirically and show that the generated program verifiers can check automatically the full functional correctness of challenging heap-manipulating programs implementing operations on list and tree data structures, like AVL trees. This is the first approach that can turn the operational semantics of real-world languages into correct-by-construction automatic verifiers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Sergey:2016:HSS, author = "Ilya Sergey and Aleksandar Nanevski and Anindya Banerjee and Germ{\'a}n Andr{\'e}s Delbianco", title = "{Hoare}-style specifications as correctness conditions for non-linearizable concurrent objects", journal = j-SIGPLAN, volume = "51", number = "10", pages = "92--110", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Designing efficient concurrent objects often requires abandoning the standard specification technique of linearizability in favor of more relaxed correctness conditions. However, the variety of alternatives makes it difficult to choose which condition to employ, and how to compose them when using objects specified by different conditions. In this work, we propose a uniform alternative in the form of Hoare logic, which can explicitly capture--in the auxiliary state--the interference of environment threads. We demonstrate the expressiveness of our method by verifying a number of concurrent objects and their clients, which have so far been specified only by non-standard conditions of concurrency-aware linearizability, quiescent, and quantitative quiescent consistency. We report on the implementation of the ideas in an existing Coq-based tool, providing the first mechanized proofs for all the examples in the paper.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Nienhuis:2016:OSC, author = "Kyndylan Nienhuis and Kayvan Memarian and Peter Sewell", title = "An operational semantics for {C\slash C++11} concurrency", journal = j-SIGPLAN, volume = "51", number = "10", pages = "111--128", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983997", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C/C++11 concurrency model balances two goals: it is relaxed enough to be efficiently implementable and (leaving aside the ``thin-air'' problem) it is strong enough to give useful guarantees to programmers. It is mathematically precise and has been used in verification research and compiler testing. However, the model is expressed in an axiomatic style, as predicates on complete candidate executions. 
This suffices for computing the set of allowed executions of a small litmus test, but it does not directly support the incremental construction of executions of larger programs. It is also at odds with conventional operational semantics, as used implicitly in the rest of the C/C++ standards. Our main contribution is the development of an operational model for C/C++11 concurrency. This covers all the features of the previous formalised axiomatic model, and we have a mechanised proof that the two are equivalent, in Isabelle/HOL. We also integrate this semantics with an operational semantics for sequential C (described elsewhere); the combined semantics can incrementally execute programs in a small fragment of C. Doing this uncovered several new aspects of the C/C++11 model: we show that one cannot build an equivalent operational model that simply follows program order, sequential consistent order, or the synchronises-with order. The first negative result is forced by hardware-observable behaviour, but the latter two are not, and so might be ameliorated by changing C/C++11. More generally, we hope that this work, with its focus on incremental construction of executions, will inform the future design of new concurrency models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Dan:2016:MAR, author = "Andrei Marian Dan and Patrick Lam and Torsten Hoefler and Martin Vechev", title = "Modeling and analysis of remote memory access programming", journal = j-SIGPLAN, volume = "51", number = "10", pages = "129--144", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984033", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent advances in networking hardware have led to a new generation of Remote Memory Access (RMA) networks in which processors from different machines can communicate directly, bypassing the operating system and allowing higher performance. Researchers and practitioners have proposed libraries and programming models for RMA to enable the development of applications running on these networks, However, the memory models implied by these RMA libraries and languages are often loosely specified, poorly understood, and differ depending on the underlying network architecture and other factors. Hence, it is difficult to precisely reason about the semantics of RMA programs or how changes in the network architecture affect them. We address this problem with the following contributions: (i) a coreRMA language which serves as a common foundation, formalizing the essential characteristics of RMA programming; (ii) complete axiomatic semantics for that language; (iii) integration of our semantics with an existing constraint solver, enabling us to exhaustively generate coreRMA programs (litmus tests) up to a specified bound and check whether the tests satisfy their specification; and (iv) extensive validation of our semantics on real-world RMA systems. We generated and ran 7441 litmus tests using each of the low-level RMA network APIs: DMAPP, VPI Verbs, and Portals 4. Our results confirmed that our model successfully captures behaviors exhibited by these networks. Moreover, we found RMA programs that behave inconsistently with existing documentation, confirmed by network experts. 
Our work provides an important step towards understanding existing RMA networks, thus influencing the design of future RMA interfaces and hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Itzhaky:2016:DDC, author = "Shachar Itzhaky and Rohit Singh and Armando Solar-Lezama and Kuat Yessenov and Yongquan Lu and Charles Leiserson and Rezaul Chowdhury", title = "Deriving divide-and-conquer dynamic programming algorithms using solver-aided transformations", journal = j-SIGPLAN, volume = "51", number = "10", pages = "145--164", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983993", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a framework allowing domain experts to manipulate computational terms in the interest of deriving better, more efficient implementations. It employs deductive reasoning to generate provably correct efficient implementations from a very high-level specification of an algorithm, and inductive constraint-based synthesis to improve automation. Semantic information is encoded into program terms through the use of refinement types. In this paper, we develop the technique in the context of a system called Bellmania that uses solver-aided tactics to derive parallel divide-and-conquer implementations of dynamic programming algorithms that have better locality and are significantly more efficient than traditional loop-based implementations. Bellmania includes a high-level language for specifying dynamic programming algorithms and a calculus that facilitates gradual transformation of these specifications into efficient implementations. These transformations formalize the divide-and-conquer technique; a visualization interface helps users to interactively guide the process, while an SMT-based back-end verifies each step and takes care of low-level reasoning required for parallelism. We have used the system to generate provably correct implementations of several algorithms, including some important algorithms from computational biology, and show that the performance is comparable to that of the best manually optimized code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Srinivasan:2016:SMC, author = "Venkatesh Srinivasan and Tushar Sharma and Thomas Reps", title = "Speeding up machine-code synthesis", journal = j-SIGPLAN, volume = "51", number = "10", pages = "165--180", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984006", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Machine-code synthesis is the problem of searching for an instruction sequence that implements a semantic specification, given as a formula in quantifier-free bit-vector logic (QFBV).
Instruction sets like Intel's IA-32 have around 43,000 unique instruction schemas; this huge instruction pool, along with the exponential cost inherent in enumerative synthesis, results in an enormous search space for a machine-code synthesizer: even for relatively small specifications, the synthesizer might take several hours or days to find an implementation. In this paper, we present several improvements to the algorithms used in a state-of-the-art machine-code synthesizer McSynth. In addition to a novel pruning heuristic, our improvements incorporate a number of ideas known from the literature, which we adapt in novel ways for the purpose of speeding up machine-code synthesis. Our experiments for Intel's IA-32 instruction set show that our improvements enable synthesis of code for 12 out of 14 formulas on which McSynth times out, speeding up the synthesis time by at least 1981X, and for the remaining formulas, speeds up synthesis by 3X.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Panchekha:2016:ARW, author = "Pavel Panchekha and Emina Torlak", title = "Automated reasoning for web page layout", journal = j-SIGPLAN, volume = "51", number = "10", pages = "181--194", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984010", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web pages define their appearance using Cascading Style Sheets, a modular language for layout of tree-structured documents. In principle, using CSS is easy: the developer specifies declarative constraints on the layout of an HTML document (such as the positioning of nodes in the HTML tree), and the browser solves the constraints to produce a box-based rendering of that document. In practice, however, the subtleties of CSS semantics make it difficult to develop stylesheets that produce the intended layout across different user preferences and browser settings. This paper presents the first mechanized formalization of a substantial fragment of the CSS semantics. This formalization is equipped with an efficient reduction to the theory of quantifier-free linear real arithmetic, enabling effective automated reasoning about CSS stylesheets and their behavior. We implement this reduction in Cassius, a solver-aided framework for building semantics-aware tools for CSS. To demonstrate the utility of Cassius, we prototype new tools for automated verification, debugging, and synthesis of CSS code. 
We show that these tools work on fragments of real-world websites, and that Cassius is a practical first step toward solver-aided programming for the web.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Wang:2016:FFS, author = "Xinyu Wang and Sumit Gulwani and Rishabh Singh", title = "{FIDEX}: filtering spreadsheet data using examples", journal = j-SIGPLAN, volume = "51", number = "10", pages = "195--213", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984030", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data filtering in spreadsheets is a common problem faced by millions of end-users. The task of data filtering requires a computational model that can separate intended positive and negative string instances. We present a system, FIDEX, that can efficiently learn desired data filtering expressions from a small set of positive and negative string examples. There are two key ideas of our approach. First, we design an expressive DSL to represent disjunctive filter expressions needed for several real-world data filtering tasks. Second, we develop an efficient synthesis algorithm for incrementally learning consistent filter expressions in the DSL from very few positive and negative examples. A DAG-based data structure is used to succinctly represent a large number of filter expressions, and two corresponding operators are defined for algorithmically handling positive and negative examples, namely, the intersection and subtraction operators. FIDEX is able to learn data filters for 452 out of 460 real-world data filtering tasks in real time (0.22s), using only 2.2 positive string instances and 2.7 negative string instances on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Moore:2016:EAC, author = "Scott Moore and Christos Dimoulas and Robert Bruce Findler and Matthew Flatt and Stephen Chong", title = "Extensible access control with authorization contracts", journal = j-SIGPLAN, volume = "51", number = "10", pages = "214--233", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984021", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing programming language access control frameworks do not meet the needs of all software components. We propose an expressive framework for implementing access control monitors for components. The basis of the framework is a novel concept: the authority environment. An authority environment associates rights with an execution context. The building blocks of access control monitors in our framework are authorization contracts: software contracts that manage authority environments. 
We demonstrate the expressiveness of our framework by implementing a diverse set of existing access control mechanisms and writing custom access control monitors for three realistic case studies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Osvald:2016:GGT, author = "Leo Osvald and Gr{\'e}gory Essertel and Xilun Wu and Lilliam I. Gonz{\'a}lez Alay{\'o}n and Tiark Rompf", title = "Gentrification gone too far? {Affordable} 2nd-class values for fun and (co-)effect", journal = j-SIGPLAN, volume = "51", number = "10", pages = "234--251", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984009", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "First-class functions dramatically increase expressiveness, at the expense of static guarantees. In ALGOL or PASCAL, functions could be passed as arguments but never escape their defining scope. Therefore, function arguments could serve as temporary access tokens or capabilities, enabling callees to perform some action, but only for the duration of the call. In modern languages, such programming patterns are no longer available. The central thrust of this paper is to re-introduce second-class functions and other values alongside first-class entities in modern languages. We formalize second-class values with stack-bounded lifetimes as an extension to simply-typed $ \lambda $ calculus, and for richer type systems such as F$_{ < \colon }$ and systems with path-dependent types. We generalize the binary first- vs second-class distinction to arbitrary privilege lattices, with the underlying type lattice as a special case. In this setting, abstract types naturally enable privilege parametricity. We prove type soundness and lifetime properties in Coq. We implement our system as an extension of Scala, and present several case studies. First, we modify the Scala Collections library and add privilege annotations to all higher-order functions. Privilege parametricity is key to retain the high degree of code-reuse between sequential and parallel as well as lazy and eager collections. Second, we use scoped capabilities to introduce a model of checked exceptions in the Scala library, with only few changes to the code. Third, we employ second-class capabilities for memory safety in a region-based off-heap memory library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{DiLorenzo:2016:IFD, author = "Jonathan DiLorenzo and Richard Zhang and Erin Menzies and Kathleen Fisher and Nate Foster", title = "Incremental forest: a {DSL} for efficiently managing filestores", journal = j-SIGPLAN, volume = "51", number = "10", pages = "252--271", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984034", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "File systems are often used to store persistent application data, but manipulating file systems using standard APIs can be difficult for programmers. 
Forest is a domain-specific language that bridges the gap between the on-disk and in-memory representations of file system data. Given a high-level specification of the structure, contents, and properties of a collection of directories, files, and symbolic links, the Forest compiler generates tools for loading, storing, and validating that data. Unfortunately, the initial implementation of Forest offered few mechanisms for controlling cost --- e.g., the run-time system could load gigabytes of data, even if only a few bytes were needed. This paper introduces Incremental Forest (iForest), an extension to Forest with an explicit delay construct that programmers can use to precisely control costs. We describe the design of iForest using a series of running examples, present a formal semantics in a core calculus, and define a simple cost model that accurately characterizes the resources needed to use a given specification. We propose skins, which allow programmers to modify the delay structure of a specification in a compositional way, and develop a static type system for ensuring compatibility between specifications and skins. We prove the soundness and completeness of the type system and a variety of algebraic properties of skins. We describe an OCaml implementation and evaluate its performance on applications developed in collaboration with watershed hydrologists.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Haller:2016:LLA, author = "Philipp Haller and Alex Loiko", title = "{LaCasa}: lightweight affinity and object capabilities in {Scala}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "272--291", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984042", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aliasing is a known source of challenges in the context of imperative object-oriented languages, which have led to important advances in type systems for aliasing control. However, their large-scale adoption has turned out to be a surprisingly difficult challenge. While new language designs show promise, they do not address the need of aliasing control in existing languages. This paper presents a new approach to isolation and uniqueness in an existing, widely-used language, Scala. The approach is unique in the way it addresses some of the most important obstacles to the adoption of type system extensions for aliasing control. First, adaptation of existing code requires only a minimal set of annotations. Only a single bit of information is required per class. Surprisingly, the paper shows that this information can be provided by the object-capability discipline, widely-used in program security. We formalize our approach as a type system and prove key soundness theorems. The type system is implemented for the full Scala language, providing, for the first time, a sound integration with Scala's local type inference. 
Finally, we empirically evaluate the conformity of existing Scala open-source code on a corpus of over 75,000 LOC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{PerezDeRosso:2016:PCM, author = "Santiago {Perez De Rosso} and Daniel Jackson", title = "Purposes, concepts, misfits, and a redesign of git", journal = j-SIGPLAN, volume = "51", number = "10", pages = "292--310", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984018", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Git is a widely used version control system that is powerful but complicated. Its complexity may not be an inevitable consequence of its power but rather evidence of flaws in its design. To explore this hypothesis, we analyzed the design of Git using a theory that identifies concepts, purposes, and misfits. Some well-known difficulties with Git are described, and explained as misfits in which underlying concepts fail to meet their intended purpose. Based on this analysis, we designed a reworking of Git (called Gitless) that attempts to remedy these flaws. To correlate misfits with issues reported by users, we conducted a study of Stack Overflow questions. And to determine whether users experienced fewer complications using Gitless in place of Git, we conducted a small user study. Results suggest our approach can be profitable in identifying, analyzing, and fixing design problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Kim:2016:AAP, author = "Dohyeong Kim and Yonghwi Kwon and Peng Liu and I. Luk Kim and David Mitchel Perry and Xiangyu Zhang and Gustavo Rodriguez-Rivera", title = "{Apex}: automatic programming assignment error explanation", journal = j-SIGPLAN, volume = "51", number = "10", pages = "311--327", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984031", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Apex, a system that can automatically generate explanations for programming assignment bugs, regarding where the bugs are and how the root causes led to the runtime failures. It works by comparing the passing execution of a correct implementation (provided by the instructor) and the failing execution of the buggy implementation (submitted by the student). The technique overcomes a number of technical challenges caused by syntactic and semantic differences of the two implementations. It collects the symbolic traces of the executions and matches assignment statements in the two execution traces by reasoning about symbolic equivalence. It then matches predicates by aligning the control dependences of the matched assignment statements, avoiding direct matching of path conditions which are usually quite different. 
Our evaluation shows that Apex is very effective for 205 buggy real world student submissions of 4 programming assignments, and a set of 15 programming assignment type of buggy programs collected from stackoverflow.com, precisely pinpointing the root causes and capturing the causality for 94.5\% of them. The evaluation on a standard benchmark set with over 700 student bugs shows similar results. A user study in the classroom shows that Apex has substantially improved student productivity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Hanappi:2016:ARC, author = "Oliver Hanappi and Waldemar Hummer and Schahram Dustdar", title = "Asserting reliable convergence for configuration management scripts", journal = j-SIGPLAN, volume = "51", number = "10", pages = "328--343", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984000", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The rise of elastically scaling applications that frequently deploy new machines has led to the adoption of DevOps practices across the cloud engineering stack. So-called configuration management tools utilize scripts that are based on declarative resource descriptions and make the system converge to the desired state. It is crucial for convergent configurations to be able to gracefully handle transient faults, e.g., network outages when downloading and installing software packages. In this paper we introduce a conceptual framework for asserting reliable convergence in configuration management. Based on a formal definition of configuration scripts and their resources, we utilize state transition graphs to test whether a script makes the system converge to the desired state under different conditions. In our generalized model, configuration actions are partially ordered, often resulting in prohibitively many possible execution orders. To reduce this problem space, we define and analyze a property called preservation, and we show that if preservation holds for all pairs of resources, then convergence holds for the entire configuration. Our implementation builds on Puppet, but the approach is equally applicable to other frameworks like Chef, Ansible, etc. We perform a comprehensive evaluation based on real world Puppet scripts and show the effectiveness of the approach.
Our tool is able to detect all idempotence and convergence related issues in a set of existing Puppet scripts with known issues as well as some hitherto undiscovered bugs in a large random sample of scripts.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Treichler:2016:DP, author = "Sean Treichler and Michael Bauer and Rahul Sharma and Elliott Slaughter and Alex Aiken", title = "Dependent partitioning", journal = j-SIGPLAN, volume = "51", number = "10", pages = "344--358", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984016", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A key problem in parallel programming is how data is partitioned: divided into subsets that can be operated on in parallel and, in distributed memory machines, spread across multiple address spaces. We present a dependent partitioning framework that allows an application to concisely describe relationships between partitions. Applications first establish independent partitions, which may contain arbitrary subsets of application data, permitting the expression of arbitrary application-specific data distributions. Dependent partitions are then derived from these using the dependent partitioning operations provided by the framework. By directly capturing inter-partition relationships, our framework can soundly and precisely reason about programs to perform important program analyses crucial to ensuring correctness and achieving good performance. As an example of the reasoning made possible, we present a static analysis that discharges most consistency checks on partitioned data during compilation. We describe an implementation of our framework within Regent, a language designed for the Legion programming model. The use of dependent partitioning constructs results in a 86-96\% decrease in the lines of code required to describe the partitioning, eliminates many of the expensive dynamic checks required for soundness by the current Regent partitioning implementation, and speeds up the computation of partitions by 2.6-12.7X even on a single thread. Additionally, we show that a distributed implementation incorporated into the Legion runtime system allows partitioning of data sets that are too large to fit on a single node and yields a further 29X speedup of partitioning operations on 64 nodes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Kulkarni:2016:APA, author = "Sulekha Kulkarni and Ravi Mangal and Xin Zhang and Mayur Naik", title = "Accelerating program analyses by cross-program training", journal = j-SIGPLAN, volume = "51", number = "10", pages = "359--377", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984023", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Practical programs share large modules of code. However, many program analyses are ineffective at reusing analysis results for shared code across programs. 
We present POLYMER, an analysis optimizer to address this problem. POLYMER runs the analysis offline on a corpus of training programs and learns analysis facts over shared code. It prunes the learnt facts to eliminate intermediate computations and then reuses these pruned facts to accelerate the analysis of other programs that share code with the training corpus. We have implemented POLYMER to accelerate analyses specified in Datalog, and apply it to optimize two analyses for Java programs: a call-graph analysis that is flow- and context-insensitive, and a points-to analysis that is flow- and context-sensitive. We evaluate the resulting analyses on ten programs from the DaCapo suite that share the JDK library. POLYMER achieves average speedups of 2.6$ \times $ for the call-graph analysis and 5.2$ \times $ for the points-to analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Srinivasan:2016:IAS, author = "Venkatesh Srinivasan and Thomas Reps", title = "An improved algorithm for slicing machine code", journal = j-SIGPLAN, volume = "51", number = "10", pages = "378--393", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984003", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Machine-code slicing is an important primitive for building binary analysis and rewriting tools, such as taint trackers, fault localizers, and partial evaluators. However, it is not easy to create a machine-code slicer that exhibits a high level of precision. Moreover, the problem of creating such a tool is compounded by the fact that a small amount of local imprecision can be amplified via cascade effects. Most instructions in instruction sets such as Intel's IA-32 and ARM are multi-assignments: they have several inputs and several outputs (registers, flags, and memory locations). This aspect of the instruction set introduces a granularity issue during slicing: there are often instructions at which we would like the slice to include only a subset of the instruction's semantics, whereas the slice is forced to include the entire instruction. Consequently, the slice computed by state-of-the-art tools is very imprecise, often including essentially the entire program. This paper presents an algorithm to slice machine code more accurately. To counter the granularity issue, our algorithm performs slicing at the microcode level, instead of the instruction level, and obtains a more precise microcode slice. To reconstitute a machine-code program from a microcode slice, our algorithm uses machine-code synthesis. 
Our experiments on IA-32 binaries of FreeBSD utilities show that, in comparison to slices computed by a state-of-the-art tool, our algorithm reduces the size of backward slices by 33\%, and forward slices by 70\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Petrashko:2016:CGL, author = "Dmitry Petrashko and Vlad Ureche and Ondrej Lhot{\'a}k and Martin Odersky", title = "Call graphs for languages with parametric polymorphism", journal = j-SIGPLAN, volume = "51", number = "10", pages = "394--409", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983991", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The performance of contemporary object oriented languages depends on optimizations such as devirtualization, inlining, and specialization, and these in turn depend on precise call graph analysis. Existing call graph analyses do not take advantage of the information provided by the rich type systems of contemporary languages, in particular generic type arguments. Many existing approaches analyze Java bytecode, in which generic types have been erased. This paper shows that this discarded information is actually very useful as the context in a context-sensitive analysis, where it significantly improves precision and keeps the running time small. Specifically, we propose and evaluate call graph construction algorithms in which the contexts of a method are (i) the type arguments passed to its type parameters, and (ii) the static types of the arguments passed to its term parameters. The use of static types from the caller as context is effective because it allows more precise dispatch of call sites inside the callee. Our evaluation indicates that the average number of contexts required per method is small. We implement the analysis in the Dotty compiler for Scala, and evaluate it on programs that use the type-parametric Scala collections library and on the Dotty compiler itself. The context-sensitive analysis runs 1.4x faster than a context-insensitive one and discovers 20\% more monomorphic call sites at the same time. When applied to method specialization, the imprecision in a context-insensitive call graph would require the average method to be cloned 22 times, whereas the context-sensitive call graph indicates a much more practical 1.00 to 1.50 clones per method. We applied the proposed analysis to automatically specialize generic methods. The resulting automatic transformation achieves the same performance as state-of-the-art techniques requiring manual annotations, while reducing the size of the generated bytecode by up to 5 $ \times $.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Chandra:2016:TIS, author = "Satish Chandra and Colin S. 
Gordon and Jean-Baptiste Jeannin and Cole Schlesinger and Manu Sridharan and Frank Tip and Youngil Choi", title = "Type inference for static compilation of {JavaScript}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "410--429", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984017", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a type system and inference algorithm for a rich subset of JavaScript equipped with objects, structural subtyping, prototype inheritance, and first-class methods. The type system supports abstract and recursive objects, and is expressive enough to accommodate several standard benchmarks with only minor workarounds. The invariants enforced by the types enable an ahead-of-time compiler to carry out optimizations typically beyond the reach of static compilers for dynamic languages. Unlike previous inference techniques for prototype inheritance, our algorithm uses a combination of lower and upper bound propagation to infer types and discover type errors in all code, including uninvoked functions. The inference is expressed in a simple constraint language, designed to leverage off-the-shelf fixed point solvers. We prove soundness for both the type system and inference algorithm. An experimental evaluation showed that the inference is powerful, handling the aforementioned benchmarks with no manual type annotation, and that the inferred types enable effective static compilation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Samak:2016:DSF, author = "Malavika Samak and Omer Tripp and Murali Krishna Ramanathan", title = "Directed synthesis of failing concurrent executions", journal = j-SIGPLAN, volume = "51", number = "10", pages = "430--446", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Detecting concurrency-induced bugs in multithreaded libraries can be challenging due to the intricacies associated with their manifestation. This includes invocation of multiple methods, synthesis of inputs to the methods to reach the failing location, and crafting of thread interleavings that cause the erroneous behavior. Neither fuzzing-based testing techniques nor over-approximate static analyses are well positioned to detect such subtle defects while retaining high accuracy alongside satisfactory coverage. In this paper, we propose a directed, iterative and scalable testing engine that combines the strengths of static and dynamic analysis to help synthesize concurrent executions to expose complex concurrency-induced bugs. Our engine accepts as input the library, its client (either sequential or concurrent) and a specification of correctness. Then, it iteratively refines the client to generate an execution that can break the input specification. 
Each step of the iterative process includes statically identifying sub-goals towards the goal of failing the specification, generating a plan toward meeting these goals, and merging of the paths traversed dynamically with the plan computed statically via constraint solving to generate a new client. The engine reports full reproduction scenarios, guaranteed to be true, for the bugs it finds. We have created a prototype of our approach named MINION. We validated MINION by applying it to well-tested concurrent classes from popular Java libraries, including the latest versions of OpenJDK and Google-Guava. We were able to detect 31 real crashes across 10 classes in a total of 23 minutes, including previously unknown bugs. Comparison with three other tools reveals that combined, they report only 9 of the 31 crashes (and no other crashes beyond MINION). This is because several of these bugs manifest under deeply nested path conditions (observed maximum of 11), deep nesting of method invocations (observed maximum of 6) and multiple refinement iterations to generate the crash-inducing client.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Huang:2016:MCR, author = "Shiyou Huang and Jeff Huang", title = "Maximal causality reduction for {TSO} and {PSO}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "447--461", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984025", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Verifying concurrent programs is challenging due to the exponentially large thread interleaving space. The problem is exacerbated by relaxed memory models such as Total Store Order (TSO) and Partial Store Order (PSO) which further explode the interleaving space by reordering instructions. A recent advance, Maximal Causality Reduction (MCR), has shown great promise to improve verification effectiveness by maximally reducing redundant explorations. However, the original MCR only works for the Sequential Consistency (SC) memory model, but not for TSO and PSO. In this paper, we develop novel extensions to MCR by solving two key problems under TSO and PSO: (1) generating interleavings that can reach new states by encoding the operational semantics of TSO and PSO with first-order logical constraints and solving them with SMT solvers, and (2) enforcing TSO and PSO interleavings by developing novel replay algorithms that allow executions out of the program order. We show that our approach successfully enables MCR to effectively explore TSO and PSO interleavings. We have compared our approach with a recent Dynamic Partial Order Reduction (DPOR) algorithm for TSO and PSO and a SAT-based stateless model checking approach. Our results show that our approach is much more effective than the other approaches for both state-space exploration and bug finding --- on average it explores 5-10X fewer executions and finds many bugs that the other tools cannot find.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Huang:2016:PMR, author = "Jeff Huang and Arun K. 
Rajagopalan", title = "Precise and maximal race detection from incomplete traces", journal = j-SIGPLAN, volume = "51", number = "10", pages = "462--476", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984024", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present RDIT, a novel dynamic technique to detect data races in multithreaded programs with incomplete trace information, i.e., in the presence of missing events. RDIT is both precise and maximal: it does not report any false alarms and it detects a maximal set of true traces from the observed incomplete trace. RDIT is underpinned by a sound BarrierPair model that abstracts away the missing events by capturing the invocation data of their enclosing methods. By making the least conservative abstraction that a missing method introduces synchronization only when it has a memory address in scope that overlaps with other events or other missing methods, and by formulating maximal thread causality as logical constraints, RDIT guarantees to precisely detect races with maximal capability. RDIT has been applied in seven real-world large concurrent systems and has detected dozens of true races with zero false alarms. Comparatively, existing algorithms such as Happens-Before, Causal-Precedes, and Maximal-Causality which are known to be precise all report many false alarms when missing synchronizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Blum:2016:SMC, author = "Ben Blum and Garth Gibson", title = "Stateless model checking with data-race preemption points", journal = j-SIGPLAN, volume = "51", number = "10", pages = "477--493", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984036", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stateless model checking is a powerful technique for testing concurrent programs, but suffers from exponential state space explosion when the test input parameters are too large. Several reduction techniques can mitigate this explosion, but even after pruning equivalent interleavings, the state space size is often intractable. Most prior tools are limited to preempting only on synchronization APIs, which reduces the space further, but can miss unsynchronized thread communication bugs. Data race detection, another concurrency testing approach, focuses on suspicious memory access pairs during a single test execution. It avoids concerns of state space size, but may report races that do not lead to observable failures, which jeopardizes a user's willingness to use the analysis. We present Quicksand, a new stateless model checking framework which manages the exploration of many state spaces using different preemption points. It uses state space estimation to prioritize jobs most likely to complete in a fixed CPU budget, and it incorporates data-race analysis to add new preemption points on the fly. Preempting threads during a data race's instructions can automatically classify the race as buggy or benign, and uncovers new bugs not reachable by prior model checkers. 
It also enables full verification of all possible schedules when every data race is verified as benign within the CPU budget. In our evaluation, Quicksand found 1.25x as many bugs and verified 4.3x as many tests compared to prior model checking approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Gollamudi:2016:AEE, author = "Anitha Gollamudi and Stephen Chong", title = "Automatic enforcement of expressive security policies using enclaves", journal = j-SIGPLAN, volume = "51", number = "10", pages = "494--513", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984002", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hardware-based enclave protection mechanisms, such as Intel's SGX, ARM's TrustZone, and Apple's Secure Enclave, can protect code and data from powerful low-level attackers. In this work, we use enclaves to enforce strong application-specific information security policies. We present IMP$_E$, a novel calculus that captures the essence of SGX-like enclave mechanisms, and show that a security-type system for IMP$_E$ can enforce expressive confidentiality policies (including erasure policies and delimited release policies) against powerful low-level attackers, including attackers that can arbitrarily corrupt non-enclave code, and, under some circumstances, corrupt enclave code. We present a translation from an expressive security-typed calculus (that is not aware of enclaves) to IMP$_E$. The translation automatically places code and data into enclaves to enforce the security policies of the source program.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Colin:2016:CTC, author = "Alexei Colin and Brandon Lucia", title = "{Chain}: tasks and channels for reliable intermittent programs", journal = j-SIGPLAN, volume = "51", number = "10", pages = "514--530", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Energy harvesting computers enable general-purpose computing using energy collected from their environment. Energy-autonomy of such devices has great potential, but their intermittent power supply poses a challenge. Intermittent program execution compromises progress and leaves state inconsistent. This work describes Chain: a new model for programming intermittent devices. A Chain program is a set of programmer-defined tasks that compute and exchange data through channels. Chain guarantees forward progress at task granularity. A task is restartable and never sees inconsistent state, because its input and output channels are separated. Our system supports language features for expressing advanced data exchange patterns and for encapsulating reusable functionality. Chain fundamentally differs from state-of-the-art checkpointing approaches and does not incur the associated overhead. We implement Chain as C language extensions and a runtime library. 
We used Chain to implement four applications: machine learning, encryption, compression, and sensing. In experiments, Chain ensured consistency where prior approaches failed and improved throughput by 2-7x over the leading state-of-the-art system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Bonetta:2016:GSM, author = "Daniele Bonetta and Luca Salucci and Stefan Marr and Walter Binder", title = "{GEMs}: shared-memory parallel programming for {Node.js}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "531--547", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984039", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript is the most popular programming language for client-side Web applications, and Node.js has popularized the language for server-side computing, too. In this domain, the minimal support for parallel programming remains however a major limitation. In this paper we introduce a novel parallel programming abstraction called Generic Messages (GEMs). GEMs allow one to combine message passing and shared-memory parallelism, extending the classes of parallel applications that can be built with Node.js. GEMs have customizable semantics and enable several forms of thread safety, isolation, and concurrency control. GEMs are designed as convenient JavaScript abstractions that expose high-level and safe parallelism models to the developer. Experiments show that GEMs outperform equivalent Node.js applications thanks to their usage of shared memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Peters:2016:OCF, author = "Arthur Michener Peters and David Kitchin and John A. Thywissen and William R. Cook", title = "{OrcO}: a concurrency-first approach to objects", journal = j-SIGPLAN, volume = "51", number = "10", pages = "548--567", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984022", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The majority of modern programming languages provide concurrency and object-orientation in some form. However, object-oriented concurrency remains cumbersome in many situations. We introduce the language OrcO, Orc with concurrent Objects, which enables a flexible style of concurrent object-oriented programming. OrcO extends the Orc programming language by adding abstractions for programming-in-the-large; namely objects, classes, and inheritance. OrcO objects are designed to be orthogonal to concurrency, allowing the concurrent structure and object structure of a program to evolve independently. 
This paper describes OrcO's goals and design and provides examples of how OrcO can be used to deftly handle events, object management, and object composition.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Ancona:2016:SSI, author = "Davide Ancona and Andrea Corradi", title = "Semantic subtyping for imperative object-oriented languages", journal = j-SIGPLAN, volume = "51", number = "10", pages = "568--587", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983992", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Semantic subtyping is an approach for defining sound and complete procedures to decide subtyping for expressive types, including union and intersection types; although it has been exploited especially in functional languages for XML based programming, recently it has been partially investigated in the context of object-oriented languages, and a sound and complete subtyping algorithm has been proposed for record types, but restricted to immutable fields, with union and recursive types interpreted coinductively to support cyclic objects. In this work we address the problem of studying semantic subtyping for imperative object-oriented languages, where fields can be mutable; in particular, we add read/write field annotations to record types, and, besides union, we consider intersection types as well, while maintaining coinductive interpretation of recursive types. In this way, we get a richer notion of type with a flexible subtyping relation, able to express a variety of type invariants useful for enforcing static guarantees for mutable objects. The addition of these features radically changes the definition of subtyping, and, hence, the corresponding decision procedure, and surprisingly invalidates some subtyping laws that hold in the functional setting. We propose an intuitive model where mutable record values contain type information to specify the values that can be correctly stored in fields. Such a model, and the corresponding subtyping rules, require particular care to avoid circularity between coinductive judgments and their negations which, by duality, have to be interpreted inductively. A sound and complete subtyping algorithm is provided, together with a prototype implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Brachthauser:2016:PFC, author = "Jonathan Immanuel Brachth{\"a}user and Tillmann Rendel and Klaus Ostermann", title = "Parsing with first-class derivatives", journal = j-SIGPLAN, volume = "51", number = "10", pages = "588--606", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984026", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Brzozowski derivatives, well known in the context of regular expressions, have recently been rediscovered to give a simplified explanation to parsers of context-free languages. We add derivatives as a novel first-class feature to a standard parser combinator language. 
First-class derivatives enable an inversion of the control flow, allowing to implement modular parsers for languages that previously required separate pre-processing steps or cross-cutting modifications of the parsers. We show that our framework offers new opportunities for reuse and supports a modular definition of interesting use cases of layout-sensitive parsing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Kell:2016:MLE, author = "Stephen Kell and Dominic P. Mulligan and Peter Sewell", title = "The missing link: explaining {ELF} static linking, semantically", journal = j-SIGPLAN, volume = "51", number = "10", pages = "607--623", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983996", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Beneath the surface, software usually depends on complex linker behaviour to work as intended. Even linking {\tt hello\_world.c} is surprisingly involved, and systems software such as {\tt libc} and operating system kernels rely on a host of linker features. But linking is poorly understood by working programmers and has largely been neglected by language researchers. In this paper we survey the many use-cases that linkers support and the poorly specified linker speak by which they are controlled: metadata in object files, command-line options, and linker-script language. We provide the first validated formalisation of a realistic executable and linkable format (ELF), and capture aspects of the Application Binary Interfaces for four mainstream platforms (AArch64, AMD64, Power64, and IA32). Using these, we develop an executable specification of static linking, covering (among other things) enough to link small C programs (we use the example of bzip2) into a correctly running executable. We provide our specification in Lem and Isabelle/HOL forms. This is the first formal specification of mainstream linking. We have used the Isabelle/HOL version to prove a sample correctness property for one case of AMD64 ABI relocation, demonstrating that the specification supports formal proof, and as a first step towards the much more ambitious goal of verified linking. Our work should enable several novel strands of research, including linker-aware verified compilation and program analysis, and better languages for controlling linking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Rompf:2016:TSD, author = "Tiark Rompf and Nada Amin", title = "Type soundness for dependent object types {(DOT)}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "624--641", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984008", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scala's type system unifies aspects of ML modules, object-oriented, and functional programming. The Dependent Object Types (DOT) family of calculi has been proposed as a new theoretic foundation for Scala and similar expressive languages. 
Unfortunately, type soundness has only been established for restricted subsets of DOT. In fact, it has been shown that important Scala features such as type refinement or a subtyping relation with lattice structure break at least one key metatheoretic property such as environment narrowing or invertible subtyping transitivity, which are usually required for a type soundness proof. The main contribution of this paper is to demonstrate how, perhaps surprisingly, even though these properties are lost in their full generality, a rich DOT calculus that includes recursive type refinement and a subtyping lattice with intersection types can still be proved sound. The key insight is that subtyping transitivity only needs to be invertible in code paths executed at runtime, with contexts consisting entirely of valid runtime objects, whereas inconsistent subtyping contexts can be permitted for code that is never executed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Daloze:2016:ETS, author = "Benoit Daloze and Stefan Marr and Daniele Bonetta and Hanspeter M{\"o}ssenb{\"o}ck", title = "Efficient and thread-safe objects for dynamically-typed languages", journal = j-SIGPLAN, volume = "51", number = "10", pages = "642--659", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984001", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We are in the multi-core era. Dynamically-typed languages are in widespread use, but their support for multithreading still lags behind. One of the reasons is that the sophisticated techniques they use to efficiently represent their dynamic object models are often unsafe in multithreaded environments. This paper defines safety requirements for dynamic object models in multithreaded environments. Based on these requirements, a language-agnostic and thread-safe object model is designed that maintains the efficiency of sequential approaches. This is achieved by ensuring that field reads do not require synchronization and field updates only need to synchronize on objects shared between threads. Basing our work on JRuby+Truffle, we show that our safe object model has zero overhead on peak performance for thread-local objects and only 3\% average overhead on parallel benchmarks where field updates require synchronization. Thus, it can be a foundation for safe and efficient multithreaded VMs for a wide range of dynamic languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Chapman:2016:HSH, author = "Keith Chapman and Antony L. Hosking and J. Eliot B. 
Moss", title = "Hybrid {STM\slash HTM} for nested transactions on {OpenJDK}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "660--676", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984029", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory (TM) has long been advocated as a promising pathway to more automated concurrency control for scaling concurrent programs running on parallel hardware. Software TM (STM) has the benefit of being able to run general transactional programs, but at the significant cost of overheads imposed to log memory accesses, mediate access conflicts, and maintain other transaction metadata. Recently, hardware manufacturers have begun to offer commodity hardware TM (HTM) support in their processors wherein the transaction metadata is maintained ``for free'' in hardware. However, HTM approaches are only best-effort: they cannot successfully run all transactional programs, whether because of hardware capacity issues (causing large transactions to fail), or compatibility restrictions on the processor instructions permitted within hardware transactions (causing transactions that execute those instructions to fail). In such cases, programs must include failure-handling code to attempt the computation by some other software means, since retrying the transaction would be futile. Thus, a canonical use of HTM is lock elision: replacing lock regions with transactions, retrying some number of times in the case of conflicts, but falling back to locking when HTM fails for other reasons. Here, we describe how software and hardware schemes can combine seamlessly into a hybrid system in support of transactional programs, allowing use of low-cost HTM when it works, but reverting to STM when it doesn't. We describe heuristics used to make this choice dynamically and automatically, but allowing the transition back to HTM opportunistically. Our implementation is for an extension of Java having syntax for both open and closed nested transactions, and boosting, running on the OpenJDK, with dynamic injection of STM mechanisms (into code variants used under STM) and HTM instructions (into code variants used under HTM). Both schemes are compatible to allow different threads to run concurrently with either mechanism, while preserving transaction safety. Using a standard synthetic benchmark we demonstrate that HTM offers significant acceleration of both closed and open nested transactions, while yielding parallel scaling up to the limits of the hardware, whereupon scaling in software continues but with the penalty to throughput imposed by software mechanisms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Bhandari:2016:MFR, author = "Kumud Bhandari and Dhruva R. Chakrabarti and Hans-J. 
Boehm", title = "{Makalu}: fast recoverable allocation of non-volatile memory", journal = j-SIGPLAN, volume = "51", number = "10", pages = "677--694", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984019", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Byte addressable non-volatile memory (NVRAM) is likely to supplement, and perhaps eventually replace, DRAM. Applications can then persist data structures directly in memory instead of serializing them and storing them onto a durable block device. However, failures during execution can leave data structures in NVRAM unreachable or corrupt. In this paper, we present Makalu, a system that addresses non-volatile memory management. Makalu offers an integrated allocator and recovery-time garbage collector that maintains internal consistency, avoids NVRAM memory leaks, and is efficient, all in the face of failures. We show that a careful allocator design can support a less restrictive and a much more familiar programming model than existing persistent memory allocators. Our allocator significantly reduces the per allocation persistence overhead by lazily persisting non-essential metadata and by employing a post-failure recovery-time garbage collector. Experimental results show that the resulting online speed and scalability of our allocator are comparable to well-known transient allocators, and significantly better than state-of-the-art persistent allocators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Nunez:2016:PGC, author = "Diogenes Nunez and Samuel Z. Guyer and Emery D. Berger", title = "Prioritized garbage collection: explicit {GC} support for software caches", journal = j-SIGPLAN, volume = "51", number = "10", pages = "695--710", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984028", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmers routinely trade space for time to increase performance, often in the form of caching or memoization. In managed languages like Java or JavaScript, however, this space-time tradeoff is complex. Using more space translates into higher garbage collection costs, especially at the limit of available memory. Existing runtime systems provide limited support for space-sensitive algorithms, forcing programmers into difficult and often brittle choices about provisioning. This paper presents prioritized garbage collection, a cooperative programming language and runtime solution to this problem. Prioritized GC provides an interface similar to soft references, called priority references, which identify objects that the collector can reclaim eagerly if necessary. The key difference is an API for defining the policy that governs when priority references are cleared and in what order. Application code specifies a priority value for each reference and a target memory bound. The collector reclaims references, lowest priority first, until the total memory footprint of the cache fits within the bound. 
We use this API to implement a space-aware least-recently-used (LRU) cache, called a Sache, that is a drop-in replacement for existing caches, such as Google's Guava library. The garbage collector automatically grows and shrinks the Sache in response to available memory and workload with minimal provisioning information from the programmer. Using a Sache, it is almost impossible for an application to experience a memory leak, memory pressure, or an out-of-memory crash caused by software caching.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Steimann:2016:CRA, author = "Friedrich Steimann and J{\"o}rg Hagemann and Bastian Ulke", title = "Computing repair alternatives for malformed programs using constraint attribute grammars", journal = j-SIGPLAN, volume = "51", number = "10", pages = "711--730", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984007", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Attribute grammars decorate the nodes of a program's parse tree with attributes whose values are defined by equations encoding the (static) semantics of a programming language. We show how replacing the equations of an attribute grammar with equivalent constraints that can be solved by a constraint solver allows us to compute repairs of a malformed program solely from a specification that was originally designed for checking its well-formedness. We present two repair modes --- shallow and deep fixing --- whose computed repair alternatives are guaranteed to repair every error on which they are invoked. While shallow fixing may introduce new errors, deep fixing never does; to make it tractable, we implement it using neighborhood search. We demonstrate the feasibility of our approach by implementing it on top of ExtendJ, an attribute grammar based Java compiler, and by applying it to an example from the Java EE context, detecting and fixing well-formedness errors (both real and injected) in a body of 14 open-source subject programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Raychev:2016:PMC, author = "Veselin Raychev and Pavol Bielik and Martin Vechev", title = "Probabilistic model for code with decision trees", journal = j-SIGPLAN, volume = "51", number = "10", pages = "731--747", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984041", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we introduce a new approach for learning precise and general probabilistic models of code based on decision tree learning. Our approach directly benefits an emerging class of statistical programming tools which leverage probabilistic models of code learned over large codebases (e.g., GitHub) to make predictions about new programs (e.g., code completion, repair, etc). The key idea is to phrase the problem of learning a probabilistic model of code as learning a decision tree in a domain specific language over abstract syntax trees (called TGen). 
This allows us to condition the prediction of a program element on a dynamically computed context. Further, our problem formulation enables us to easily instantiate known decision tree learning algorithms such as ID3, but also to obtain new variants we refer to as ID3+ and E13, not previously explored and ones that outperform ID3 in prediction accuracy. Our approach is general and can be used to learn a probabilistic model of any programming language. We implemented our approach in a system called Deep3 and evaluated it for the challenging task of learning probabilistic models of JavaScript and Python. Our experimental results indicate that Deep3 predicts elements of JavaScript and Python code with precision above 82\% and 69\%, respectively. Further, Deep3 often significantly outperforms state-of-the-art approaches in overall prediction accuracy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Barman:2016:RWA, author = "Shaon Barman and Sarah Chasins and Rastislav Bodik and Sumit Gulwani", title = "{Ringer}: web automation by demonstration", journal = j-SIGPLAN, volume = "51", number = "10", pages = "748--764", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984020", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With increasing amounts of data available on the web and a diverse range of users interested in programmatically accessing that data, web automation must become easier. Automation helps users complete many tedious interactions, such as scraping data, completing forms, or transferring data between websites. However, writing web automation scripts typically requires an expert programmer because the writer must be able to reverse engineer the target webpage. We have built a record and replay tool, Ringer, that makes web automation accessible to non-coders. Ringer takes a user demonstration as input and creates a script that interacts with the page as a user would. This approach makes Ringer scripts more robust to webpage changes because user-facing interfaces remain relatively stable compared to the underlying webpage implementations. We evaluated our approach on benchmarks recorded on real webpages and found that it replayed 4x more benchmarks than a state-of-the-art replay tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Weitz:2016:SVB, author = "Konstantin Weitz and Doug Woos and Emina Torlak and Michael D. Ernst and Arvind Krishnamurthy and Zachary Tatlock", title = "Scalable verification of {Border Gateway Protocol} configurations with an {SMT} solver", journal = j-SIGPLAN, volume = "51", number = "10", pages = "765--780", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984012", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Internet Service Providers (ISPs) use the Border Gateway Protocol (BGP) to announce and exchange routes for delivering packets through the internet. 
ISPs must carefully configure their BGP routers to ensure traffic is routed reliably and securely. Correctly configuring BGP routers has proven challenging in practice, and misconfiguration has led to worldwide outages and traffic hijacks. This paper presents Bagpipe, a system that enables ISPs to declaratively express BGP policies and that automatically verifies that router configurations implement such policies. The novel initial network reduction soundly reduces policy verification to a search for counterexamples in a finite space. An SMT-based symbolic execution engine performs this search efficiently. Bagpipe reduces the size of its search space using predicate abstraction and parallelizes its search using symbolic variable hoisting. Bagpipe's policy specification language is expressive: we expressed policies inferred from real AS configurations, policies from the literature, and policies for 10 Juniper TechLibrary configuration scenarios. Bagpipe is efficient: we ran it on three ASes with a total of over 240,000 lines of Cisco and Juniper BGP configuration. Bagpipe is effective: it revealed 19 policy violations without issuing any false positives.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Loncaric:2016:PFT, author = "Calvin Loncaric and Satish Chandra and Cole Schlesinger and Manu Sridharan", title = "A practical framework for type inference error explanation", journal = j-SIGPLAN, volume = "51", number = "10", pages = "781--799", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983994", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many languages have support for automatic type inference. But when inference fails, the reported error messages can be unhelpful, highlighting a code location far from the source of the problem. Several lines of work have emerged proposing error reports derived from correcting sets: a set of program points that, when fixed, produce a well-typed program. Unfortunately, these approaches are tightly tied to specific languages; targeting a new language requires encoding a type inference algorithm for the language in a custom constraint system specific to the error reporting tool. We show how to produce correcting set-based error reports by leveraging existing type inference implementations, easing the burden of adoption and, as type inference algorithms tend to be efficient in practice, producing error reports of comparable quality to similar error reporting tools orders of magnitude faster. Many type inference algorithms are already formulated as dual phases of type constraint generation and solving; rather than (re)implementing type inference in an error explanation tool, we isolate the solving phase and treat it as an oracle for solving typing constraints. Given any set of typing constraints, error explanation proceeds by iteratively removing conflicting constraints from the initial constraint set until discovering a subset on which the solver succeeds; the constraints removed form a correcting set. 
Our approach is agnostic to the semantics of any particular language or type system, instead leveraging the existing type inference engine to give meaning to constraints.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Kell:2016:DDT, author = "Stephen Kell", title = "Dynamically diagnosing type errors in unsafe code", journal = j-SIGPLAN, volume = "51", number = "10", pages = "800--819", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2983998", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing approaches for detecting type errors in unsafe languages are limited. Static analysis methods are imprecise, and often require source-level changes, while most dynamic methods check only memory properties (bounds, liveness, etc.), owing to a lack of run-time type information. This paper describes libcrunch, a system for binary-compatible run-time type checking of unmodified unsafe code, currently focusing on C. Practical experience shows that our prototype implementation is easily applicable to many real codebases without source-level modification, correctly flags programmer errors with a very low rate of false positives, offers a very low run-time overhead, and covers classes of error caught by no previously existing tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Long:2016:FCE, author = "Yuheng Long and Yu David Liu and Hridesh Rajan", title = "First-class effect reflection for effect-guided programming", journal = j-SIGPLAN, volume = "51", number = "10", pages = "820--837", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984037", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a novel type-and-effect calculus, first-class effects, where the computational effect of an expression can be programmatically reflected, passed around as values, and analyzed at run time. A broad range of designs ``hard-coded'' in existing effect-guided analyses --- from thread scheduling, version-consistent software updating, to data zeroing --- can be naturally supported through the programming abstractions. The core technical development is a type system with a number of features, including a hybrid type system that integrates static and dynamic effect analyses, a refinement type system to verify application-specific effect management properties, a double-bounded type system that computes both over-approximation of effects and their under-approximation. We introduce and establish a notion of soundness called trace consistency, defined in terms of how the effect and trace correspond. 
The property sheds foundational insight on ``good'' first-class effect programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Amin:2016:JST, author = "Nada Amin and Ross Tate", title = "{Java} and {Scala}'s type systems are unsound: the existential crisis of null pointers", journal = j-SIGPLAN, volume = "51", number = "10", pages = "838--848", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984004", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present short programs that demonstrate the unsoundness of Java and Scala's current type systems. In particular, these programs provide parametrically polymorphic functions that can turn any type into any type without (down)casting. Fortunately, parametric polymorphism was not integrated into the Java Virtual Machine (JVM), so these examples do not demonstrate any unsoundness of the JVM. Nonetheless, we discuss broader implications of these findings on the field of programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Sun:2016:FCB, author = "Chengnian Sun and Vu Le and Zhendong Su", title = "Finding compiler bugs via live code mutation", journal = j-SIGPLAN, volume = "51", number = "10", pages = "849--863", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984038", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Validating optimizing compilers is challenging because it is hard to generate valid test programs (i.e., those that do not expose any undefined behavior). Equivalence Modulo Inputs (EMI) is an effective, promising methodology to tackle this problem. Given a test program with some inputs, EMI mutates the program to derive variants that are semantically equivalent w.r.t. these inputs. The state-of-the-art instantiations of EMI are Orion and Athena, both of which rely on deleting code from or inserting code into code regions that are not executed under the inputs. Although both have demonstrated their ability in finding many bugs in GCC and LLVM, they are still limited due to their mutation strategies that operate only on dead code regions. This paper presents a novel EMI technique that allows mutation in the entire program (i.e., both live and dead regions). By removing the restriction of mutating only the dead regions, our technique significantly increases the EMI variant space. It also helps to more thoroughly stress test compilers as compilers must optimize mutated live code, whereas mutated dead code might be eliminated. Finally, our technique also makes compiler bugs more noticeable as miscompilations on mutated dead code may not be observable. We have realized the proposed technique in Hermes. The evaluation demonstrates Hermes's effectiveness. 
In 13 months, Hermes found 168 confirmed, valid bugs in GCC and LLVM, of which 132 have already been fixed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Shan:2016:FRR, author = "Zhiyong Shan and Tanzirul Azim and Iulian Neamtiu", title = "Finding resume and restart errors in {Android} applications", journal = j-SIGPLAN, volume = "51", number = "10", pages = "864--880", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984011", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Smartphone apps create and handle a large variety of ``instance'' data that has to persist across runs, such as the current navigation route, workout results, antivirus settings, or game state. Due to the nature of the smartphone platform, an app can be paused, sent into background, or killed at any time. If the instance data is not saved and restored between runs, in addition to data loss, partially-saved or corrupted data can crash the app upon resume or restart. While smartphone platforms offer API support for data-saving and data-retrieving operations, the use of this API is ad-hoc: left to the programmer, rather than enforced by the compiler. We have observed that several categories of bugs---including data loss, failure to resume/restart or resuming/restarting in the wrong state---are due to incorrect handling of instance data and are easily triggered by just pressing the `Home' or `Back' buttons. To help address this problem, we have constructed a tool chain for Android (the KREfinder static analysis and the KREreproducer input generator) that helps find and reproduce such incorrect handling. We have evaluated our approach by running the static analysis on 324 apps, of which 49 were further analyzed manually. Results indicate that our approach is (i) effective, as it has discovered 49 bugs, including in popular Android apps, and (ii) efficient, completing on average in 61 seconds per app. More generally, our approach helps determine whether an app saves too much or too little state.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Zuo:2016:LOF, author = "Zhiqiang Zuo and Lu Fang and Siau-Cheng Khoo and Guoqing Xu and Shan Lu", title = "Low-overhead and fully automated statistical debugging with abstraction refinement", journal = j-SIGPLAN, volume = "51", number = "10", pages = "881--896", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984005", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cooperative statistical debugging is an effective approach for diagnosing production-run failures. To quickly identify failure predictors from the huge program predicate space, existing techniques rely on random or heuristics-guided predicate sampling at the user side. However, none of them can satisfy the requirements of low cost, low diagnosis latency, and high diagnosis quality simultaneously, which are all indispensable for statistical debugging to be practical. 
This paper presents a new technique that tackles the above challenges. We formulate the technique as an instance of abstraction refinement, where efficient abstract-level profiling is first applied to the whole program and its execution brings information that can pinpoint suspicious coarse-grained entities that need to be refined. The refinement profiles a corresponding set of fine-grained entities, and generates feedback that determines what to prune and what to refine next. The process is fully automated, and more importantly, guided by a mathematically rigorous analysis that guarantees that our approach produces the same debugging results as an exhaustive analysis in deterministic settings. We have implemented this technique for both C and Java on both single machine and distributed system. A thorough evaluation demonstrates that our approach yields (1) an order of magnitude reduction in the user-side runtime overhead even compared to a sampling-based approach and (2) two orders of magnitude reduction in the size of data transferred over the network, completely automatically without sacrificing any debugging capability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Bavishi:2016:PRA, author = "Rohan Bavishi and Awanish Pandey and Subhajit Roy", title = "To be precise: regression aware debugging", journal = j-SIGPLAN, volume = "51", number = "10", pages = "897--915", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984014", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Bounded model checking based debugging solutions search for mutations of program expressions that produce the expected output for a currently failing test. However, the current localization tools are not regression aware: they do not use information from the passing tests in their localization formula. On the other hand, the current repair tools attempt to guarantee regression freedom: when provided with a set of passing tests, they guarantee that none of these tests can break due to the suggested repair patch, thereby constructing a large repair formula. In this paper, we propose regression awareness as a means to improve the quality of localization and to scale repair. To enable regression awareness, we summarize the proof of correctness of each passing test by computing Craig Interpolants over a symbolic encoding of the passing execution, and use these summaries as additional soft constraints while synthesizing altered executions corresponding to failing tests. Intuitively, these additional constraints act as roadblocks, thereby discouraging executions that may damage the proof of a passing test. We use a partial MAXSAT solver to relax the proofs in a systematic way, and use a ranking function that penalizes mutations that damage the existing proofs. We have implemented our algorithms into a tool, TINTIN, that enables regression aware localization and repair. 
For localizations, our strategy is effective in extracting a superior ranking of suspicious locations: on a set of 52 different versions across 12 different programs spanning three benchmark suites, TINTIN achieves a saving of developer effort by almost 45\% (in terms of the locations that must be examined by a developer to reach the ground-truth repair) in the worst case and 27\% in the average case over existing techniques. For automated repairs, on our set of benchmarks, TINTIN achieves a 2.3X speedup over existing techniques without sacrificing much on the ranking of the repair patches: the ground-truth repair appears as the topmost suggestion in more than 70\% of our benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "OOPSLA '16 conference proceedings.", } @Article{Lifflander:2017:CLO, author = "Jonathan Lifflander and Sriram Krishnamoorthy", title = "Cache locality optimization for recursive programs", journal = j-SIGPLAN, volume = "52", number = "6", pages = "1--16", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an approach to optimize the cache locality for recursive programs by dynamically splicing---recursively interleaving---the execution of distinct function invocations. By utilizing data effect annotations, we identify concurrency and data reuse opportunities across function invocations and interleave them to reduce reuse distance. We present algorithms that efficiently track effects in recursive programs, detect interference and dependencies, and interleave execution of function invocations using user-level (non-kernel) lightweight threads. To enable multi-core execution, a program is parallelized using a nested fork/join programming model. Our cache optimization strategy is designed to work in the context of a random work stealing scheduler. We present an implementation using the MIT Cilk framework that demonstrates significant improvements in sequential and parallel performance, competitive with a state-of-the-art compile-time optimizer for loop programs and a domain-specific optimizer for stencil programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Saarikivi:2017:FEC, author = "Olli Saarikivi and Margus Veanes and Todd Mytkowicz and Madan Musuvathi", title = "Fusing effectful comprehensions", journal = j-SIGPLAN, volume = "52", number = "6", pages = "17--32", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062362", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "List comprehensions provide a powerful abstraction mechanism for expressing computations over ordered collections of data declaratively without having to use explicit iteration constructs. This paper puts forth effectful comprehensions as an elegant way to describe list comprehensions that incorporate loop-carried state. 
This is motivated by operations such as compression/decompression and serialization/deserialization that are common in log/data processing pipelines and require loop-carried state when processing an input stream of data. We build on the underlying theory of symbolic transducers to fuse pipelines of effectful comprehensions into a single representation, from which efficient code can be generated. Using background theory reasoning with an SMT solver, our fusion and subsequent reachability based branch elimination algorithms can significantly reduce the complexity of the fused pipelines. Our implementation shows significant speedups over reasonable hand-written code (3.4 $ \times $, on average) and traditionally fused version of the pipeline (2.6 $ \times $, on average) for a variety of examples, including scenarios for extracting fields with regular expressions, processing XML with XPath, and running queries over encoded data.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Ding:2017:GTD, author = "Yufei Ding and Lin Ning and Hui Guan and Xipeng Shen", title = "Generalizations of the theory and deployment of triangular inequality for compiler-based strength reduction", journal = j-SIGPLAN, volume = "52", number = "6", pages = "33--48", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Triangular Inequality (TI) has been used in many manual algorithm designs to achieve good efficiency in solving some distance calculation-based problems. This paper presents our generalization of the idea into a compiler optimization technique, named TI-based strength reduction. The generalization consists of three parts. The first is the establishment of the theoretic foundation of this new optimization via the development of a new form of TI named Angular Triangular Inequality, along with several fundamental theorems. The second is the revealing of the properties of the new forms of TI and the proposal of guided TI adaptation, a systematic method to address the difficulties in effective deployments of TI optimizations. The third is an integration of the new optimization technique in an open-source compiler. Experiments on a set of data mining and machine learning algorithms show that the new technique can speed up the standard implementations by as much as 134X and 46X on average for distance-related problems, outperforming previous TI-based optimizations by 2.35X on average. 
It also extends the applicability of TI-based optimizations to vector related problems, producing tens of times of speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Menendez:2017:AID, author = "David Menendez and Santosh Nagarakatte", title = "{Alive-Infer}: data-driven precondition inference for peephole optimizations in {LLVM}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "49--63", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Peephole optimizations are a common source of compiler bugs. Compiler developers typically transform an incorrect peephole optimization into a valid one by strengthening the precondition. This process is challenging and tedious. This paper proposes Alive-Infer, a data-driven approach that infers preconditions for peephole optimizations expressed in Alive. Alive-Infer generates positive and negative examples for an optimization, enumerates predicates on-demand, and learns a set of predicates that separate the positive and negative examples. Alive-Infer repeats this process until it finds a precondition that ensures the validity of the optimization. Alive-Infer reports both a weakest precondition and a set of succinct partial preconditions to the developer. Our prototype generates preconditions that are weaker than LLVM's preconditions for 73 optimizations in the Alive suite. We also demonstrate the applicability of this technique to generalize 54 optimization patterns generated by Souper, an LLVM IR-based superoptimizer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Yessenov:2017:DAD, author = "Kuat Yessenov and Ivan Kuraj and Armando Solar-Lezama", title = "{DemoMatch}: {API} discovery from demonstrations", journal = j-SIGPLAN, volume = "52", number = "6", pages = "64--78", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce DemoMatch, a tool for API discovery that allows the user to discover how to implement functionality using a software framework by demonstrating the functionality in existing applications built with the same framework. DemoMatch matches the demonstrations against a database of execution traces called Semeru and generates code snippets explaining how to use the functionality. 
We evaluated DemoMatch on several case studies involving Java Swing and Eclipse RCP.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{David:2017:SBT, author = "Yaniv David and Nimrod Partush and Eran Yahav", title = "Similarity of binaries through re-optimization", journal = j-SIGPLAN, volume = "52", number = "6", pages = "79--94", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a scalable approach for establishing similarity between stripped binaries (with no debug information). The main challenge in binary similarity, is to establish similarity even when the code has been compiled using different compilers, with different optimization levels, or targeting different architectures. Overcoming this challenge, while avoiding false positives, is invaluable to the process of reverse engineering and the process of locating vulnerable code. We present a technique that is scalable and precise, as it alleviates the need for heavyweight semantic comparison by performing out-of-context re-optimization of procedure fragments. It works by decomposing binary procedures to comparable fragments and transforming them to a canonical, normalized form using the compiler optimizer, which enables finding equivalent fragments through simple syntactic comparison. We use a statistical framework built by analyzing samples collected ``in the wild'' to generate a global context that quantifies the significance of each pair of fragments, and uses it to lift pairwise fragment equivalence to whole procedure similarity. We have implemented our technique in a tool called {\tt GitZ} and performed an extensive evaluation. We show that {\tt GitZ} is able to perform millions of comparisons efficiently, and find similarity with high accuracy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Bastani:2017:SPI, author = "Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang", title = "Synthesizing program input grammars", journal = j-SIGPLAN, volume = "52", number = "6", pages = "95--110", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062349", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an algorithm for synthesizing a context-free grammar encoding the language of valid program inputs from a set of input examples and blackbox access to the program. Our algorithm addresses shortcomings of existing grammar inference algorithms, which both severely overgeneralize and are prohibitively slow. Our implementation, GLADE, leverages the grammar synthesized by our algorithm to fuzz test programs with structured inputs. 
We show that GLADE substantially increases the incremental coverage on valid inputs compared to two baseline fuzzers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Huang:2017:CMC, author = "Daniel Huang and Jean-Baptiste Tristan and Greg Morrisett", title = "Compiling {Markov} chain {Monte Carlo} algorithms for probabilistic modeling", journal = j-SIGPLAN, volume = "52", number = "6", pages = "111--125", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The problem of probabilistic modeling and inference, at a high-level, can be viewed as constructing a ( model, query, inference ) tuple, where an inference algorithm implements a query on a model. Notably, the derivation of inference algorithms can be a difficult and error-prone task. Hence, researchers have explored how ideas from probabilistic programming can be applied. In the context of constructing these tuples, probabilistic programming can be seen as taking a language-based approach to probabilistic modeling and inference. For instance, by using (1) appropriate languages for expressing models and queries and (2) devising inference techniques that operate on encodings of models (and queries) as program expressions, the task of inference can be automated. In this paper, we describe a compiler that transforms a probabilistic model written in a restricted modeling language and a query for posterior samples given observed data into a Markov Chain Monte Carlo (MCMC) inference algorithm that implements the query. The compiler uses a sequence of intermediate languages (ILs) that guide it in gradually and successively refining a declarative specification of a probabilistic model and the query into an executable MCMC inference algorithm. The compilation strategy produces composable MCMC algorithms for execution on a CPU or GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Eizenberg:2017:BBL, author = "Ariel Eizenberg and Yuanfeng Peng and Toma Pigli and William Mansky and Joseph Devietti", title = "{BARRACUDA}: binary-level analysis of runtime {RAces} in {CUDA} programs", journal = j-SIGPLAN, volume = "52", number = "6", pages = "126--140", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062342", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "GPU programming models enable and encourage massively parallel programming with over a million threads, requiring extreme parallelism to achieve good performance. Massive parallelism brings significant correctness challenges by increasing the possibility for bugs as the number of thread interleavings balloons. Conventional dynamic safety analyses struggle to run at this scale. We present BARRACUDA, a concurrency bug detector for GPU programs written in Nvidia's CUDA language. 
BARRACUDA handles a wider range of parallelism constructs than previous work, including branch operations, low-level atomics and memory fences, which allows BARRACUDA to detect new classes of concurrency bugs. BARRACUDA operates at the binary level for increased compatibility with existing code, leveraging a new binary instrumentation framework that is extensible to other dynamic analyses. BARRACUDA incorporates a number of novel optimizations that are crucial for scaling concurrency bug detection to over a million threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Rhodes:2017:BSC, author = "Dustin Rhodes and Cormac Flanagan and Stephen N. Freund", title = "{BigFoot}: static check placement for dynamic race detection", journal = j-SIGPLAN, volume = "52", number = "6", pages = "141--156", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062350", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Precise dynamic data race detectors provide strong correctness guarantees but have high overheads because they generally keep analysis state in a separate shadow location for each heap memory location, and they check (and potentially update) the corresponding shadow location on each heap access. The BigFoot dynamic data race detector uses a combination of static and dynamic analysis techniques to coalesce checks and compress shadow locations. With BigFoot, multiple accesses to an object or array often induce a single coalesced check that manipulates a single compressed shadow location, resulting in a performance improvement over FastTrack of 61\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Kini:2017:DRP, author = "Dileep Kini and Umang Mathur and Mahesh Viswanathan", title = "Dynamic race prediction in linear time", journal = j-SIGPLAN, volume = "52", number = "6", pages = "157--170", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062374", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing reliable concurrent software remains a huge challenge for today's programmers. Programmers rarely reason about their code by explicitly considering different possible inter-leavings of its execution. We consider the problem of detecting data races from individual executions in a sound manner. The classical approach to solving this problem has been to use Lamport's happens-before (HB) relation. Until now HB remains the only approach that runs in linear time. Previous efforts in improving over HB such as causally-precedes (CP) and maximal causal models fall short due to the fact that they are not implementable efficiently and hence have to compromise on their race detecting ability by limiting their techniques to bounded sized fragments of the execution. We present a new relation weak-causally-precedes (WCP) that is provably better than CP in terms of being able to detect more races, while still remaining sound. 
Moreover, it admits a linear time algorithm which works on the entire execution without having to fragment it.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Billes:2017:SBB, author = "Marina Billes and Anders M{\o}ller and Michael Pradel", title = "Systematic black-box analysis of collaborative web applications", journal = j-SIGPLAN, volume = "52", number = "6", pages = "171--184", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062364", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web applications, such as collaborative editors that allow multiple clients to concurrently interact on a shared resource, are difficult to implement correctly. Existing techniques for analyzing concurrent software do not scale to such complex systems or do not consider multiple interacting clients. This paper presents Simian, the first fully automated technique for systematically analyzing multi-client web applications. Naively exploring all possible interactions between a set of clients of such applications is practically infeasible. Simian obtains scalability for real-world applications by using a two-phase black-box approach. The application code remains unknown to the analysis and is first explored systematically using a single client to infer potential conflicts between client events triggered in a specific context. The second phase synthesizes multi-client interactions targeted at triggering misbehavior that may result from the potential conflicts, and reports an inconsistency if the clients do not converge to a consistent state. We evaluate the analysis on three widely used systems, Google Docs, Firepad, and ownCloud Documents, where it reports a variety of inconsistencies, such as incorrect formatting and misplaced text fragments. Moreover, we find that the two-phase approach runs 10x faster compared to exhaustive exploration, making systematic analysis practically applicable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Haas:2017:BWS, author = "Andreas Haas and Andreas Rossberg and Derek L. Schuff and Ben L. Titzer and Michael Holman and Dan Gohman and Luke Wagner and Alon Zakai and J. F. Bastien", title = "Bringing the web up to speed with {WebAssembly}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "185--200", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062363", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The maturation of the Web platform has given rise to sophisticated and demanding Web applications such as interactive 3D visualization, audio and video software, and games. With that, efficiency and security of code on the Web has become more important than ever. Yet JavaScript as the only built-in language of the Web is not well-equipped to meet these requirements, especially as a compilation target. 
Engineers from the four major browser vendors have risen to the challenge and collaboratively designed a portable low-level bytecode called WebAssembly. It offers compact representation, efficient validation and compilation, and safe low to no-overhead execution. Rather than committing to a specific programming model, WebAssembly is an abstraction over modern hardware, making it language-, hardware-, and platform-independent, with use cases beyond just the Web. WebAssembly has been designed with a formal semantics from the start. We describe the motivation, design and formal semantics of WebAssembly and provide some preliminary experience with implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Petrashko:2017:MCU, author = "Dmitry Petrashko and Ondrej Lhot{\'a}k and Martin Odersky", title = "Miniphases: compilation using modular and efficient tree transformations", journal = j-SIGPLAN, volume = "52", number = "6", pages = "201--216", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062346", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Production compilers commonly perform dozens of transformations on an intermediate representation. Running those transformations in separate passes harms performance. One approach to recover performance is to combine transformations by hand in order to reduce number of passes. Such an approach harms modularity, and thus makes it hard to maintain and evolve a compiler over the long term, and makes reasoning about performance harder. This paper describes a methodology that allows a compiler writer to define multiple transformations separately, but fuse them into a single traversal of the intermediate representation when the compiler runs. This approach has been implemented in a compiler for the Scala language. Our performance evaluation indicates that this approach reduces the running time of tree transformations by 35\% and shows that this is due to improved cache friendliness. At the same time, the approach improves total memory consumption by reducing the object tenuring rate by 50\%. This approach enables compiler writers to write transformations that are both modular and fast at the same time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Canino:2017:PAE, author = "Anthony Canino and Yu David Liu", title = "Proactive and adaptive energy-aware programming with mixed typechecking", journal = j-SIGPLAN, volume = "52", number = "6", pages = "217--232", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062356", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Application-level energy management is an important dimension of energy optimization. In this paper, we introduce ENT, a novel programming language for enabling *proactive* and *adaptive* mode-based energy management at the application level. 
The proactive design allows programmers to apply their application knowledge to energy management, by characterizing the energy behavior of different program fragments with modes. The adaptive design allows such characterization to be delayed until run time, useful for capturing dynamic program behavior dependent on program states, configuration settings, external battery levels, or CPU temperatures. The key insight is that both proactiveness and adaptiveness can be unified under a type system that combines static typing and dynamic typing. ENT has been implemented as an extension to Java, and successfully ported to three energy-conscious platforms: an Intel-based laptop, a Raspberry Pi, and an Android phone. Evaluation shows ENT improves the programmability, debuggability, and energy efficiency of battery-aware and temperature-aware programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Kedia:2017:SFS, author = "Piyus Kedia and Manuel Costa and Matthew Parkinson and Kapil Vaswani and Dimitrios Vytiniotis and Aaron Blankstein", title = "Simple, fast, and safe manual memory management", journal = j-SIGPLAN, volume = "52", number = "6", pages = "233--247", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Safe programming languages are readily available, but many applications continue to be written in unsafe languages because of efficiency. As a consequence, many applications continue to have exploitable memory safety bugs. Since garbage collection is a major source of inefficiency in the implementation of safe languages, replacing it with safe manual memory management would be an important step towards solving this problem. Previous approaches to safe manual memory management use programming models based on regions, unique pointers, borrowing of references, and ownership types. We propose a much simpler programming model that does not require any of these concepts. Starting from the design of an imperative type safe language (like Java or C\#), we just add a delete operator to free memory explicitly and an exception which is thrown if the program dereferences a pointer to freed memory. We propose an efficient implementation of this programming model that guarantees type safety.
Experimental results from our implementation based on the C\# native compiler show that this design achieves up to 3x reduction in peak working set and run time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Kincaid:2017:CRA, author = "Zachary Kincaid and Jason Breck and Ashkan Forouhi Boroujeni and Thomas Reps", title = "Compositional recurrence analysis revisited", journal = j-SIGPLAN, volume = "52", number = "6", pages = "248--262", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compositional recurrence analysis (CRA) is a static-analysis method based on a combination of symbolic analysis and abstract interpretation. This paper addresses the problem of creating a context-sensitive interprocedural version of CRA that handles recursive procedures. The problem is non-trivial because there is an ``impedance mismatch'' between CRA, which relies on analysis techniques based on regular languages (i.e., Tarjan's path-expression method), and the context-free-language underpinnings of context-sensitive analysis. We show how to address this impedance mismatch by augmenting the CRA abstract domain with additional operations. We call the resulting algorithm Interprocedural CRA (ICRA). Our experiments with ICRA show that it has broad overall strength compared with several state-of-the-art software model checkers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Thiessen:2017:CTP, author = "Rei Thiessen and Ondrej Lhot{\'a}k", title = "Context transformations for pointer analysis", journal = j-SIGPLAN, volume = "52", number = "6", pages = "263--277", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062359", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Points-to analysis for Java benefits greatly from context sensitivity. CFL-reachability and k -limited context strings are two approaches to obtaining context sensitivity with different advantages: CFL-reachability allows local reasoning about data-value flow and thus is suitable for demand-driven analyses, whereas k -limited analyses allow object sensitivity which is a superior calling context abstraction for object-oriented languages. We combine the advantages of both approaches to obtain a context-sensitive analysis that is as precise as k -limited context strings, but is more efficient to compute. Our key insight is based on a novel abstraction of contexts adapted from CFL-reachability that represents a relation between two calling contexts as a composition of transformations over contexts. We formulate pointer analysis in an algebraic structure of context transformations, which is a set of functions over calling contexts closed under function composition. We show that the context representation of context-string-based analyses is an explicit enumeration of all input and output values of context transformations. 
CFL-reachability-based pointer analysis is formulated to use call-strings as contexts, but the context transformations concept can be applied to any context abstraction used in k -limited analyses, including object- and type-sensitive analysis. The result is a more efficient algorithm for computing context-sensitive results for a wide variety of context configurations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Tan:2017:EPP, author = "Tian Tan and Yue Li and Jingling Xue", title = "Efficient and precise points-to analysis: modeling the heap by merging equivalent automata", journal = j-SIGPLAN, volume = "52", number = "6", pages = "278--291", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062360", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mainstream points-to analysis techniques for object-oriented languages rely predominantly on the allocation-site abstraction to model heap objects. We present MAHJONG, a novel heap abstraction that is specifically developed to address the needs of an important class of type-dependent clients, such as call graph construction, devirtualization and may-fail casting. By merging equivalent automata representing type-consistent objects that are created by the allocation-site abstraction, MAHJONG enables an allocation-site-based points-to analysis to run significantly faster while achieving nearly the same precision for type-dependent clients. MAHJONG is simple conceptually, efficient, and drops easily on any allocation-site-based points-to analysis. We demonstrate its effectiveness by discussing some insights on why it is a better alternative of the allocation-site abstraction for type-dependent clients and evaluating it extensively on 12 large real-world Java programs with five context-sensitive points-to analyses and three widely used type-dependent clients. MAHJONG is expected to provide significant benefits for many program analyses where call graphs are required.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Santhiar:2017:SDD, author = "Anirudh Santhiar and Aditya Kanade", title = "Static deadlock detection for asynchronous {C\#} programs", journal = j-SIGPLAN, volume = "52", number = "6", pages = "292--305", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062361", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Asynchronous programming is a standard approach for designing responsive applications. Modern languages such as C\# provide async/await primitives for the disciplined use of asynchrony. In spite of this, programs can deadlock because of incorrect use of blocking operations along with non-blocking (asynchronous) operations. While developers are aware of this problem, there is no automated technique to detect deadlocks in asynchronous programs. 
We present a novel representation of control flow and scheduling of asynchronous programs, called continuation scheduling graph and formulate necessary conditions for a deadlock to occur in a program. We design static analyses to construct continuation scheduling graphs of asynchronous C\# programs and to identify deadlocks in them. We have implemented the static analyses in a tool called DeadWait. Using DeadWait, we found 43 previously unknown deadlocks in 11 asynchronous C\# libraries. We reported the deadlocks to the library developers. They have confirmed and fixed 40 of them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Fu:2017:AHC, author = "Zhoulai Fu and Zhendong Su", title = "Achieving high coverage for floating-point code via unconstrained programming", journal = j-SIGPLAN, volume = "52", number = "6", pages = "306--319", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Achieving high code coverage is essential in testing, which gives us confidence in code quality. Testing floating-point code usually requires painstaking efforts in handling floating-point constraints, e.g., in symbolic execution. This paper turns the challenge of testing floating-point code into the opportunity of applying unconstrained programming --- the mathematical solution for calculating function minimum points over the entire search space. Our core insight is to derive a representing function from the floating-point program, any of whose minimum points is a test input guaranteed to exercise a new branch of the tested program. This guarantee allows us to achieve high coverage of the floating-point program by repeatedly minimizing the representing function. We have realized this approach in a tool called CoverMe and conducted an extensive evaluation of it on Sun's C math library. Our evaluation results show that CoverMe achieves, on average, 90.8\% branch coverage in 6.9 seconds, drastically outperforming our compared tools: (1) Random testing, (2) AFL, a highly optimized, robust fuzzer released by Google, and (3) Austin, a state-of-the-art coverage-based testing tool designed to support floating-point code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Chamith:2017:IPL, author = "Buddhika Chamith and Bo Joel Svensson and Luke Dalessandro and Ryan R. Newton", title = "Instruction punning: lightweight instrumentation for x86-64", journal = j-SIGPLAN, volume = "52", number = "6", pages = "320--332", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062344", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing techniques for injecting probes into running applications are limited; they either fail to support probing arbitrary locations, or to support scalable, rapid toggling of probes. We introduce a new technique on x86-64, called instruction punning, which allows scalable probes at any instruction. 
The key idea is that when we inject a jump instruction, the relative address of the jump serves simultaneously as data and as an instruction sequence. We show that this approach achieves probe invocation overheads of only a few dozen cycles, and probe activation/deactivation costs that are cheaper than a system call, even when all threads in the system are both invoking probes and toggling them.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{DAntras:2017:LOD, author = "Amanieu D'Antras and Cosmin Gorgovan and Jim Garside and Mikel Luj{\'a}n", title = "Low overhead dynamic binary translation on {ARM}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "333--346", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062371", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ARMv8 architecture introduced AArch64, a 64-bit execution mode with a new instruction set, while retaining binary compatibility with previous versions of the ARM architecture through AArch32, a 32-bit execution mode. Most hardware implementations of ARMv8 processors support both AArch32 and AArch64, which comes at a cost in hardware complexity. We present MAMBO-X64, a dynamic binary translator for Linux which executes 32-bit ARM binaries using only the AArch64 instruction set. We have evaluated the performance of MAMBO-X64 on three existing ARMv8 processors which support both AArch32 and AArch64 instruction sets. The performance was measured by comparing the running time of 32-bit benchmarks running under MAMBO-X64 with the same benchmark running natively. On SPEC CPU2006, we achieve a geometric mean overhead of less than 7.5\% on in-order Cortex-A53 processors and a performance improvement of 1\% on out-of-order X-Gene 1 processors. MAMBO-X64 achieves such low overhead by novel optimizations to map AArch32 floating-point registers to AArch64 registers dynamically, handle overflowing address calculations efficiently, generate traces that harness hardware return address prediction, and handle operating system signals accurately.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Zhang:2017:SPE, author = "Qirun Zhang and Chengnian Sun and Zhendong Su", title = "Skeletal program enumeration for rigorous compiler testing", journal = j-SIGPLAN, volume = "52", number = "6", pages = "347--361", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A program can be viewed as a syntactic structure P (syntactic skeleton) parameterized by a collection of identifiers V (variable names). This paper introduces the skeletal program enumeration (SPE) problem: Given a syntactic skeleton P and a set of variables V , enumerate a set of programs P exhibiting all possible variable usage patterns within P. 
It proposes an effective realization of SPE for systematic, rigorous compiler testing by leveraging three important observations: (1) Programs with different variable usage patterns exhibit diverse control- and data-dependence, and help exploit different compiler optimizations; (2) most real compiler bugs were revealed by small tests (i.e., small-sized P) --- this ``small-scope'' observation opens up SPE for practical compiler validation; and (3) SPE is exhaustive w.r.t. a given syntactic skeleton and variable set, offering a level of guarantee absent from all existing compiler testing techniques. The key challenge of SPE is how to eliminate the enormous amount of equivalent programs w.r.t. \alpha -conversion. Our main technical contribution is a novel algorithm for computing the canonical (and smallest) set of all non- \alpha -equivalent programs. To demonstrate its practical utility, we have applied the SPE technique to test C/C++ compilers using syntactic skeletons derived from their own regression test-suites. Our evaluation results are extremely encouraging. In less than six months, our approach has led to 217 confirmed GCC/Clang bug reports, 119 of which have already been fixed, and the majority are long latent despite extensive prior testing efforts. Our SPE algorithm also provides six orders of magnitude reduction. Moreover, in three weeks, our technique has found 29 CompCert crashing bugs and 42 bugs in two Scala optimizing compilers. These results demonstrate our SPE technique's generality and further illustrate its effectiveness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Antonopoulos:2017:DIS, author = "Timos Antonopoulos and Paul Gazzillo and Michael Hicks and Eric Koskinen and Tachio Terauchi and Shiyi Wei", title = "Decomposition instead of self-composition for proving the absence of timing channels", journal = j-SIGPLAN, volume = "52", number = "6", pages = "362--375", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062378", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel approach to proving the absence of timing channels. The idea is to partition the program's execution traces in such a way that each partition component is checked for timing attack resilience by a time complexity analysis and that per-component resilience implies the resilience of the whole program. We construct a partition by splitting the program traces at secret-independent branches. This ensures that any pair of traces with the same public input has a component containing both traces. Crucially, the per-component checks can be normal safety properties expressed in terms of a single execution. Our approach is thus in contrast to prior approaches, such as self-composition, that aim to reason about multiple $ (k \geq 2) $ executions at once. We formalize the above as an approach called quotient partitioning, generalized to any k -safety property, and prove it to be sound. A key feature of our approach is a demand-driven partitioning strategy that uses a regex-like notion called trails to identify sets of execution traces, particularly those influenced by tainted (or secret) data. 
We have applied our technique in a prototype implementation tool called Blazer, based on WALA, PPL, and the brics automaton library. We have proved timing-channel freedom of (or synthesized an attack specification for) 24 programs written in Java bytecode, including 6 classic examples from the literature and 6 examples extracted from the DARPA STAC challenge problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Hu:2017:API, author = "Qinheping Hu and Loris D'Antoni", title = "Automatic program inversion using symbolic transducers", journal = j-SIGPLAN, volume = "52", number = "6", pages = "376--389", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062345", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a fully-automated technique for inverting functional programs that operate over lists such as string encoders and decoders. We consider programs that can be modeled using symbolic extended finite transducers, an expressive model that can describe complex list-manipulating programs while retaining several decidable properties. Concretely, given a program P expressed as such a transducer, we propose techniques for: (1) checking whether P is injective and, if that is the case, (2) building a transducer P$^{-1}$ describing its inverse. We first show that it is undecidable to check whether such a transducer is injective and propose an algorithm for checking injectivity for a restricted, but practical, class of transducers. We then propose an inversion algorithm based on the following idea: if a transducer is injective, inverting it amounts to inverting all its individual transitions. We leverage recent advances in program synthesis and show that the transition inversion problem can be expressed as an instance of the syntax-guided synthesis framework. Finally, we implement the proposed techniques in a tool and show that it can invert 13 out of 14 real, complex string encoders and decoders, producing inverse programs that are substantially identical to manually written ones.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Ohmann:2017:CFR, author = "Peter Ohmann and Alexander Brooks and Loris D'Antoni and Ben Liblit", title = "Control-flow recovery from partial failure reports", journal = j-SIGPLAN, volume = "52", number = "6", pages = "390--405", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062368", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging is difficult. When software fails in production, debugging is even harder, as failure reports usually provide only an incomplete picture of the failing execution. We present a system that answers control-flow queries posed by developers as formal languages, indicating whether the query expresses control flow that is possible or impossible for a given failure report. We consider three separate approaches that trade off precision, expressiveness for failure constraints, and scalability.
We also introduce a new subclass of regular languages, the unreliable trace languages, which are particularly suited to answering control-flow queries in polynomial time. Our system answers queries remarkably efficiently when we encode failure constraints and user queries entirely as unreliable trace languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Doychev:2017:RAS, author = "Goran Doychev and Boris K{\"o}pf", title = "Rigorous analysis of software countermeasures against cache attacks", journal = j-SIGPLAN, volume = "52", number = "6", pages = "406--421", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062388", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "CPU caches introduce variations into the execution time of programs that can be exploited by adversaries to recover private information about users or cryptographic keys. Establishing the security of countermeasures against this threat often requires intricate reasoning about the interactions of program code, memory layout, and hardware architecture and has so far only been done for restricted cases. In this paper we devise novel techniques that provide support for bit-level and arithmetic reasoning about memory accesses in the presence of dynamic memory allocation. These techniques enable us to perform the first rigorous analysis of widely deployed software countermeasures against cache attacks on modular exponentiation, based on executable code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Feng:2017:CBSb, author = "Yu Feng and Ruben Martins and Jacob {Van Geffen} and Isil Dillig and Swarat Chaudhuri", title = "Component-based synthesis of table consolidation and transformation tasks from examples", journal = j-SIGPLAN, volume = "52", number = "6", pages = "422--436", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062351", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel component-based synthesis algorithm that marries the power of type-directed search with lightweight SMT-based deduction and partial evaluation. Given a set of components together with their over-approximate first-order specifications, our method first generates a program sketch over a subset of the components and checks its feasibility using an SMT solver. Since a program sketch typically represents many concrete programs, the use of SMT-based deduction greatly increases the scalability of the algorithm. Once a feasible program sketch is found, our algorithm completes the sketch in a bottom-up fashion, using partial evaluation to further increase the power of deduction for rejecting partially-filled program sketches. We apply the proposed synthesis methodology for automating a large class of data preparation tasks that commonly arise in data science. 
We have evaluated our synthesis algorithm on dozens of data wrangling and consolidation tasks obtained from on-line forums, and we show that our approach can automatically solve a large class of problems encountered by R users.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Beckett:2017:NCS, author = "Ryan Beckett and Ratul Mahajan and Todd Millstein and Jitendra Padhye and David Walker", title = "Network configuration synthesis with abstract topologies", journal = j-SIGPLAN, volume = "52", number = "6", pages = "437--451", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We develop Propane/AT, a system to synthesize provably-correct BGP (border gateway protocol) configurations for large, evolving networks from high-level specifications of topology, routing policy, and fault-tolerance requirements. Propane/AT is based on new abstractions for capturing parameterized network topologies and their evolution, and algorithms to analyze the impact of topology and routing policy on fault tolerance. Our algorithms operate entirely on abstract topologies. We prove that the properties established by our analyses hold for every concrete instantiation of the given abstract topology. Propane/AT also guarantees that only incremental changes to existing device configurations are required when the network evolves to add or remove devices and links. Our experiments with real-world topologies and policies show that our abstractions and algorithms are effective, and that, for large networks, Propane/AT synthesizes configurations two orders of magnitude faster than systems that operate on concrete topologies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Wang:2017:SHE, author = "Chenglong Wang and Alvin Cheung and Rastislav Bodik", title = "Synthesizing highly expressive {SQL} queries from input-output examples", journal = j-SIGPLAN, volume = "52", number = "6", pages = "452--466", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062365", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "SQL is the de facto language for manipulating relational data. Though powerful, many users find it difficult to write SQL queries due to highly expressive constructs. While using the programming-by-example paradigm to help users write SQL queries is an attractive proposition, as evidenced by online help forums such as Stack Overflow, developing techniques for synthesizing SQL queries from given input-output (I/O) examples has been difficult, due to the large space of SQL queries as a result of its rich set of operators. In this paper, we present a new scalable and efficient algorithm for synthesizing SQL queries based on I/O examples. The key innovation of our algorithm is development of a language for abstract queries, i.e., queries with uninstantiated operators, that can be used to express a large space of SQL queries efficiently. 
Using abstract queries to represent the search space nicely decomposes the synthesis problem into two tasks: (1) searching for abstract queries that can potentially satisfy the given I/O examples, and (2) instantiating the found abstract queries and ranking the results. We have implemented this algorithm in a new tool called Scythe and evaluated it using 193 benchmarks collected from Stack Overflow. Our evaluation shows that Scythe can efficiently solve 74\% of the benchmarks, most in just a few seconds, and the queries range from simple ones involving a single selection to complex queries with 6 nested subqueries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Bornholt:2017:SMM, author = "James Bornholt and Emina Torlak", title = "Synthesizing memory models from framework sketches and litmus tests", journal = j-SIGPLAN, volume = "52", number = "6", pages = "467--481", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062353", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A memory consistency model specifies which writes to shared memory a given read may see. Ambiguities or errors in these specifications can lead to bugs in both compilers and applications. Yet architectures usually define their memory models with prose and litmus tests --- small concurrent programs that demonstrate allowed and forbidden outcomes. Recent work has formalized the memory models of common architectures through substantial manual effort, but as new architectures emerge, there is a growing need for tools to aid these efforts. This paper presents MemSynth, a synthesis-aided system for reasoning about axiomatic specifications of memory models. MemSynth takes as input a set of litmus tests and a framework sketch that defines a class of memory models. The sketch comprises a set of axioms with missing expressions (or holes). Given these inputs, MemSynth synthesizes a completion of the axioms --- i.e., a memory model --- that gives the desired outcome on all tests. The MemSynth engine employs a novel embedding of bounded relational logic in a solver-aided programming language, which enables it to tackle complex synthesis queries intractable to existing relational solvers. This design also enables it to solve new kinds of queries, such as checking if a set of litmus tests unambiguously defines a memory model within a framework sketch. We show that MemSynth can synthesize specifications for x86 in under two seconds, and for PowerPC in 12 seconds from 768 litmus tests. Our ambiguity check identifies missing tests from both the Intel x86 documentation and the validation suite of a previous PowerPC formalization. We also used MemSynth to reproduce, debug, and automatically repair a paper on comparing memory models in just two days.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Maurer:2017:CC, author = "Luke Maurer and Paul Downen and Zena M.
Ariola and Simon Peyton Jones", title = "Compiling without continuations", journal = j-SIGPLAN, volume = "52", number = "6", pages = "482--494", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many fields of study in compilers give rise to the concept of a join point-a place where different execution paths come together. Join points are often treated as functions or continuations, but we believe it is time to study them in their own right. We show that adding join points to a direct-style functional intermediate language is a simple but powerful change that allows new optimizations to be performed, including a significant improvement to list fusion. Finally, we report on recent work on adding join points to the intermediate language of the Glasgow Haskell Compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Patterson:2017:FRM, author = "Daniel Patterson and Jamie Perconti and Christos Dimoulas and Amal Ahmed", title = "{FunTAL}: reasonably mixing a functional language with assembly", journal = j-SIGPLAN, volume = "52", number = "6", pages = "495--509", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062347", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present FunTAL, the first multi-language system to formalize safe interoperability between a high-level functional language and low-level assembly code while supporting compositional reasoning about the mix. A central challenge in developing such a multi-language is bridging the gap between assembly, which is staged into jumps to continuations, and high-level code, where subterms return a result. We present a compositional stack-based typed assembly language that supports components, comprised of one or more basic blocks, that may be embedded in high-level contexts. We also present a logical relation for FunTAL that supports reasoning about equivalence of high-level components and their assembly replacements, mixed-language programs with callbacks between languages, and assembly components comprised of different numbers of basic blocks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Chu:2017:HPQ, author = "Shumo Chu and Konstantin Weitz and Alvin Cheung and Dan Suciu", title = "{HoTTSQL}: proving query rewrites with univalent {SQL} semantics", journal = j-SIGPLAN, volume = "52", number = "6", pages = "510--524", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062348", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Every database system contains a query optimizer that performs query rewrites. Unfortunately, developing query optimizers remains a highly challenging task. 
Part of the challenge comes from the intricacies and rich features of query languages, which make reasoning about rewrite rules difficult. In this paper, we propose a machine-checkable denotational semantics for SQL, the de facto language for relational databases, for rigorously validating rewrite rules. Unlike previously proposed semantics that are either non-mechanized or only cover a small number of SQL language features, our semantics covers all major features of SQL, including bags, correlated subqueries, aggregation, and indexes. Our mechanized semantics, called HoTTSQL, is based on K-Relations and homotopy type theory, where we denote relations as mathematical functions from tuples to univalent types. We have implemented HoTTSQL in Coq, which takes fewer than 300 lines of code, and have proved a wide range of SQL rewrite rules, including those from database research literature (e.g., magic set rewrites) and real-world query optimizers (e.g., subquery elimination). Several of these rewrite rules have never been previously proven correct. In addition, while query equivalence is generally undecidable, we have implemented an automated decision procedure using HoTTSQL for conjunctive queries: a well-studied decidable fragment of SQL that encompasses many real-world queries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Eisenberg:2017:LP, author = "Richard A. Eisenberg and Simon Peyton Jones", title = "Levity polymorphism", journal = j-SIGPLAN, volume = "52", number = "6", pages = "525--539", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062357", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parametric polymorphism is one of the linchpins of modern typed programming, but it comes with a real performance penalty. We describe this penalty; offer a principled way to reason about it (kinds as calling conventions); and propose levity polymorphism. This new form of polymorphism allows abstractions over calling conventions; we detail and verify restrictions that are necessary in order to compile levity-polymorphic functions. Levity polymorphism has created new opportunities in Haskell, including the ability to generalize nearly half of the type classes in GHC's standard library.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Farzan:2017:SDC, author = "Azadeh Farzan and Victor Nicolet", title = "Synthesis of divide and conquer parallelism for loops", journal = j-SIGPLAN, volume = "52", number = "6", pages = "540--555", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062355", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Divide-and-conquer is a common parallel programming skeleton supported by many cross-platform multithreaded libraries, and most commonly used by programmers for parallelization.
The challenges of producing (manually or automatically) a correct divide-and-conquer parallel program from a given sequential code are two-fold: (1) assuming that a good solution exists where individual worker threads execute a code identical to the sequential one, the programmer has to provide the extra code for dividing the tasks and combining the partial results (i.e. joins), and (2) the sequential code may not be suitable for divide-and-conquer parallelization as is, and may need to be modified to become a part of a good solution. We address both challenges in this paper. We present an automated synthesis technique to synthesize correct joins and an algorithm for modifying the sequential code to make it suitable for parallelization when necessary. This paper focuses on class of loops that traverse a read-only collection and compute a scalar function over that collection. We present theoretical results for when the necessary modifications to sequential code are possible, theoretical guarantees for the algorithmic solutions presented here, and experimental evaluation of the approach's success in practice and the quality of the produced parallel programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Henriksen:2017:FPF, author = "Troels Henriksen and Niels G. W. Serup and Martin Elsman and Fritz Henglein and Cosmin E. Oancea", title = "{Futhark}: purely functional {GPU-programming} with nested parallelism and in-place array updates", journal = j-SIGPLAN, volume = "52", number = "6", pages = "556--571", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062354", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Futhark is a purely functional data-parallel array language that offers a machine-neutral programming model and an optimising compiler that generates OpenCL code for GPUs. This paper presents the design and implementation of three key features of Futhark that seek a suitable middle ground with imperative approaches. First, in order to express efficient code inside the parallel constructs, we introduce a simple type system for in-place updates that ensures referential transparency and supports equational reasoning. Second, we furnish Futhark with parallel operators capable of expressing efficient strength-reduced code, along with their fusion rules. Third, we present a flattening transformation aimed at enhancing the degree of parallelism that (i) builds on loop interchange and distribution but uses higher-order reasoning rather than array-dependence analysis, and (ii) still allows further locality-of-reference optimisations. 
Finally, an evaluation on 16 benchmarks demonstrates the impact of the language and compiler features and shows application-level performance competitive with hand-written GPU code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Fedyukovich:2017:GSS, author = "Grigory Fedyukovich and Maaz Bin Safeer Ahmad and Rastislav Bodik", title = "Gradual synthesis for static parallelization of single-pass array-processing programs", journal = j-SIGPLAN, volume = "52", number = "6", pages = "572--585", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Parallelizing of software improves its effectiveness and productivity. To guarantee correctness, the parallel and serial versions of the same code must be formally verified to be equivalent. We present a novel approach, called GRASSP, that automatically synthesizes parallel single-pass array-processing programs by treating the given serial versions as specifications. Given arbitrary segmentation of the input array, GRASSP synthesizes a code to determine a new segmentation of the array that allows computing partial results for each segment and merging them. In contrast to other parallelizers, GRASSP gradually considers several parallelization scenarios and certifies the results using constrained Horn solving. For several classes of programs, we show that such parallelization can be performed efficiently. The C++ translations of the GRASSP solutions sped performance by up to 5X relative to serial code on an 8-thread machine and Hadoop translations by up to 10X on a 10-node Amazon EMR cluster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Bourke:2017:FVC, author = "Timothy Bourke and L{\'e}lio Brun and Pierre-{\'E}variste Dagand and Xavier Leroy and Marc Pouzet and Lionel Rieg", title = "A formally verified compiler for {Lustre}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "586--601", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062358", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The correct compilation of block diagram languages like Lustre, Scade, and a discrete subset of Simulink is important since they are used to program critical embedded control software. We describe the specification and verification in an Interactive Theorem Prover of a compilation chain that treats the key aspects of Lustre: sampling, nodes, and delays. Building on CompCert, we show that repeated execution of the generated assembly code faithfully implements the dataflow semantics of source programs. We resolve two key technical challenges. The first is the change from a synchronous dataflow semantics, where programs manipulate streams of values, to an imperative one, where computations manipulate memory sequentially. The second is the verified compilation of an imperative language with encapsulated state to C code where the state is realized by nested records. 
We also treat a standard control optimization that eliminates unnecessary conditional statements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Abdulla:2017:FCF, author = "Parosh Aziz Abdulla and Mohamed Faouzi Atig and Yu-Fang Chen and Bui Phi Diep and Luk{\'a}s Hol{\'\i}k and Ahmed Rezine and Philipp R{\"u}mmer", title = "Flatten and conquer: a framework for efficient analysis of string constraints", journal = j-SIGPLAN, volume = "52", number = "6", pages = "602--617", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062384", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a uniform and efficient framework for checking the satisfiability of a large class of string constraints. The framework is based on the observation that both satisfiability and unsatisfiability of common constraints can be demonstrated through witnesses with simple patterns. These patterns are captured using flat automata each of which consists of a sequence of simple loops. We build a Counter-Example Guided Abstraction Refinement (CEGAR) framework which contains both an under- and an over-approximation module. The flow of information between the modules allows to increase the precision in an automatic manner. We have implemented the framework as a tool and performed extensive experimentation that demonstrates both the generality and efficiency of our method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Lahav:2017:RSC, author = "Ori Lahav and Viktor Vafeiadis and Jeehoon Kang and Chung-Kil Hur and Derek Dreyer", title = "Repairing sequential consistency in {C\slash C++11}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "618--632", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062352", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The C/C++11 memory model defines the semantics of concurrent memory accesses in C/C++, and in particular supports racy ``atomic'' accesses at a range of different consistency levels, from very weak consistency (``relaxed'') to strong, sequential consistency (``SC''). Unfortunately, as we observe in this paper, the semantics of SC atomic accesses in C/C++11, as well as in all proposed strengthenings of the semantics, is flawed, in that (contrary to previously published results) both suggested compilation schemes to the Power architecture are unsound. We propose a model, called RC11 (for Repaired C11), with a better semantics for SC accesses that restores the soundness of the compilation schemes to Power, maintains the DRF-SC guarantee, and provides stronger, more useful, guarantees to SC fences. 
In addition, we formally prove, for the first time, the correctness of the proposed stronger compilation schemes to Power that preserve load-to-store ordering and avoid ``out-of-thin-air'' reads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Lee:2017:TUB, author = "Juneyoung Lee and Yoonseung Kim and Youngju Song and Chung-Kil Hur and Sanjoy Das and David Majnemer and John Regehr and Nuno P. Lopes", title = "Taming undefined behavior in {LLVM}", journal = j-SIGPLAN, volume = "52", number = "6", pages = "633--647", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062343", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A central concern for an optimizing compiler is the design of its intermediate representation (IR) for code. The IR should make it easy to perform transformations, and should also afford efficient and precise static analysis. In this paper we study an aspect of IR design that has received little attention: the role of undefined behavior. The IR for every optimizing compiler we have looked at, including GCC, LLVM, Intel's, and Microsoft's, supports one or more forms of undefined behavior (UB), not only to reflect the semantics of UB-heavy programming languages such as C and C++, but also to model inherently unsafe low-level operations such as memory stores and to avoid over-constraining IR semantics to the point that desirable transformations become illegal. The current semantics of LLVM's IR fails to justify some cases of loop unswitching, global value numbering, and other important ``textbook'' optimizations, causing long-standing bugs. We present solutions to the problems we have identified in LLVM's IR and show that most optimizations currently in LLVM remain sound, and that some desirable new transformations become permissible. Our solutions do not degrade compile time or performance of generated code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Schneider:2017:LSM, author = "Scott Schneider and Kun-Lung Wu", title = "Low-synchronization, mostly lock-free, elastic scheduling for streaming runtimes", journal = j-SIGPLAN, volume = "52", number = "6", pages = "648--661", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062366", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the scalable, elastic operator scheduler in IBM Streams 4.2. Streams is a distributed stream processing system used in production at many companies in a wide range of industries. The programming language for Streams, SPL, presents operators, tuples and streams as the primary abstractions. A fundamental SPL optimization is operator fusion, where multiple operators execute in the same process. Streams 4.2 introduces automatic submission-time fusion to simplify application development and deployment. However, potentially thousands of operators could then execute in the same process, with no user guidance for thread placement. 
We needed a way to automatically figure out how many threads to use, with arbitrarily sized applications on a wide variety of hardware, and without any input from programmers. Our solution has two components. The first is a scalable operator scheduler that minimizes synchronization, locks and global data, while allowing threads to execute any operator and dynamically come and go. The second is an elastic algorithm to dynamically adjust the number of threads to optimize performance, using the principles of trusted measurements to establish trends. We demonstrate our scheduler's ability to scale to over a hundred threads, and our elasticity algorithm's ability to adapt to different workloads on an Intel Xeon system with 176 logical cores, and an IBM Power8 system with 184 logical cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Wurthinger:2017:PPE, author = "Thomas W{\"u}rthinger and Christian Wimmer and Christian Humer and Andreas W{\"o}{\ss} and Lukas Stadler and Chris Seaton and Gilles Duboscq and Doug Simon and Matthias Grimmer", title = "Practical partial evaluation for high-performance dynamic language runtimes", journal = j-SIGPLAN, volume = "52", number = "6", pages = "662--676", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062381", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most high-performance dynamic language virtual machines duplicate language semantics in the interpreter, compiler, and runtime system. This violates the principle to not repeat yourself. In contrast, we define languages solely by writing an interpreter. The interpreter performs specializations, e.g., augments the interpreted program with type information and profiling information. Compiled code is derived automatically using partial evaluation while incorporating these specializations. This makes partial evaluation practical in the context of dynamic languages: It reduces the size of the compiled code while still compiling all parts of an operation that are relevant for a particular program. When a speculation fails, execution transfers back to the interpreter, the program re-specializes in the interpreter, and later partial evaluation again transforms the new state of the interpreter to compiled code. We evaluate our approach by comparing our implementations of JavaScript, Ruby, and R with best-in-class specialized production implementations. Our general-purpose compilation system is competitive with production systems even when they have been heavily optimized for the one language they support. For our set of benchmarks, our speedup relative to the V8 JavaScript VM is 0.83x, relative to JRuby is 3.8x, and relative to GNU R is 5x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Muller:2017:RPC, author = "Stefan K. Muller and Umut A. 
Acar and Robert Harper", title = "Responsive parallel computation: bridging competitive and cooperative threading", journal = j-SIGPLAN, volume = "52", number = "6", pages = "677--692", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Competitive and cooperative threading are widely used abstractions in computing. In competitive threading, threads are scheduled preemptively with the goal of minimizing response time, usually of interactive applications. In cooperative threading, threads are scheduled non-preemptively with the goal of maximizing throughput or minimizing the completion time, usually in compute-intensive applications, e.g. scientific computing, machine learning and AI. Although both of these forms of threading rely on the same abstraction of a thread, they have, to date, remained largely separate forms of computing. Motivated by the recent increase in the mainstream use of multicore computers, we propose a threading model that aims to unify competitive and cooperative threading. To this end, we extend the classic graph-based cost model for cooperative threading to allow for competitive threading, and describe how such a cost model may be used in a programming language by presenting a language and a corresponding cost semantics. Finally, we show that the cost model and the semantics are realizable by presenting an operational semantics for the language that specifies the behavior of an implementation, as well as an implementation and a small empirical evaluation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Mamouras:2017:SMS, author = "Konstantinos Mamouras and Mukund Raghothaman and Rajeev Alur and Zachary G. Ives and Sanjeev Khanna", title = "{StreamQRE}: modular specification and efficient evaluation of quantitative queries over streaming data", journal = j-SIGPLAN, volume = "52", number = "6", pages = "693--708", month = jun, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140587.3062369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Real-time decision making in emerging IoT applications typically relies on computing quantitative summaries of large data streams in an efficient and incremental manner. To simplify the task of programming the desired logic, we propose StreamQRE, which provides natural and high-level constructs for processing streaming data. Our language has a novel integration of linguistic constructs from two distinct programming paradigms: streaming extensions of relational query languages and quantitative extensions of regular expressions. The former allows the programmer to employ relational constructs to partition the input data by keys and to integrate data streams from different sources, while the latter can be used to exploit the logical hierarchy in the input stream for modular specifications. We first present the core language with a small set of combinators, formal semantics, and a decidable type system. We then show how to express a number of common patterns with illustrative examples. 
Our compilation algorithm translates the high-level query into a streaming algorithm with precise complexity bounds on per-item processing time and total memory footprint. We also show how to integrate approximation algorithms into our framework. We report on an implementation in Java, and evaluate it with respect to existing high-performance engines for processing streaming data. Our experimental evaluation shows that (1) StreamQRE allows more natural and succinct specification of queries compared to existing frameworks, (2) the throughput of our implementation is higher than comparable systems (for example, two-to-four times greater than RxJava), and (3) the approximation algorithms supported by our implementation can lead to substantial memory savings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '17 conference proceedings.", } @Article{Weirich:2017:IDT, author = "Stephanie Weirich", title = "The influence of dependent types (keynote)", journal = j-SIGPLAN, volume = "52", number = "1", pages = "1--1", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009923", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "What has dependent type theory done for Haskell? In this talk, I will discuss the influence of dependent types on the design of programming languages and on the practice of functional programmers. Over the past ten years, the Glasgow Haskell compiler has adopted several type system features inspired by dependent type theory. However, this process has not been a direct translation; working in the context of an existing language has led us to new designs in the semantics of dependent types. I will take a close look at what we have achieved in GHC and discuss what we have learned from this experiment: what works now, what doesn't work yet, and what has surprised us along the way.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Turon:2017:RPP, author = "Aaron Turon", title = "{Rust}: from {POPL} to practice (keynote)", journal = j-SIGPLAN, volume = "52", number = "1", pages = "2--2", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3011999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In 2015, a language based fundamentally on substructural typing --- Rust --- hit its 1.0 release, and less than a year later it has been put into production use in a number of tech companies, including some household names. The language has started a trend, with several other mainstream languages, including C++ and Swift, in the early stages of incorporating ideas about ownership. How did this come about? Rust's core focus is safe systems programming. It does not require a runtime system or garbage collector, but guarantees memory safety. It does not stipulate any particular style of concurrent programming, but instead provides the tools needed to guarantee data race freedom even when doing low-level shared-state concurrency.
It allows you to build up high-level abstractions without paying a tax; its compilation model ensures that the abstractions boil away. These benefits derive from two core aspects of Rust: its ownership system (based on substructural typing) and its trait system (a descendant of Haskell's typeclasses). The talk will cover these two pillars of Rust design, with particular attention to the key innovations that make the language usable at scale. It will highlight the implications for concurrency, where Rust provides a unique perspective. It will also touch on aspects of Rust's development that tend to get less attention within the POPL community: Rust's governance and open development process, and design considerations around language and library evolution. Finally, it will mention a few of the myriad open research questions around Rust.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Alglave:2017:OPI, author = "Jade Alglave and Patrick Cousot", title = "Ogre and {Pythia}: an invariance proof method for weak consistency models", journal = j-SIGPLAN, volume = "52", number = "1", pages = "3--18", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009883", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We design an invariance proof method for concurrent programs parameterised by a weak consistency model. The calculational design of the invariance proof method is by abstract interpretation of a truly parallel analytic semantics. This generalises the methods by Lamport and Owicki-Gries for sequential consistency. We use cat as an example of language to write consistency specifications of both concurrent programs and machine architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Germane:2017:PEA, author = "Kimball Germane and Matthew Might", title = "A posteriori environment analysis with {Pushdown Delta CFA}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "19--31", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009899", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Flow-driven higher-order inlining is blocked by free variables, yet current theories of environment analysis cannot reliably cope with multiply-bound variables. One of these, $ \Delta $CFA, is a promising theory based on stack change but is undermined by its finite-state model of the stack. 
We present Pushdown $ \Delta $CFA which takes a $ \Delta $CFA -approach to pushdown models of control flow and can cope with multiply-bound variables, even in the face of recursion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Li:2017:SDC, author = "Huisong Li and Fran{\c{c}}ois Berenger and Bor-Yuh Evan Chang and Xavier Rival", title = "Semantic-directed clumping of disjunctive abstract states", journal = j-SIGPLAN, volume = "52", number = "1", pages = "32--45", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009881", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To infer complex structural invariants, shape analyses rely on expressive families of logical properties. Many such analyses manipulate abstract memory states that consist of separating conjunctions of basic predicates describing atomic blocks or summaries. Moreover, they use finite disjunctions of abstract memory states in order to account for dissimilar shapes. Disjunctions should be kept small for the sake of scalability, though precision often requires to keep additional case splits. In this context, deciding when and how to merge case splits and to replace them with summaries is critical both for the precision and for the efficiency. Existing techniques use sets of syntactic rules, which are tedious to design and prone to failure. In this paper, we design a semantic criterion to clump abstract states based on their silhouette which applies not only to the conservative union of disjuncts, but also to the weakening of separating conjunction of memory predicates into inductive summaries. Our approach allows to define union and widening operators that aim at preserving the case splits that are required for the analysis to succeed. We implement this approach in the MemCAD analyzer, and evaluate it on real-world C codes from existing libraries, including programs dealing with doubly linked lists, red-black trees and AVL-trees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Singh:2017:FPA, author = "Gagandeep Singh and Markus P{\"u}schel and Martin Vechev", title = "Fast polyhedra abstract domain", journal = j-SIGPLAN, volume = "52", number = "1", pages = "46--59", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009885", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Numerical abstract domains are an important ingredient of modern static analyzers used for verifying critical program properties (e.g., absence of buffer overflow or memory safety). Among the many numerical domains introduced over the years, Polyhedra is the most expressive one, but also the most expensive: it has worst-case exponential space and time complexity. As a consequence, static analysis with the Polyhedra domain is thought to be impractical when applied to large scale, real world programs. In this paper, we present a new approach and a complete implementation for speeding up Polyhedra domain analysis. 
Our approach does not lose precision, and for many practical cases, is orders of magnitude faster than state-of-the-art solutions. The key insight underlying our work is that polyhedra arising during analysis can usually be kept decomposed, thus considerably reducing the overall complexity. We first present the theory underlying our approach, which identifies the interaction between partitions of variables and domain operators. Based on the theory we develop new algorithms for these operators that work with decomposed polyhedra. We implemented these algorithms using the same interface as existing libraries, thus enabling static analyzers to use our implementation with little effort. In our evaluation, we analyze large benchmarks from the popular software verification competition, including Linux device drivers with over 50K lines of code. Our experimental results demonstrate massive gains in both space and time: we show end-to-end speedups of two to five orders of magnitude compared to state-of-the-art Polyhedra implementations as well as significant memory gains, on all larger benchmarks. In fact, in many cases our analysis terminates in seconds where prior code runs out of memory or times out after 4 hours. We believe this work is an important step in making the Polyhedra abstract domain both feasible and practically usable for handling large, real-world programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Dolan:2017:PST, author = "Stephen Dolan and Alan Mycroft", title = "Polymorphism, subtyping, and type inference in {MLsub}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "60--72", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009882", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a type system combining subtyping and ML-style parametric polymorphism. Unlike previous work, our system supports type inference and has compact principal types. We demonstrate this system in the minimal language MLsub, which types a strict superset of core ML programs. This is made possible by keeping a strict separation between the types used to describe inputs and those used to describe outputs, and extending the classical unification algorithm to handle subtyping constraints between these input and output types. Principal types are kept compact by type simplification, which exploits deep connections between subtyping and the algebra of regular languages. An implementation is available online.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Grigore:2017:JGT, author = "Radu Grigore", title = "{Java} generics are {Turing} complete", journal = j-SIGPLAN, volume = "52", number = "1", pages = "73--85", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009871", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a reduction from the halting problem of Turing machines to subtype checking in Java. 
It follows that subtype checking in Java is undecidable, which answers a question posed by Kennedy and Pierce in 2007. It also follows that Java's type checker can recognize any recursive language, which improves a result of Gill and Levy from 2016. The latter point is illustrated by a parser generator for fluent interfaces.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Omar:2017:HBT, author = "Cyrus Omar and Ian Voysey and Michael Hilton and Jonathan Aldrich and Matthew A. Hammer", title = "{Hazelnut}: a bidirectionally typed structure editor calculus", journal = j-SIGPLAN, volume = "52", number = "1", pages = "86--99", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009900", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Structure editors allow programmers to edit the tree structure of a program directly. This can have cognitive benefits, particularly for novice and end-user programmers. It also simplifies matters for tool designers, because they do not need to contend with malformed program text. This paper introduces Hazelnut, a structure editor based on a small bidirectionally typed lambda calculus extended with holes and a cursor. Hazelnut goes one step beyond syntactic well-formedness: its edit actions operate over statically meaningful incomplete terms. Na{\"\i}vely, this would force the programmer to construct terms in a rigid ``outside-in'' manner. To avoid this problem, the action semantics automatically places terms assigned a type that is inconsistent with the expected type inside a hole. This meaningfully defers the type consistency check until the term inside the hole is finished. Hazelnut is not intended as an end-user tool itself. Instead, it serves as a foundational account of typed structure editing. To that end, we describe how Hazelnut's rich metatheory, which we have mechanized using the Agda proof assistant, serves as a guide when we extend the calculus to include binary sum types. We also discuss various interpretations of holes, and in so doing reveal connections with gradual typing and contextual modal type theory, the Curry--Howard interpretation of contextual modal logic. Finally, we discuss how Hazelnut's semantics lends itself to implementation as an event-based functional reactive program. Our simple reference implementation is written using js_of_ocaml.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Crary:2017:MAP, author = "Karl Crary", title = "Modules, abstraction, and parametric polymorphism", journal = j-SIGPLAN, volume = "52", number = "1", pages = "100--113", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009892", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reynolds's Abstraction theorem forms the mathematical foundation for data abstraction. His setting was the polymorphic lambda calculus.
Today, many modern languages, such as the ML family, employ rich module systems designed to give more expressive support for data abstraction than the polymorphic lambda calculus, but analogues of the Abstraction theorem for such module systems have lagged far behind. We give an account of the Abstraction theorem for a modern module calculus supporting generative and applicative functors, higher-order functors, sealing, and translucent signatures. The main issues to be overcome are: (1) the fact that modules combine both types and terms, so they must be treated as both simultaneously, (2) the effect discipline that models the distinction between transparent and opaque modules, and (3) a very rich language of type constructors supporting singleton kinds. We define logical equivalence for modules and show that it coincides with contextual equivalence. This substantiates the folk theorem that modules are good for data abstraction. All our proofs are formalized in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Lampropoulos:2017:BLL, author = "Leonidas Lampropoulos and Diane Gallois-Wong and Catalin Hritcu and John Hughes and Benjamin C. Pierce and Li-yao Xia", title = "Beginner's luck: a language for property-based generators", journal = j-SIGPLAN, volume = "52", number = "1", pages = "114--129", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009868", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Property-based random testing {\`a} la QuickCheck requires building efficient generators for well-distributed random data satisfying complex logical predicates, but writing these generators can be difficult and error prone. We propose a domain-specific language in which generators are conveniently expressed by decorating predicates with lightweight annotations to control both the distribution of generated values and the amount of constraint solving that happens before each variable is instantiated. This language, called Luck, makes generators easier to write, read, and maintain. We give Luck a formal semantics and prove several fundamental properties, including the soundness and completeness of random generation with respect to a standard predicate semantics. 
We evaluate Luck on common examples from the property-based testing literature and on two significant case studies, showing that it can be used in complex domains with comparable bug-finding effectiveness and a significant reduction in testing code size compared to handwritten generators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Shan:2017:EBI, author = "Chung-chieh Shan and Norman Ramsey", title = "Exact {Bayesian} inference by symbolic disintegration", journal = j-SIGPLAN, volume = "52", number = "1", pages = "130--144", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009852", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Bayesian inference, of posterior knowledge from prior knowledge and observed evidence, is typically defined by Bayes's rule, which says the posterior multiplied by the probability of an observation equals a joint probability. But the observation of a continuous quantity usually has probability zero, in which case Bayes's rule says only that the unknown times zero is zero. To infer a posterior distribution from a zero-probability observation, the statistical notion of disintegration tells us to specify the observation as an expression rather than a predicate, but does not tell us how to compute the posterior. We present the first method of computing a disintegration from a probabilistic program and an expression of a quantity to be observed, even when the observation has probability zero. Because the method produces an exact posterior term and preserves a semantics in which monadic terms denote measures, it composes with other inference methods in a modular way --- without sacrificing accuracy or performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Chatterjee:2017:SIP, author = "Krishnendu Chatterjee and Petr Novotn{\'y} and {\Eth}orde Zikeli{\'c}", title = "Stochastic invariants for probabilistic termination", journal = j-SIGPLAN, volume = "52", number = "1", pages = "145--160", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009873", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Termination is one of the basic liveness properties, and we study the termination problem for probabilistic programs with real-valued variables. Previous works focused on the qualitative problem that asks whether an input program terminates with probability 1 (almost-sure termination). A powerful approach for this qualitative problem is the notion of ranking supermartingales with respect to a given set of invariants. The quantitative problem (probabilistic termination) asks for bounds on the termination probability, and this problem has not been addressed yet.
A fundamental and conceptual drawback of the existing approaches to address probabilistic termination is that even though the supermartingales consider the probabilistic behaviour of the programs, the invariants are obtained completely ignoring the probabilistic aspect (i.e., the invariants are obtained considering all behaviours with no information about the probability). In this work we address the probabilistic termination problem for linear-arithmetic probabilistic programs with nondeterminism. We formally define the notion of stochastic invariants, which are constraints along with a probability bound that the constraints hold. We introduce a concept of repulsing supermartingales. First, we show that repulsing supermartingales can be used to obtain bounds on the probability of the stochastic invariants. Second, we show the effectiveness of repulsing supermartingales in the following three ways: (1) With a combination of ranking and repulsing supermartingales we can compute lower bounds on the probability of termination; (2) repulsing supermartingales provide witnesses for refutation of almost-sure termination; and (3) with a combination of ranking and repulsing supermartingales we can establish persistence properties of probabilistic programs. Along with our conceptual contributions, we establish the following computational results: First, the synthesis of a stochastic invariant which supports some ranking supermartingale and at the same time admits a repulsing supermartingale can be achieved via reduction to the existential first-order theory of reals, which generalizes existing results from the non-probabilistic setting. Second, given a program with ``strict invariants'' (e.g., obtained via abstract interpretation) and a stochastic invariant, we can check in polynomial time whether there exists a linear repulsing supermartingale w.r.t. the stochastic invariant (via reduction to LP). We also present experimental evaluation of our approach on academic examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Barthe:2017:CPP, author = "Gilles Barthe and Benjamin Gr{\'e}goire and Justin Hsu and Pierre-Yves Strub", title = "Coupling proofs are probabilistic product programs", journal = j-SIGPLAN, volume = "52", number = "1", pages = "161--174", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009896", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Couplings are a powerful mathematical tool for reasoning about pairs of probabilistic processes. Recent developments in formal verification identify a close connection between couplings and pRHL, a relational program logic motivated by applications to provable security, enabling formal construction of couplings from the probability theory literature. However, existing work using pRHL merely shows existence of a coupling and does not give a way to prove quantitative properties about the coupling, needed to reason about mixing and convergence of probabilistic processes. Furthermore, pRHL is inherently incomplete, and is not able to capture some advanced forms of couplings such as shift couplings. We address both problems as follows.
First, we define an extension of pRHL, called x-pRHL, which explicitly constructs the coupling in a pRHL derivation in the form of a probabilistic product program that simulates two correlated runs of the original program. Existing verification tools for probabilistic programs can then be directly applied to the probabilistic product to prove quantitative properties of the coupling. Second, we equip x-pRHL with a new rule for while loops, where reasoning can freely mix synchronized and unsynchronized loop iterations. Our proof rule can capture examples of shift couplings, and the logic is relatively complete for deterministic programs. We show soundness of x-PRHL and use it to analyze two classes of examples. First, we verify rapid mixing using different tools from coupling: standard coupling, shift coupling, and path coupling, a compositional principle for combining local couplings into a global coupling. Second, we verify (approximate) equivalence between a source and an optimized program for several instances of loop optimizations from the literature.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kang:2017:PSR, author = "Jeehoon Kang and Chung-Kil Hur and Ori Lahav and Viktor Vafeiadis and Derek Dreyer", title = "A promising semantics for relaxed-memory concurrency", journal = j-SIGPLAN, volume = "52", number = "1", pages = "175--189", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009850", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite many years of research, it has proven very difficult to develop a memory model for concurrent programming languages that adequately balances the conflicting desiderata of programmers, compilers, and hardware. In this paper, we propose the first relaxed memory model that (1) accounts for a broad spectrum of features from the C++11 concurrency model, (2) is implementable, in the sense that it provably validates many standard compiler optimizations and reorderings, as well as standard compilation schemes to x86-TSO and Power, (3) justifies simple invariant-based reasoning, thus demonstrating the absence of bad ``out-of-thin-air'' behaviors, (4) supports ``DRF'' guarantees, ensuring that programmers who use sufficient synchronization need not understand the full complexities of relaxed-memory semantics, and (5) defines the semantics of racy programs without relying on undefined behaviors, which is a prerequisite for applicability to type-safe languages like Java. The key novel idea behind our model is the notion of *promises*: a thread may promise to execute a write in the future, thus enabling other threads to read from that write out of order. Crucially, to prevent out-of-thin-air behaviors, a promise step requires a thread-local certification that it will be possible to execute the promised write even in the absence of the promise. To establish confidence in our model, we have formalized most of our key results in Coq.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Wickerson:2017:ACM, author = "John Wickerson and Mark Batty and Tyler Sorensen and George A. 
Constantinides", title = "Automatically comparing memory consistency models", journal = j-SIGPLAN, volume = "52", number = "1", pages = "190--204", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009838", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A memory consistency model (MCM) is the part of a programming language or computer architecture specification that defines which values can legally be read from shared memory locations. Because MCMs take into account various optimisations employed by architectures and compilers, they are often complex and counterintuitive, which makes them challenging to design and to understand. We identify four tasks involved in designing and understanding MCMs: generating conformance tests, distinguishing two MCMs, checking compiler optimisations, and checking compiler mappings. We show that all four tasks are instances of a general constraint-satisfaction problem to which the solution is either a program or a pair of programs. Although this problem is intractable for automatic solvers when phrased over programs directly, we show how to solve analogous constraints over program executions, and then construct programs that satisfy the original constraints. Our technique, which is implemented in the Alloy modelling framework, is illustrated on several software- and architecture-level MCMs, both axiomatically and operationally defined. We automatically recreate several known results, often in a simpler form, including: distinctions between variants of the C11 MCM; a failure of the `SC-DRF guarantee' in an early C11 draft; that x86 is `multi-copy atomic' and Power is not; bugs in common C11 compiler optimisations; and bugs in a compiler mapping from OpenCL to AMD-style GPUs. We also use our technique to develop and validate a new MCM for NVIDIA GPUs that supports a natural mapping from OpenCL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Krebbers:2017:IPH, author = "Robbert Krebbers and Amin Timany and Lars Birkedal", title = "Interactive proofs in higher-order concurrent separation logic", journal = j-SIGPLAN, volume = "52", number = "1", pages = "205--217", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009855", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When using a proof assistant to reason in an embedded logic --- like separation logic --- one cannot benefit from the proof contexts and basic tactics of the proof assistant. This results in proofs that are at a too low level of abstraction because they are cluttered with bookkeeping code related to manipulating the object logic. In this paper, we introduce a so-called proof mode that extends the Coq proof assistant with (spatial and non-spatial) named proof contexts for the object logic. We show that thanks to these contexts we can implement high-level tactics for introduction and elimination of the connectives of the object logic, and thereby make reasoning in the embedded logic as seamless as reasoning in the meta logic of the proof assistant. 
We apply our method to Iris: a state of the art higher-order impredicative concurrent separation logic. We show that our method is very general, and is not just limited to program verification. We demonstrate its generality by formalizing correctness proofs of fine-grained concurrent algorithms, derived constructs of the Iris logic, and a unary and binary logical relation for a language with concurrency, higher-order store, polymorphism, and recursive types. This is the first formalization of a binary logical relation for such an expressive language. We also show how to use the logical relation to prove contextual refinement of fine-grained concurrent algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Krogh-Jespersen:2017:RMT, author = "Morten Krogh-Jespersen and Kasper Svendsen and Lars Birkedal", title = "A relational model of types-and-effects in higher-order concurrent separation logic", journal = j-SIGPLAN, volume = "52", number = "1", pages = "218--231", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009877", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recently we have seen a renewed interest in programming languages that tame the complexity of state and concurrency through refined type systems with more fine-grained control over effects. In addition to simplifying reasoning and eliminating whole classes of bugs, statically tracking effects opens the door to advanced compiler optimizations. In this paper we present a relational model of a type-and-effect system for a higher-order, concurrent programming language. The model precisely captures the semantic invariants expressed by the effect annotations. We demonstrate that these invariants are strong enough to prove advanced program transformations, including automatic parallelization of expressions with suitably disjoint effects. The model also supports refinement proofs between abstract data types implementations with different internal data representations, including proofs that fine-grained concurrent algorithms refine their coarse-grained counterparts. This is the first model for such an expressive language that supports both effect-based optimizations and data abstraction. The logical relation is defined in Iris, a state-of-the-art higher-order concurrent separation logic. 
This greatly simplifies proving well-definedness of the logical relation and also provides us with a powerful logic for reasoning in the model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{DAntoni:2017:MSO, author = "Loris D'Antoni and Margus Veanes", title = "Monadic second-order logic on finite sequences", journal = j-SIGPLAN, volume = "52", number = "1", pages = "232--245", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009844", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We extend the weak monadic second-order logic of one successor on finite strings (M2L-STR) to symbolic alphabets by allowing character predicates to range over decidable quantifier free theories instead of finite alphabets. We call this logic, which is able to describe sequences over complex and potentially infinite domains, symbolic M2L-STR (S-M2L-STR). We then present a decision procedure for S-M2L-STR based on a reduction to symbolic finite automata, a decidable extension of finite automata that allows transitions to carry predicates and can therefore model symbolic alphabets. The reduction constructs a symbolic automaton over an alphabet consisting of pairs of symbols where the first element of the pair is a symbol in the original formula's alphabet, while the second element is a bit-vector. To handle this modified alphabet we show that the Cartesian product of two decidable Boolean algebras (e.g., the formula's one and the bit-vector's one) also forms a decidable Boolean algebra. To make the decision procedure practical, we propose two efficient representations of the Cartesian product of two Boolean algebras, one based on algebraic decision diagrams and one on a variant of Shannon expansions. Finally, we implement our decision procedure and evaluate it on more than 10,000 formulas. Despite the generality, our implementation has comparable performance with the state-of-the-art M2L-STR solvers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kobayashi:2017:RBH, author = "Naoki Kobayashi and {\'E}tienne Lozes and Florian Bruse", title = "On the relationship between higher-order recursion schemes and higher-order fixpoint logic", journal = j-SIGPLAN, volume = "52", number = "1", pages = "246--259", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009854", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We study the relationship between two kinds of higher-order extensions of model checking: HORS model checking, where models are extended to higher-order recursion schemes, and HFL model checking, where the logic is extended to higher-order modal fixpoint logic. Those extensions have been independently studied until recently, and the former has been applied to higher-order program verification. We show that there exist (arguably) natural reductions between the two problems.
To prove the correctness of the translation from HORS to HFL model checking, we establish a type-based characterization of HFL model checking, which should be of independent interest. The results reveal a close relationship between the two problems, enabling cross-fertilization of the two research threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kovacs:2017:CTQ, author = "Laura Kov{\'a}cs and Simon Robillard and Andrei Voronkov", title = "Coming to terms with quantified reasoning", journal = j-SIGPLAN, volume = "52", number = "1", pages = "260--270", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009887", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The theory of finite term algebras provides a natural framework to describe the semantics of functional languages. The ability to efficiently reason about term algebras is essential to automate program analysis and verification for functional or imperative programs over inductively defined data types such as lists and trees. However, as the theory of finite term algebras is not finitely axiomatizable, reasoning about quantified properties over term algebras is challenging. In this paper we address full first-order reasoning about properties of programs manipulating term algebras, and describe two approaches for doing so by using first-order theorem proving. Our first method is a conservative extension of the theory of term algebras using a finite number of statements, while our second method relies on extending the superposition calculus of first-order theorem provers with additional inference rules. We implemented our work in the first-order theorem prover Vampire and evaluated it on a large number of inductive datatype benchmarks, as well as game theory constraints. Our experimental results show that our methods are able to find proofs for many hard problems previously unsolved by state-of-the-art methods. We also show that Vampire implementing our methods outperforms existing SMT solvers able to deal with inductive data types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Scully:2017:POA, author = "Ziv Scully and Adam Chlipala", title = "A program optimization for automatic database result caching", journal = j-SIGPLAN, volume = "52", number = "1", pages = "271--284", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009891", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most popular Web applications rely on persistent databases based on languages like SQL for declarative specification of data models and the operations that read and modify them. As applications scale up in user base, they often face challenges responding quickly enough to the high volume of requests. 
A common aid is caching of database results in the application's memory space, taking advantage of program-specific knowledge of which caching schemes are sound and useful, embodied in handwritten modifications that make the program less maintainable. These modifications also require nontrivial reasoning about the read-write dependencies across operations. In this paper, we present a compiler optimization that automatically adds sound SQL caching to Web applications coded in the Ur/Web domain-specific functional language, with no modifications required to source code. We use a custom cache implementation that supports concurrent operations without compromising the transactional semantics of the database abstraction. Through experiments with microbenchmarks and production Ur/Web applications, we show that our optimization in many cases enables an easy doubling or more of an application's throughput, requiring nothing more than passing an extra command-line flag to the compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kiselyov:2017:SFC, author = "Oleg Kiselyov and Aggelos Biboudis and Nick Palladinos and Yannis Smaragdakis", title = "Stream fusion, to completeness", journal = j-SIGPLAN, volume = "52", number = "1", pages = "285--299", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009880", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stream processing is mainstream (again): Widely-used stream libraries are now available for virtually all modern OO and functional languages, from Java to C\# to Scala to OCaml to Haskell. Yet expressivity and performance are still lacking. For instance, the popular, well-optimized Java 8 streams do not support the zip operator and are still an order of magnitude slower than hand-written loops. We present the first approach that represents the full generality of stream processing and eliminates overheads, via the use of staging. It is based on an unusually rich semantic model of stream interaction. We support any combination of zipping, nesting (or flat-mapping), sub-ranging, filtering, mapping --- of finite or infinite streams. Our model captures idiosyncrasies that a programmer uses in optimizing stream pipelines, such as rate differences and the choice of ``for'' vs. ``while'' loops. Our approach delivers hand-written-like code, but automatically. It explicitly avoids the reliance on black-box optimizers and sufficiently-smart compilers, offering highest, guaranteed and portable performance. Our approach relies on high-level concepts that are then readily mapped into an implementation. Accordingly, we have two distinct implementations: an OCaml stream library, staged via MetaOCaml, and a Scala library for the JVM, staged via LMS. In both cases, we derive libraries richer and simultaneously many tens of times faster than past work.
We greatly exceed in performance the standard stream libraries available in Java, Scala and OCaml, including the well-optimized Java 8 streams.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Chiang:2017:RFP, author = "Wei-Fan Chiang and Mark Baranowski and Ian Briggs and Alexey Solovyev and Ganesh Gopalakrishnan and Zvonimir Rakamari{\'c}", title = "Rigorous floating-point mixed-precision tuning", journal = j-SIGPLAN, volume = "52", number = "1", pages = "300--315", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009846", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Virtually all real-valued computations are carried out using floating-point data types and operations. The precision of these data types must be set with the goals of reducing the overall round-off error, but also emphasizing performance improvements. Often, a mixed-precision allocation achieves this optimum; unfortunately, there are no techniques available to compute such allocations and conservatively meet a given error target across all program inputs. In this work, we present a rigorous approach to precision allocation based on formal analysis via Symbolic Taylor Expansions, and error analysis based on interval functions. This approach is implemented in an automated tool called FPTuner that generates and solves a quadratically constrained quadratic program to obtain a precision-annotated version of the given expression. FPTuner automatically introduces all the requisite precision up and down casting operations. It also allows users to flexibly control precision allocation using constraints to cap the number of high precision operators as well as group operators to allocate the same precision to facilitate vectorization. We evaluate FPTuner by tuning several benchmarks and measuring the proportion of lower precision operators allocated as we increase the error threshold. We also measure the reduction in energy consumption resulting from executing mixed-precision tuned code on a real hardware platform. We observe significant energy savings in response to mixed-precision tuning, but also observe situations where unexpected compiler behaviors thwart intended optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Cicek:2017:RCA, author = "Ezgi {\c{C}}i{\c{c}}ek and Gilles Barthe and Marco Gaboardi and Deepak Garg and Jan Hoffmann", title = "Relational cost analysis", journal = j-SIGPLAN, volume = "52", number = "1", pages = "316--329", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009858", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Establishing quantitative bounds on the execution cost of programs is essential in many areas of computer science such as complexity analysis, compiler optimizations, security and privacy. 
Techniques based on program analysis, type systems and abstract interpretation are well-studied, but methods for analyzing how the execution costs of two programs compare to each other have not received attention. Naively combining the worst and best case execution costs of the two programs does not work well in many cases because such analysis forgets the similarities between the programs or the inputs. In this work, we propose a relational cost analysis technique that is capable of establishing precise bounds on the difference in the execution cost of two programs by making use of relational properties of programs and inputs. We develop RelCost, a refinement type and effect system for a higher-order functional language with recursion and subtyping. The key novelty of our technique is the combination of relational refinements with two modes of typing --- relational typing for reasoning about similar computations/inputs and unary typing for reasoning about unrelated computations/inputs. This combination allows us to analyze the execution cost difference of two programs more precisely than a naive non-relational approach. We prove our type system sound using a semantic model based on step-indexed unary and binary logical relations accounting for non-relational and relational reasoning principles with their respective costs. We demonstrate the precision and generality of our technique through examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Madhavan:2017:CBR, author = "Ravichandhran Madhavan and Sumith Kulal and Viktor Kuncak", title = "Contract-based resource verification for higher-order functions with memoization", journal = j-SIGPLAN, volume = "52", number = "1", pages = "330--343", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009874", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a new approach for specifying and verifying resource utilization of higher-order functional programs that use lazy evaluation and memoization. In our approach, users can specify the desired resource bound as templates with numerical holes e.g. as steps $ \leq $ ? * size(l) + ? in the contracts of functions. They can also express invariants necessary for establishing the bounds that may depend on the state of memoization. Our approach operates in two phases: first generating an instrumented first-order program that accurately models the higher-order control flow and the effects of memoization on resources using sets, algebraic datatypes and mutual recursion, and then verifying the contracts of the first-order program by producing verification conditions of the form $ \exists \forall $ using an extended assume/guarantee reasoning. We use our approach to verify precise bounds on resources such as evaluation steps and number of heap-allocated objects on 17 challenging data structures and algorithms.
Our benchmarks, comprising of 5K lines of functional Scala code, include lazy mergesort, Okasaki's real-time queue and deque data structures that rely on aliasing of references to first-class functions; lazy data structures based on numerical representations such as the conqueue data structure of Scala's data-parallel library, cyclic streams, as well as dynamic programming algorithms such as knapsack and Viterbi. Our evaluations show that when averaged over all benchmarks the actual runtime resource consumption is 80\% of the value inferred by our tool when estimating the number of evaluation steps, and is 88\% for the number of heap-allocated objects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Zhang:2017:CSD, author = "Qirun Zhang and Zhendong Su", title = "Context-sensitive data-dependence analysis via linear conjunctive language reachability", journal = j-SIGPLAN, volume = "52", number = "1", pages = "344--358", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009848", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many program analysis problems can be formulated as graph reachability problems. In the literature, context-free language (CFL) reachability has been the most popular formulation and can be computed in subcubic time. The context-sensitive data-dependence analysis is a fundamental abstraction that can express a broad range of program analysis problems. It essentially describes an interleaved matched-parenthesis language reachability problem. The language is not context-free, and the problem is well-known to be undecidable. In practice, many program analyses adopt CFL-reachability to exactly model the matched parentheses for either context-sensitivity or structure-transmitted data-dependence, but not both. Thus, the CFL-reachability formulation for context-sensitive data-dependence analysis is inherently an approximation. To support more precise and scalable analyses, this paper introduces linear conjunctive language (LCL) reachability, a new, expressive class of graph reachability. LCL not only contains the interleaved matched-parenthesis language, but is also closed under all set-theoretic operations. Given a graph with n nodes and m edges, we propose an O ( mn ) time approximation algorithm for solving all-pairs LCL-reachability, which is asymptotically better than known CFL-reachability algorithms. Our formulation and algorithm offer a new perspective on attacking the aforementioned undecidable problem --- the LCL-reachability formulation is exact, while the LCL-reachability algorithm yields a sound approximation. We have applied the LCL-reachability framework to two existing client analyses. The experimental results show that the LCL-reachability framework is both more precise and scalable than the traditional CFL-reachability framework. 
This paper opens up the opportunity to exploit LCL-reachability in program analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Hoffmann:2017:TAR, author = "Jan Hoffmann and Ankush Das and Shu-Chun Weng", title = "Towards automatic resource bound analysis for {OCaml}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "359--373", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009842", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This article presents a resource analysis system for OCaml programs. The system automatically derives worst-case resource bounds for higher-order polymorphic programs with user-defined inductive types. The technique is parametric in the resource and can derive bounds for time, memory allocations and energy usage. The derived bounds are multivariate resource polynomials which are functions of different size parameters that depend on the standard OCaml types. Bound inference is fully automatic and reduced to a linear optimization problem that is passed to an off-the-shelf LP solver. Technically, the analysis system is based on a novel multivariate automatic amortized resource analysis (AARA). It builds on existing work on linear AARA for higher-order programs with user-defined inductive types and on multivariate AARA for first-order programs with built-in lists and binary trees. This is the first amortized analysis that automatically derives polynomial bounds for higher-order functions and polynomial bounds that depend on user-defined inductive types. Moreover, the analysis handles a limited form of side effects and even outperforms the linear bound inference of previous systems. At the same time, it preserves the expressivity and efficiency of existing AARA techniques. The practicality of the analysis system is demonstrated with an implementation and integration with Inria's OCaml compiler. The implementation is used to automatically derive resource bounds for 411 functions and 6018 lines of code derived from OCaml libraries, the CompCert compiler, and implementations of textbook algorithms. In a case study, the system infers bounds on the number of queries that are sent by OCaml programs to DynamoDB, a commercial NoSQL cloud database service.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Scherer:2017:DES, author = "Gabriel Scherer", title = "Deciding equivalence with sums and the empty type", journal = j-SIGPLAN, volume = "52", number = "1", pages = "374--386", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009901", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The logical technique of focusing can be applied to the $ \lambda $ -calculus; in a simple type system with atomic types and negative type formers (functions, products, the unit type), its normal forms coincide with $ \beta \eta $-normal forms.
Introducing a saturation phase gives a notion of quasi-normal forms in the presence of positive types (sum types and the empty type). This rich structure lets us prove the decidability of $ \beta \eta $-equivalence in the presence of the empty type, the fact that it coincides with contextual equivalence, and with set-theoretic equality in all finite models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Ilik:2017:ELN, author = "Danko Ilik", title = "The exp--log normal form of types: decomposing extensional equality and representing terms compactly", journal = j-SIGPLAN, volume = "52", number = "1", pages = "387--399", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009841", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lambda calculi with algebraic data types lie at the core of functional programming languages and proof assistants, but conceal at least two fundamental theoretical problems already in the presence of the simplest non-trivial data type, the sum type. First, we do not know of an explicit and implemented algorithm for deciding the beta-eta-equality of terms---and this in spite of the first decidability results proven two decades ago. Second, it is not clear how to decide when two types are essentially the same, i.e. isomorphic, in spite of the meta-theoretic results on decidability of the isomorphism. In this paper, we present the exp-log normal form of types---derived from the representation of exponential polynomials via the unary exponential and logarithmic functions---that any type built from arrows, products, and sums can be isomorphically mapped to. The type normal form can be used as a simple heuristic for deciding type isomorphism, thanks to the fact that it is a systematic application of the high-school identities. We then show that the type normal form allows us to reduce the standard beta-eta equational theory of the lambda calculus to a specialized version of itself, while preserving completeness of the equality on terms. We end by describing an alternative representation of normal terms of the lambda calculus with sums, together with a Coq-implemented converter into/from our new term calculus. The difference with the only other previously implemented heuristic for deciding interesting instances of eta-equality, by Balat, Di Cosmo, and Fiore, is that we exploit the type information of terms substantially, and this often allows us to obtain a canonical representation of terms without performing sophisticated term analyses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Levy:2017:CI, author = "Paul Blain Levy", title = "Contextual isomorphisms", journal = j-SIGPLAN, volume = "52", number = "1", pages = "400--414", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009898", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "What is the right notion of ``isomorphism'' between types, in a simple type theory?
The traditional answer is: a pair of terms that are inverse up to a specified congruence. We firstly argue that, in the presence of effects, this answer is too liberal and needs to be restricted, using F{\"u}hrmann's notion of thunkability in the case of value types (as in call-by-value), or using Munch-Maccagnoni's notion of linearity in the case of computation types (as in call-by-name). Yet that leaves us with different notions of isomorphism for different kinds of type. This situation is resolved by means of a new notion of ``contextual'' isomorphism (or morphism), analogous at the level of types to contextual equivalence of terms. A contextual morphism is a way of replacing one type with the other wherever it may occur in a judgement, in a way that is preserved by the action of any term with holes. For types of pure $ \lambda $-calculus, we show that a contextual morphism corresponds to a traditional isomorphism. For value types, a contextual morphism corresponds to a thunkable isomorphism, and for computation types, to a linear isomorphism.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Brown:2017:TSE, author = "Matt Brown and Jens Palsberg", title = "Typed self-evaluation via intensional type functions", journal = j-SIGPLAN, volume = "52", number = "1", pages = "415--428", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009853", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many popular languages have a self-interpreter, that is, an interpreter for the language written in itself. So far, work on polymorphically-typed self-interpreters has concentrated on self-recognizers that merely recover a program from its representation. A larger and until now unsolved challenge is to implement a polymorphically-typed self-evaluator that evaluates the represented program and produces a representation of the result. We present F$_\omega^{\mu i}$, the first $ \lambda $-calculus that supports a polymorphically-typed self-evaluator. Our calculus extends F$_\omega $ with recursive types and intensional type functions and has decidable type checking. Our key innovation is a novel implementation of type equality proofs that enables us to define a versatile representation of programs. Our results establish a new category of languages that can support polymorphically-typed self-evaluators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Flur:2017:MSC, author = "Shaked Flur and Susmit Sarkar and Christopher Pulte and Kyndylan Nienhuis and Luc Maranget and Kathryn E.
Gray and Ali Sezgin and Mark Batty and Peter Sewell", title = "Mixed-size concurrency: {ARM}, {POWER}, {C\slash C++11}, and {SC}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "429--442", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009839", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Previous work on the semantics of relaxed shared-memory concurrency has only considered the case in which each load reads the data of exactly one store. In practice, however, multiprocessors support mixed-size accesses, and these are used by systems software and (to some degree) exposed at the C/C++ language level. A semantic foundation for software, therefore, has to address them. We investigate the mixed-size behaviour of ARMv8 and IBM POWER architectures and implementations: by experiment, by developing semantic models, by testing the correspondence between these, and by discussion with ARM and IBM staff. This turns out to be surprisingly subtle, and on the way we have to revisit the fundamental concepts of coherence and sequential consistency, which change in this setting. In particular, we show that adding a memory barrier between each instruction does not restore sequential consistency. We go on to extend the C/C++11 model to support non-atomic mixed-size memory accesses. This is a necessary step towards semantics for real-world shared-memory concurrent code, beyond litmus tests.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Lidbury:2017:DRD, author = "Christopher Lidbury and Alastair F. Donaldson", title = "Dynamic race detection for {C++11}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "443--457", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009857", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The intricate rules for memory ordering and synchronisation associated with the C/C++11 memory model mean that data races can be difficult to eliminate from concurrent programs. Dynamic data race analysis can pinpoint races in large and complex applications, but the state-of-the-art ThreadSanitizer (tsan) tool for C/C++ considers only sequentially consistent program executions, and does not correctly model synchronisation between C/C++11 atomic operations. We present a scalable dynamic data race analysis for C/C++11 that correctly captures C/C++11 synchronisation, and uses instrumentation to support exploration of a class of non sequentially consistent executions. We concisely define the memory model fragment captured by our instrumentation via a restricted axiomatic semantics, and show that the axiomatic semantics permits exactly those executions explored by our instrumentation. We have implemented our analysis in tsan, and evaluate its effectiveness on benchmark programs, enabling a comparison with the CDSChecker tool, and on two large and highly concurrent applications: the Firefox and Chromium web browsers. 
Our results show that our method can detect races that are beyond the scope of the original tsan tool, and that the overhead associated with applying our enhanced instrumentation to large applications is tolerable.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Brutschy:2017:SEC, author = "Lucas Brutschy and Dimitar Dimitrov and Peter M{\"u}ller and Martin Vechev", title = "Serializability for eventual consistency: criterion, analysis, and applications", journal = j-SIGPLAN, volume = "52", number = "1", pages = "458--472", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009895", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Developing and reasoning about systems using eventually consistent data stores is a difficult challenge due to the presence of unexpected behaviors that do not occur under sequential consistency. A fundamental problem in this setting is to identify a correctness criterion that precisely captures intended application behaviors yet is generic enough to be applicable to a wide range of applications. In this paper, we present such a criterion. More precisely, we generalize conflict serializability to the setting of eventual consistency. Our generalization is based on a novel dependency model that incorporates two powerful algebraic properties: commutativity and absorption. These properties enable precise reasoning about programs that employ high-level replicated data types, common in modern systems. To apply our criterion in practice, we also developed a dynamic analysis algorithm and a tool that checks whether a given program execution is serializable. We performed a thorough experimental evaluation on two real-world use cases: debugging cloud-backed mobile applications and implementing clients of a popular eventually consistent key-value store. Our experimental results indicate that our criterion reveals harmful synchronization problems in applications, is more effective at finding them than prior approaches, and can be used for the development of practical, eventually consistent applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Hoenicke:2017:TMM, author = "Jochen Hoenicke and Rupak Majumdar and Andreas Podelski", title = "Thread modularity at many levels: a pearl in compositional verification", journal = j-SIGPLAN, volume = "52", number = "1", pages = "473--485", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009893", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A thread-modular proof for the correctness of a concurrent program is based on an inductive and interference-free annotation of each thread. It is well-known that the corresponding proof system is not complete (unless one adds auxiliary variables). We describe a hierarchy of proof systems where each level k corresponds to a generalized notion of thread modularity (level 1 corresponds to the original notion). 
Each level is strictly more expressive than the previous. Further, each level precisely captures programs that can be proved using uniform Ashcroft invariants with k universal quantifiers. We demonstrate the usefulness of the hierarchy by giving a compositional proof of the Mach shootdown algorithm for TLB consistency. We show a proof at level 2 that shows the algorithm is correct for an arbitrary number of CPUs. However, there is no proof for the algorithm at level 1 which does not involve auxiliary state.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Leijen:2017:TDC, author = "Daan Leijen", title = "Type directed compilation of row-typed algebraic effects", journal = j-SIGPLAN, volume = "52", number = "1", pages = "486--499", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009872", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Algebraic effect handlers, introduced by Plotkin and Power in 2002, are recently gaining in popularity as a purely functional approach to modeling effects. In this article, we give a full overview of practical algebraic effects in the context of a compiled implementation in the Koka language. In particular, we show how algebraic effects generalize over common constructs like exception handling, state, iterators and async-await. We give an effective type inference algorithm based on extensible effect rows using scoped labels, and a direct operational semantics. Finally, we show an efficient compilation scheme to common runtime platforms (like JavaScript) using a type directed selective CPS translation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Lindley:2017:DDD, author = "Sam Lindley and Conor McBride and Craig McLaughlin", title = "Do be do be do", journal = j-SIGPLAN, volume = "52", number = "1", pages = "500--514", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009897", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We explore the design and implementation of Frank, a strict functional programming language with a bidirectional effect type system designed from the ground up around a novel variant of Plotkin and Pretnar's effect handler abstraction. Effect handlers provide an abstraction for modular effectful programming: a handler acts as an interpreter for a collection of commands whose interfaces are statically tracked by the type system. However, Frank eliminates the need for an additional effect handling construct by generalising the basic mechanism of functional abstraction itself. A function is simply the special case of a Frank operator that interprets no commands. Moreover, Frank's operators can be multihandlers which simultaneously interpret commands from several sources at once, without disturbing the direct style of functional programming with values. Effect typing in Frank employs a novel form of effect polymorphism which avoids mentioning effect variables in source code.
This is achieved by propagating an ambient ability inwards, rather than accumulating unions of potential effects outwards. We introduce Frank by example, and then give a formal account of the Frank type system and its semantics. We introduce Core Frank by elaborating Frank operators into functions, case expressions, and unary handlers, and then give a sound small-step operational semantics for Core Frank. Programming with effects and handlers is in its infancy. We contribute an exploration of future possibilities, particularly in combination with other forms of rich type system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Ahman:2017:DMF, author = "Danel Ahman and Catalin Hritcu and Kenji Maillard and Guido Mart{\'\i}nez and Gordon Plotkin and Jonathan Protzenko and Aseem Rastogi and Nikhil Swamy", title = "{Dijkstra} monads for free", journal = j-SIGPLAN, volume = "52", number = "1", pages = "515--529", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009878", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dijkstra monads enable a dependent type theory to be enhanced with support for specifying and verifying effectful code via weakest preconditions. Together with their closely related counterparts, Hoare monads, they provide the basis on which verification tools like F*, Hoare Type Theory (HTT), and Ynot are built. We show that Dijkstra monads can be derived ``for free'' by applying a continuation-passing style (CPS) translation to the standard monadic definitions of the underlying computational effects. Automatically deriving Dijkstra monads in this way provides a correct-by-construction and efficient way of reasoning about user-defined effects in dependent type theories. We demonstrate these ideas in EMF*, a new dependently typed calculus, validating it via both formal proof and a prototype implementation within F*. Besides equipping F* with a more uniform and extensible effect system, EMF* enables a novel mixture of intrinsic and extrinsic proofs within F*.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Sekiyama:2017:SMC, author = "Taro Sekiyama and Atsushi Igarashi", title = "Stateful manifest contracts", journal = j-SIGPLAN, volume = "52", number = "1", pages = "530--544", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009875", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper studies hybrid contract verification for an imperative higher-order language based on a so-called manifest contract system. In manifest contract systems, contracts are part of static types and contract verification is hybrid in the sense that some contracts are statically verified, typically by subtyping, but others are verified dynamically by casts.
It is, however, not trivial to extend existing manifest contract systems, which have been designed mostly for pure functional languages, to imperative features, mainly because of the lack of flow-sensitivity, which should be taken into account in verifying imperative programs statically. We develop an imperative higher-order manifest contract system $ \lambda_{\rm ref}^H $ for flow-sensitive hybrid contract verification. We introduce a computational variant of Nanevski et al.'s Hoare types, which are flow-sensitive types to represent pre- and postconditions of impure computation. Our Hoare types are computational in the sense that pre- and postconditions are given by Booleans in the same language as programs so that they are dynamically verifiable. $ \lambda_{\rm ref}^H $ also supports refinement types as in existing manifest contract systems to describe flow-insensitive, state-independent contracts of pure computation. While it is desirable that any --- possibly state-manipulating --- predicate can be used in contracts, abuse of stateful operations will break the system. To control stateful operations in contracts, we introduce a region-based effect system, which allows contracts in refinement types and computational Hoare types to manipulate states, as long as they are observationally pure and read-only, respectively. We show that dynamic contract checking in our calculus is consistent with static typing in the sense that the final result obtained without dynamic contract violations satisfies contracts in its static type. It in particular means that the state after stateful computations satisfies their postconditions. As in some prior manifest contract systems, static contract verification in this work is ``post facto,'' that is, we first define our manifest contract system so that all contracts are checked at run time, formalize conditions when dynamic checks can be removed safely, and show that programs with and without such removable checks are contextually equivalent. We also apply the idea of post facto verification to region-based local reasoning, inspired by the frame rule of Separation Logic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{deAmorim:2017:SAM, author = "Arthur Azevedo de Amorim and Marco Gaboardi and Justin Hsu and Shin-ya Katsumata and Ikram Cherigui", title = "A semantic account of metric preservation", journal = j-SIGPLAN, volume = "52", number = "1", pages = "545--556", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009890", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program sensitivity measures how robust a program is to small changes in its input, and is a fundamental notion in domains ranging from differential privacy to cyber-physical systems. A natural way to formalize program sensitivity is in terms of metrics on the input and output spaces, requiring that an r-sensitive function map inputs that are at distance d to outputs that are at distance at most $ r \cdot d $. Program sensitivity is thus an analogue of Lipschitz continuity for programs. Reed and Pierce introduced Fuzz, a functional language with a linear type system that can express program sensitivity.
They show soundness operationally, in the form of a metric preservation property. Inspired by their work, we study program sensitivity and metric preservation from a denotational point of view. In particular, we introduce metric CPOs, a novel semantic structure for reasoning about computation on metric spaces, by endowing CPOs with a compatible notion of distance. This structure is useful for reasoning about metric properties of programs, and specifically about program sensitivity. We demonstrate metric CPOs by giving a model for the deterministic fragment of Fuzz.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Smolka:2017:CMS, author = "Steffen Smolka and Praveen Kumar and Nate Foster and Dexter Kozen and Alexandra Silva", title = "{Cantor} meets {Scott}: semantic foundations for probabilistic networks", journal = j-SIGPLAN, volume = "52", number = "1", pages = "557--571", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009843", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "ProbNetKAT is a probabilistic extension of NetKAT with a denotational semantics based on Markov kernels. The language is expressive enough to generate continuous distributions, which raises the question of how to compute effectively in the language. This paper gives a new characterization of ProbNetKAT's semantics using domain theory, which provides the foundation needed to build a practical implementation. We show how to use the semantics to approximate the behavior of arbitrary ProbNetKAT programs using distributions with finite support. We develop a prototype implementation and show how to use it to solve a variety of problems including characterizing the expected congestion induced by different routing schemes and reasoning probabilistically about reachability in a network.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Subramanian:2017:GSF, author = "Kausik Subramanian and Loris D'Antoni and Aditya Akella", title = "{Genesis}: synthesizing forwarding tables in multi-tenant networks", journal = j-SIGPLAN, volume = "52", number = "1", pages = "572--585", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009845", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Operators in multi-tenant cloud datacenters require support for diverse and complex end-to-end policies, such as reachability, middlebox traversals, isolation, traffic engineering, and network resource management. We present Genesis, a datacenter network management system which allows policies to be specified in a declarative manner without explicitly programming the network data plane. Genesis tackles the problem of enforcing policies by synthesizing switch forwarding tables. It uses the formal foundations of constraint solving in combination with fast off-the-shelf SMT solvers.
To improve synthesis performance, Genesis incorporates a novel search strategy that uses regular expressions to specify properties that leverage the structure of datacenter networks, and a divide-and-conquer synthesis procedure which exploits the structure of policy relationships. We have prototyped Genesis, and conducted experiments with a variety of workloads on real-world topologies to demonstrate its performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kopczynski:2017:LSS, author = "Eryk Kopczy{\'n}ski and Szymon Toru{\'n}czyk", title = "{LOIS}: syntax and semantics", journal = j-SIGPLAN, volume = "52", number = "1", pages = "586--598", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009876", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the semantics of an imperative programming language called LOIS (Looping Over Infinite Sets), which allows iterating through certain infinite sets in finite time. Our semantics intuitively corresponds to the execution of infinitely many threads in parallel. This allows us to merge the power of abstract mathematical constructions into imperative programming. Infinite sets are internally represented using first order formulas over some underlying logical structure, and SMT solvers are employed to evaluate programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Feng:2017:CBSa, author = "Yu Feng and Ruben Martins and Yuepeng Wang and Isil Dillig and Thomas W. Reps", title = "Component-based synthesis for complex {APIs}", journal = j-SIGPLAN, volume = "52", number = "1", pages = "599--612", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009851", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Component-based approaches to program synthesis assemble programs from a database of existing components, such as methods provided by an API. In this paper, we present a novel type-directed algorithm for component-based synthesis. The key novelty of our approach is the use of a compact Petri-net representation to model relationships between methods in an API. Given a target method signature S, our approach performs reachability analysis on the underlying Petri-net model to identify sequences of method calls that could be used to synthesize an implementation of S. The programs synthesized by our algorithm are guaranteed to type check and pass all test cases provided by the user. We have implemented this approach in a tool called SyPet, and used it to successfully synthesize real-world programming tasks extracted from on-line forums and existing code repositories. We also compare SyPet with two state-of-the-art synthesis tools, namely InSynth and CodeHint, and demonstrate that SyPet can synthesize more programs in less time.
Finally, we compare our approach with an alternative solution based on hypergraphs and demonstrate its advantages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Moerman:2017:LNA, author = "Joshua Moerman and Matteo Sammartino and Alexandra Silva and Bartek Klin and Michal Szynwelski", title = "Learning nominal automata", journal = j-SIGPLAN, volume = "52", number = "1", pages = "613--625", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009879", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present an Angluin-style algorithm to learn nominal automata, which are acceptors of languages over infinite (structured) alphabets. The abstract approach we take allows us to seamlessly extend known variations of the algorithm to this new setting. In particular we can learn a subclass of nominal non-deterministic automata. An implementation using a recently developed Haskell library for nominal computation is provided for preliminary experiments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Bouajjani:2017:VCC, author = "Ahmed Bouajjani and Constantin Enea and Rachid Guerraoui and Jad Hamza", title = "On verifying causal consistency", journal = j-SIGPLAN, volume = "52", number = "1", pages = "626--638", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009888", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Causal consistency is one of the most adopted consistency criteria for distributed implementations of data structures. It ensures that operations are executed at all sites according to their causal precedence. We address the issue of verifying automatically whether the executions of an implementation of a data structure are causally consistent. We consider two problems: (1) checking whether one single execution is causally consistent, which is relevant for developing testing and bug finding algorithms, and (2) verifying whether all the executions of an implementation are causally consistent. We show that the first problem is NP-complete. This holds even for the read-write memory abstraction, which is a building block of many modern distributed systems. Indeed, such systems often store data in key-value stores, which are instances of the read-write memory abstraction. Moreover, we prove that, surprisingly, the second problem is undecidable, and again this holds even for the read-write memory abstraction. However, we show that for the read-write memory abstraction, these negative results can be circumvented if the implementations are data independent, i.e., their behaviors do not depend on the data values that are written or read at each moment, which is a realistic assumption. We prove that for data independent implementations, the problem of checking the correctness of a single execution w.r.t. the read-write memory abstraction is polynomial time. 
Furthermore, we show that for such implementations the set of non-causally consistent executions can be represented by means of a finite number of register automata. Using these machines as observers (in parallel with the implementation) allows us to polynomially reduce the problem of checking causal consistency to a state reachability problem. This reduction holds regardless of the class of programs used for the implementation, of the number of read-write variables, and of the data domain used. It allows leveraging existing techniques for assertion/reachability checking to causal consistency verification. Moreover, for a significant class of implementations, we derive from this reduction the decidability of verifying causal consistency w.r.t. the read-write memory abstraction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Srikanth:2017:CVU, author = "Akhilesh Srikanth and Burak Sahin and William R. Harris", title = "Complexity verification using guided theorem enumeration", journal = j-SIGPLAN, volume = "52", number = "1", pages = "639--652", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009864", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Determining if a given program satisfies a given bound on the amount of resources that it may use is a fundamental problem with critical practical applications. Conventional automatic verifiers for safety properties cannot be applied to address this problem directly because such verifiers target properties expressed in decidable theories; however, many practical bounds are expressed in nonlinear theories, which are undecidable. In this work, we introduce an automatic verification algorithm, CAMPY, that determines if a given program P satisfies a given resource bound B, which may be expressed using polynomial, exponential, and logarithmic terms. The key technical contribution behind our verifier is an interpolating theorem prover for non-linear theories that lazily learns a sufficiently accurate approximation of non-linear theories by selectively grounding theorems of the nonlinear theory that are relevant to proving that P satisfies B. To evaluate CAMPY, we implemented it to target Java Virtual Machine bytecode. We applied CAMPY to verify that over 20 solutions submitted for programming problems hosted on popular online coding platforms satisfy or do not satisfy expected complexity bounds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Dudenhefner:2017:ITC, author = "Andrej Dudenhefner and Jakob Rehof", title = "Intersection type calculi of bounded dimension", journal = j-SIGPLAN, volume = "52", number = "1", pages = "653--665", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009862", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A notion of dimension in intersection typed \lambda -calculi is presented.
The dimension of a typed \lambda -term is given by the minimal norm of an elaboration (a proof theoretic decoration) necessary for typing the term at its type, and, intuitively, measures intersection introduction as a resource. Bounded-dimensional intersection type calculi are shown to enjoy subject reduction, since terms can be elaborated in non-increasing norm under \beta -reduction. We prove that a multiset interpretation (corresponding to a non-idempotent and non-linear interpretation of intersection) of dimensionality corresponds to the number of simultaneous constraints required during search for inhabitants. As a consequence, the inhabitation problem is decidable in bounded multiset dimension, and it is proven to be EXPSPACE-complete. This result is a substantial generalization of inhabitation for the rank 2-fragment, yielding a calculus with decidable inhabitation which is independent of rank. Our results give rise to a new criterion (dimensional bound) for subclasses of intersection type calculi with a decidable inhabitation problem, which is orthogonal to previously known criteria, and which should have immediate applications in synthesis. Additionally, we give examples of dimensional analysis of fragments of the intersection type system, including conservativity over simple types, rank 2-types, and normal form typings, and we provide some observations towards dimensional analysis of other systems. It is suggested (for future work) that our notion of dimension may have semantic interpretations in terms of reduction complexity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Amin:2017:TSP, author = "Nada Amin and Tiark Rompf", title = "Type soundness proofs with definitional interpreters", journal = j-SIGPLAN, volume = "52", number = "1", pages = "666--679", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009866", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While type soundness proofs are taught in every graduate PL class, the gap between realistic languages and what is accessible to formal proofs is large. In the case of Scala, it has been shown that its formal model, the Dependent Object Types (DOT) calculus, cannot simultaneously support key metatheoretic properties such as environment narrowing and subtyping transitivity, which are usually required for a type soundness proof. Moreover, Scala and many other realistic languages lack a general substitution property. The first contribution of this paper is to demonstrate how type soundness proofs for advanced, polymorphic, type systems can be carried out with an operational semantics based on high-level, definitional interpreters, implemented in Coq. We present the first mechanized soundness proofs in this style for System F and several extensions, including mutable references. Our proofs use only straightforward induction, which is significant, as the combination of big-step semantics, mutable references, and polymorphism is commonly believed to require coinductive proof techniques. 
The second main contribution of this paper is to show how DOT-like calculi emerge from straightforward generalizations of the operational aspects of F, exposing a rich design space of calculi with path-dependent types in between System F and DOT, which we dub the System D Square. By working directly on the target language, definitional interpreters can focus the design space and expose the invariants that actually matter at runtime. Looking at such runtime invariants is an exciting new avenue for type system design.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Angiuli:2017:CHD, author = "Carlo Angiuli and Robert Harper and Todd Wilson", title = "Computational higher-dimensional type theory", journal = j-SIGPLAN, volume = "52", number = "1", pages = "680--693", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009861", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Formal constructive type theory has proved to be an effective language for mechanized proof. By avoiding non-constructive principles, such as the law of the excluded middle, type theory admits sharper proofs and broader interpretations of results. From a computer science perspective, interest in type theory arises from its applications to programming languages. Standard constructive type theories used in mechanization admit computational interpretations based on meta-mathematical normalization theorems. These proofs are notoriously brittle; any change to the theory potentially invalidates its computational meaning. As a case in point, Voevodsky's univalence axiom raises questions about the computational meaning of proofs. We consider the question: Can higher-dimensional type theory be construed as a programming language? We answer this question affirmatively by providing a direct, deterministic operational interpretation for a representative higher-dimensional dependent type theory with higher inductive types and an instance of univalence. Rather than being a formal type theory defined by rules, it is instead a computational type theory in the sense of Martin-L{\"o}f's meaning explanations and of the NuPRL semantics. The definition of the type theory starts with programs; types are specifications of program behavior. The main result is a canonicity theorem stating that closed programs of boolean type evaluate to true or false.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Chang:2017:TSM, author = "Stephen Chang and Alex Knauth and Ben Greenman", title = "Type systems as macros", journal = j-SIGPLAN, volume = "52", number = "1", pages = "694--705", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009886", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Turnstile, a metalanguage for creating typed embedded languages. To implement the type system, programmers write type checking rules resembling traditional judgment syntax.
To implement the semantics, they incorporate elaborations into these rules. Turnstile critically depends on the idea of linguistic reuse. It exploits a macro system in a novel way to simultaneously type check and rewrite a surface program into a target language. Reusing a macro system also yields modular implementations whose rules may be mixed and matched to create other languages. Combined with typical compiler and runtime reuse, Turnstile produces performant typed embedded languages with little effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Kumar:2017:PFA, author = "Ananya Kumar and Guy E. Blelloch and Robert Harper", title = "Parallel functional arrays", journal = j-SIGPLAN, volume = "52", number = "1", pages = "706--718", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009869", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The goal of this paper is to develop a form of functional arrays (sequences) that are as efficient as imperative arrays, can be used in parallel, and have well defined cost-semantics. The key idea is to consider sequences with functional value semantics but non-functional cost semantics. Because the value semantics is functional, ``updating'' a sequence returns a new sequence. We allow operations on ``older'' sequences (called interior sequences) to be more expensive than operations on the ``most recent'' sequences (called leaf sequences). We embed sequences in a language supporting fork-join parallelism. Due to the parallelism, operations can be interleaved non-deterministically, and, in conjunction with the different cost for interior and leaf sequences, this can lead to non-deterministic costs for a program. Consequently the costs of programs can be difficult to analyze. The main result is the derivation of a deterministic cost dynamics which makes analyzing the costs easier. The theorems are not specific to sequences and can be applied to other data types with different costs for operating on interior and leaf versions. We present a wait-free concurrent implementation of sequences that requires constant work for accessing and updating leaf sequences, and logarithmic work for accessing and linear work for updating interior sequences. We sketch a proof of correctness for the sequence implementation. The key advantages of the present approach compared to current approaches are that our implementation requires no changes to existing programming languages, supports nested parallelism, and has well defined cost semantics.
At the same time, it allows for functional implementations of algorithms such as depth-first search with the same asymptotic complexity as imperative implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Konnov:2017:SCP, author = "Igor Konnov and Marijana Lazi{\'c} and Helmut Veith and Josef Widder", title = "A short counterexample property for safety and liveness verification of fault-tolerant distributed algorithms", journal = j-SIGPLAN, volume = "52", number = "1", pages = "719--734", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009860", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Distributed algorithms have many mission-critical applications ranging from embedded systems and replicated databases to cloud computing. Due to asynchronous communication, process faults, or network failures, these algorithms are difficult to design and verify. Many algorithms achieve fault tolerance by using threshold guards that, for instance, ensure that a process waits until it has received an acknowledgment from a majority of its peers. Consequently, domain-specific languages for fault-tolerant distributed systems offer language support for threshold guards. We introduce an automated method for model checking of safety and liveness of threshold-guarded distributed algorithms in systems where the number of processes and the fraction of faulty processes are parameters. Our method is based on a short counterexample property: if a distributed algorithm violates a temporal specification (in a fragment of LTL), then there is a counterexample whose length is bounded and independent of the parameters. We prove this property by (i) characterizing executions depending on the structure of the temporal formula, and (ii) using commutativity of transitions to accelerate and shorten executions. We extended the ByMC toolset (Byzantine Model Checker) with our technique, and verified liveness and safety of 10 prominent fault-tolerant distributed algorithms, most of which were out of reach for existing techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Liu:2017:ADB, author = "Xinxin Liu and Tingting Yu and Wenhui Zhang", title = "Analyzing divergence in bisimulation semantics", journal = j-SIGPLAN, volume = "52", number = "1", pages = "735--747", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009870", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Some bisimulation based abstract equivalence relations may equate divergent systems with non-divergent ones, examples including weak bisimulation equivalence and branching bisimulation equivalence. Thus extra efforts are needed to analyze divergence for the compared systems. In this paper we propose a new method for analyzing divergence in bisimulation semantics, which relies only on simple observations of individual transitions. 
We show that this method can verify several typical divergence preserving bisimulation equivalences including two well-known ones. As an application case study, we use the proposed method to verify the HSY collision stack to draw the conclusion that the stack implementation is correct in terms of linearizability with lock-free progress condition.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Lange:2017:FGL, author = "Julien Lange and Nicholas Ng and Bernardo Toninho and Nobuko Yoshida", title = "Fencing off {Go}: liveness and safety for channel-based programming", journal = j-SIGPLAN, volume = "52", number = "1", pages = "748--761", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009847", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Go is a production-level statically typed programming language whose design features explicit message-passing primitives and lightweight threads, enabling (and encouraging) programmers to develop concurrent systems where components interact through communication more so than by lock-based shared memory concurrency. Go can only detect global deadlocks at runtime, but provides no compile-time protection against all too common communication mismatches or partial deadlocks. This work develops a static verification framework for bounded liveness and safety in Go programs, able to detect communication errors and partial deadlocks in a general class of realistic concurrent programs, including those with dynamic channel creation and infinite recursion. Our approach infers from a Go program a faithful representation of its communication patterns as a behavioural type. By checking a syntactic restriction on channel usage, dubbed fencing, we ensure that programs are made up of finitely many different communication patterns that may be repeated infinitely many times. This restriction allows us to implement bounded verification procedures (akin to bounded model checking) to check for liveness and safety in types which in turn approximates liveness and safety in Go programs. We have implemented a type inference and liveness and safety checks in a tool-chain and tested it against publicly available Go programs. Updated on 27th Feb 2017. See Comments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Vitousek:2017:BTL, author = "Michael M. Vitousek and Cameron Swords and Jeremy G. Siek", title = "Big types in little runtime: open-world soundness and collaborative blame for gradual type systems", journal = j-SIGPLAN, volume = "52", number = "1", pages = "762--774", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009849", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Gradual typing combines static and dynamic typing in the same language, offering programmers the error detection and strong guarantees of static types and the rapid prototyping and flexible programming idioms of dynamic types. 
Many gradually typed languages are implemented by translation into an untyped target language (e.g., Typed Clojure, TypeScript, Gradualtalk, and Reticulated Python). For such languages, it is desirable to support arbitrary interaction between translated code and legacy code in the untyped language while maintaining the type soundness of the translated code. In this paper we formalize this goal in the form of the open-world soundness criterion. We discuss why it is challenging to achieve open-world soundness using the traditional proxy-based approach for higher-order casts. However, the transient design satisfies open-world soundness. Indeed, we present a formal semantics for the transient design and prove that our semantics satisfies open-world soundness. In this paper we also solve a challenging problem for the transient design: how to provide blame tracking without proxies. We define a semantics for blame and prove the Blame Theorem. We also prove that the Gradual Guarantee holds for this system, ensuring that programs can be evolved freely between static and dynamic typing. Finally, we demonstrate that the runtime overhead of the transient approach is low in the context of Reticulated Python, an implementation of gradual typing for Python.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Lehmann:2017:GRT, author = "Nico Lehmann and {\'E}ric Tanter", title = "Gradual refinement types", journal = j-SIGPLAN, volume = "52", number = "1", pages = "775--788", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009856", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Refinement types are an effective language-based verification technique. However, as any expressive typing discipline, its strength is its weakness, imposing sometimes undesired rigidity. Guided by abstract interpretation, we extend the gradual typing agenda and develop the notion of gradual refinement types, allowing smooth evolution and interoperability between simple types and logically-refined types. In doing so, we address two challenges unexplored in the gradual typing literature: dealing with imprecise logical information, and with dependent function types. The first challenge leads to a crucial notion of locality for refinement formulas, and the second yields novel operators related to type- and term-level substitution, identifying new opportunity for runtime errors in gradual dependently-typed languages. The gradual language we present is type safe, type sound, and satisfies the refined criteria for gradually-typed languages of Siek et al. We also explain how to extend our approach to richer refinement logics, anticipating key challenges to consider.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Cimini:2017:AGD, author = "Matteo Cimini and Jeremy G. 
Siek", title = "Automatically generating the dynamic semantics of gradually typed languages", journal = j-SIGPLAN, volume = "52", number = "1", pages = "789--803", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009863", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many language designers have adopted gradual typing. However, there remains open questions regarding how to gradualize languages. Cimini and Siek (2016) created a methodology and algorithm to automatically generate the type system of a gradually typed language from a fully static version of the language. In this paper, we address the next challenge of how to automatically generate the dynamic semantics of gradually typed languages. Such languages typically use an intermediate language with explicit casts. Our first result is a methodology for generating the syntax, type system, and dynamic semantics of the intermediate language with casts. Next, we present an algorithm that formalizes and automates the methodology, given a language definition as input. We show that our approach is general enough to automatically gradualize several languages, including features such as polymorphism, recursive types and exceptions. We prove that our algorithm produces languages that satisfy the key correctness criteria of gradual typing. Finally, we implement the algorithm, generating complete specifications of gradually typed languages in lambda-Prolog, including executable interpreters.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Jafery:2017:SUR, author = "Khurram A. Jafery and Joshua Dunfield", title = "Sums of uncertainty: refinements go gradual", journal = j-SIGPLAN, volume = "52", number = "1", pages = "804--817", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009865", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A long-standing shortcoming of statically typed functional languages is that type checking does not rule out pattern-matching failures (run-time match exceptions). Refinement types distinguish different values of datatypes; if a program annotated with refinements passes type checking, pattern-matching failures become impossible. Unfortunately, refinement is a monolithic property of a type, exacerbating the difficulty of adding refinement types to nontrivial programs. Gradual typing has explored how to incrementally move between static typing and dynamic typing. We develop a type system of gradual sums that combines refinement with imprecision. Then, we develop a bidirectional version of the type system, which rules out excessive imprecision, and give a type-directed translation to a target language with explicit casts. We prove that the static sublanguage cannot have match failures, that a well-typed program remains well-typed if its type annotations are made less precise, and that making annotations less precise causes target programs to fail later. Several of these results correspond to criteria for gradual typing given by Siek et al. 
(2015).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Ying:2017:IQP, author = "Mingsheng Ying and Shenggang Ying and Xiaodi Wu", title = "Invariants of quantum programs: characterisations and generation", journal = j-SIGPLAN, volume = "52", number = "1", pages = "818--832", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009840", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program invariant is a fundamental notion widely used in program verification and analysis. The aim of this paper is twofold: (i) find an appropriate definition of invariants for quantum programs; and (ii) develop an effective technique of invariant generation for verification and analysis of quantum programs. Interestingly, the notion of invariant can be defined for quantum programs in two different ways --- additive invariants and multiplicative invariants --- corresponding to two interpretations of implication in a continuous valued logic: the Lukasiewicz implication and the Godel implication. It is shown that both of them can be used to establish partial correctness of quantum programs. The problem of generating additive invariants of quantum programs is addressed by reducing it to an SDP (Semidefinite Programming) problem. This approach is applied with an SDP solver to generate invariants of two important quantum algorithms --- quantum walk and quantum Metropolis sampling. Our examples show that the generated invariants can be used to verify correctness of these algorithms and are helpful in optimising quantum Metropolis sampling. To our knowledge, this paper is the first attempt to define the notion of invariant and to develop a method of invariant generation for quantum programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{DalLago:2017:GPC, author = "Ugo {Dal Lago} and Claudia Faggian and Beno{\^\i}t Valiron and Akira Yoshimizu", title = "The geometry of parallelism: classical, probabilistic, and quantum effects", journal = j-SIGPLAN, volume = "52", number = "1", pages = "833--845", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009859", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a Geometry of Interaction model for higher-order quantum computation, and prove its adequacy for a fully fledged quantum programming language in which entanglement, duplication, and recursion are all available. This model is an instance of a new framework which captures not only quantum but also classical and probabilistic computation. Its main feature is the ability to model commutative effects in a parallel setting. Our model comes with a multi-token machine, a proof net system, and a -style language. 
Being based on a multi-token machine equipped with a memory, it has a concrete nature which makes it well suited for building low-level operational descriptions of higher-order languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Paykin:2017:QCL, author = "Jennifer Paykin and Robert Rand and Steve Zdancewic", title = "{QWIRE}: a core language for quantum circuits", journal = j-SIGPLAN, volume = "52", number = "1", pages = "846--858", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009894", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces QWIRE (``choir''), a language for defining quantum circuits and an interface for manipulating them inside of an arbitrary classical host language. QWIRE is minimal---it contains only a few primitives---and sound with respect to the physical properties entailed by quantum mechanics. At the same time, QWIRE is expressive and highly modular due to its relationship with the host language, mirroring the QRAM model of computation that places a quantum computer (controlled by circuits) alongside a classical computer (controlled by the host language). We present QWIRE along with its type system and operational semantics, which we prove is safe and strongly normalizing whenever the host language is. We give circuits a denotational semantics in terms of density matrices. Throughout, we investigate examples that demonstrate the expressive power of QWIRE, including extensions to the host language that (1) expose a general analysis framework for circuits, and (2) provide dependent types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Amin:2017:LVA, author = "Nada Amin and Tiark Rompf", title = "{LMS-Verify}: abstraction without regret for verified systems programming", journal = j-SIGPLAN, volume = "52", number = "1", pages = "859--873", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009867", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance critical software is almost always developed in C, as programmers do not trust high-level languages to deliver the same reliable performance. This is bad because low-level code in unsafe languages attracts security vulnerabilities and because development is far less productive, with PL advances mostly lost on programmers operating under tight performance constraints. High-level languages provide memory safety out of the box, but they are deemed too slow and unpredictable for serious system software. Recent years have seen a surge in staging and generative programming: the key idea is to use high-level languages and their abstraction power as glorified macro systems to compose code fragments in first-order, potentially domain-specific, intermediate languages, from which fast C can be emitted. But what about security? Since the end result is still C code, the safety guarantees of the high-level host language are lost. 
In this paper, we extend this generative approach to emit ACSL specifications along with C code. We demonstrate that staging achieves ``abstraction without regret'' for verification: we show how high-level programming models, in particular higher-order composable contracts from dynamic languages, can be used at generation time to compose and generate first-order specifications that can be statically checked by existing tools. We also show how type classes can automatically attach invariants to data types, reducing the need for repetitive manual annotations. We evaluate our system on several case studies that varyingly exercise verification of memory safety, overflow safety, and functional correctness. We feature an HTTP parser that is (1) fast (2) high-level: implemented using staged parser combinators (3) secure: with verified memory safety. This result is significant, as input parsing is a key attack vector, and vulnerabilities related to HTTP parsing have been documented in all widely-used web servers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Assaf:2017:HSA, author = "Mounir Assaf and David A. Naumann and Julien Signoles and {\'E}ric Totel and Fr{\'e}d{\'e}ric Tronel", title = "Hypercollecting semantics and its application to static analysis of information flow", journal = j-SIGPLAN, volume = "52", number = "1", pages = "874--887", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009889", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We show how static analysis for secure information flow can be expressed and proved correct entirely within the framework of abstract interpretation. The key idea is to define a Galois connection that directly approximates the hyperproperty of interest. To enable use of such Galois connections, we introduce a fixpoint characterisation of hypercollecting semantics, i.e. a ``set of sets'' transformer. This makes it possible to systematically derive static analyses for hyperproperties entirely within the calculational framework of abstract interpretation. We evaluate this technique by deriving example static analyses. For qualitative information flow, we derive a dependence analysis similar to the logic of Amtoft and Banerjee (SAS'04) and the type system of Hunt and Sands (POPL'06). For quantitative information flow, we derive a novel cardinality analysis that bounds the leakage conveyed by a program instead of simply deciding whether it exists. This encompasses problems that are hypersafety but not $k$-safety.
We put the framework to use and introduce variations that achieve precision rivalling the most recent and precise static analyses for information flow.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Zhang:2017:LTA, author = "Danfeng Zhang and Daniel Kifer", title = "{LightDP}: towards automating differential privacy proofs", journal = j-SIGPLAN, volume = "52", number = "1", pages = "888--901", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009884", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The growing popularity and adoption of differential privacy in academic and industrial settings has resulted in the development of increasingly sophisticated algorithms for releasing information while preserving privacy. Accompanying this phenomenon is the natural rise in the development and publication of incorrect algorithms, thus demonstrating the necessity of formal verification tools. However, existing formal methods for differential privacy face a dilemma: methods based on customized logics can verify sophisticated algorithms but come with a steep learning curve and significant annotation burden on the programmers, while existing programming platforms lack expressive power for some sophisticated algorithms. In this paper, we present LightDP, a simple imperative language that strikes a better balance between expressive power and usability. The core of LightDP is a novel relational type system that separates relational reasoning from privacy budget calculations. With dependent types, the type system is powerful enough to verify sophisticated algorithms where the composition theorem falls short. In addition, the inference engine of LightDP infers most of the proof details, and even searches for the proof with minimal privacy cost when multiple proofs exist. We show that LightDP verifies sophisticated algorithms with little manual effort.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "POPL '17 conference proceedings.", } @Article{Tallada:2016:CGP, author = "Marc Gonzalez Tallada", title = "Coarse grain parallelization of deep neural networks", journal = j-SIGPLAN, volume = "51", number = "8", pages = "1:1--1:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deep neural networks (DNN) have recently achieved extraordinary results in domains like computer vision and speech recognition. An essential element for this success has been the introduction of high performance computing (HPC) techniques in the critical step of training the neural network. This paper describes the implementation and analysis of a network-agnostic and convergence-invariant coarse-grain parallelization of the DNN training algorithm. The coarse-grain parallelization is achieved through the exploitation of the batch-level parallelism. This strategy is independent from the support of specialized and optimized libraries. 
Therefore, the optimization is immediately available for accelerating the DNN training. The proposal is compatible with multi-GPU execution without altering the algorithm convergence rate. The parallelization has been implemented in Caffe, a state-of-the-art DNN framework. The paper describes the code transformations for the parallelization and we also identify the limiting performance factors of the approach. We show competitive performance results for two state-of-the-art computer vision datasets, MNIST and CIFAR-10. In particular, on a 16-core Xeon E5-2667v2 at 3.30GHz we observe speedups of 8$ \times $ over the sequential execution, at similar performance levels of those obtained by the GPU optimized Caffe version in a NVIDIA K40 GPU.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Wang:2016:HPM, author = "Xiao Wang and Amit Sabne and Sherman Kisner and Anand Raghunathan and Charles Bouman and Samuel Midkiff", title = "High performance model based image reconstruction", journal = j-SIGPLAN, volume = "51", number = "8", pages = "2:1--2:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851163", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computed Tomography (CT) Image Reconstruction is an important technique used in a wide range of applications, ranging from explosive detection, medical imaging to scientific imaging. Among available reconstruction methods, Model Based Iterative Reconstruction (MBIR) produces higher quality images and allows for the use of more general CT scanner geometries than is possible with more commonly used methods. The high computational cost of MBIR, however, often makes it impractical in applications for which it would otherwise be ideal. This paper describes a new MBIR implementation that significantly reduces the computational cost of MBIR while retaining its benefits. It describes a novel organization of the scanner data into super-voxels (SV) that, combined with a super-voxel buffer (SVB), dramatically increase locality and prefetching, enable parallelism across SVs and lead to an average speedup of 187 on 20 cores.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Agrawal:2016:EAE, author = "Sandeep R. Agrawal and Christopher M. Dee and Alvin R. Lebeck", title = "Exploiting accelerators for efficient high dimensional similarity search", journal = j-SIGPLAN, volume = "51", number = "8", pages = "3:1--3:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851144", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Similarity search finds the most similar matches in an object collection for a given query; making it an important problem across a wide range of disciplines such as web search, image recognition and protein sequencing. 
Practical implementations of High Dimensional Similarity Search (HDSS) search across billions of possible solutions for multiple queries in real time, making its performance and efficiency a significant challenge. Existing clusters and datacenters use commercial multicore hardware to perform search, which may not provide the optimal performance and performance per Watt. This work explores the performance, power and cost benefits of using throughput accelerators like GPUs to perform similarity search for query cohorts even under tight deadlines. We propose optimized implementations of similarity search for both the host and the accelerator. Augmenting existing Xeon servers with accelerators results in a 3$ \times $ improvement in throughput per machine, resulting in a more than 2.5$ \times $ reduction in cost of ownership, even for discounted Xeon servers. Replacing a Xeon based cluster with an accelerator based cluster for similarity search reduces the total cost of ownership by more than 6$ \times $ to 16$ \times $ while consuming significantly less power than an ARM based cluster.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Cruz:2016:DCG, author = "Flavio Cruz and Ricardo Rocha and Seth Copen Goldstein", title = "Declarative coordination of graph-based parallel programs", journal = j-SIGPLAN, volume = "51", number = "8", pages = "4:1--4:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851153", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Declarative programming has been hailed as a promising approach to parallel programming since it makes it easier to reason about programs while hiding the implementation details of parallelism from the programmer. However, its advantage is also its disadvantage as it leaves the programmer with no straightforward way to optimize programs for performance. In this paper, we introduce Coordinated Linear Meld (CLM), a concurrent forward-chaining linear logic programming language, with a declarative way to coordinate the execution of parallel programs allowing the programmer to specify arbitrary scheduling and data partitioning policies. Our approach allows the programmer to write graph-based declarative programs and then optionally to use coordination to fine-tune parallel performance. In this paper we specify the set of coordination facts, discuss their implementation in a parallel virtual machine, and show---through example---how they can be used to optimize parallel execution. 
We compare the performance of CLM programs against the original uncoordinated Linear Meld and several other frameworks.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Denniston:2016:DH, author = "Tyler Denniston and Shoaib Kamil and Saman Amarasinghe", title = "Distributed {Halide}", journal = j-SIGPLAN, volume = "51", number = "8", pages = "5:1--5:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851157", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many image processing tasks are naturally expressed as a pipeline of small computational kernels known as stencils. Halide is a popular domain-specific language and compiler designed to implement image processing algorithms. Halide uses simple language constructs to express what to compute and a separate scheduling co-language for expressing when and where to perform the computation. This approach has demonstrated performance comparable to or better than hand-optimized code. Until now, however, Halide has been restricted to parallel shared memory execution, limiting its performance for memory-bandwidth-bound pipelines or large-scale image processing tasks. We present an extension to Halide to support distributed-memory parallel execution of complex stencil pipelines. These extensions compose with the existing scheduling constructs in Halide, allowing expression of complex computation and communication strategies. Existing Halide applications can be distributed with minimal changes, allowing programmers to explore the tradeoff between recomputation and communication with little effort. Approximately 10 new lines of code are needed even for a 200 line, 99 stage application. On nine image processing benchmarks, our extensions give up to a 1.4$ \times $ speedup on a single node over regular multithreaded execution with the same number of cores, by mitigating the effects of non-uniform memory access. The distributed benchmarks achieve up to 18$ \times $ speedup on a 16 node testing machine and up to 57$ \times $ speedup on 64 nodes of the NERSC Cori supercomputer.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Newton:2016:PTC, author = "Ryan R. Newton and {\"O}mer S. Agacan and Peter Fogg and Sam Tobin-Hochstadt", title = "Parallel type-checking with {Haskell} using saturating {LVars} and stream generators", journal = j-SIGPLAN, volume = "51", number = "8", pages = "6:1--6:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851142", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Given the sophistication of recent type systems, unification-based type-checking and inference can be a time-consuming phase of compilation---especially when union types are combined with subtyping.
It is natural to consider improving performance through parallelism, but these algorithms are challenging to parallelize due to complicated control structure and difficulties representing data in a way that is both efficient and supports concurrency. We provide techniques that address these problems based on the LVish approach to deterministic-by-default parallel programming. We extend LVish with Saturating LVars, the first LVars implemented to release memory during the object's lifetime. Our design allows us to achieve a parallel speedup on worst-case (exponential) inputs of Hindley-Milner inference, and on the Typed Racket type-checking algorithm, which yields up to an 8.46$ \times $ parallel speedup on 14 cores for type-checking examples drawn from the Racket repository.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Wang:2016:APG, author = "Lei Wang and Fan Yang and Liangji Zhuang and Huimin Cui and Fang Lv and Xiaobing Feng", title = "Articulation points guided redundancy elimination for betweenness centrality", journal = j-SIGPLAN, volume = "51", number = "8", pages = "7:1--7:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851154", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Betweenness centrality (BC) is an important metric in graph analysis which indicates critical vertices in large-scale networks based on shortest path enumeration. Typically, a BC algorithm constructs a shortest-path DAG for each vertex to calculate its BC score. However, for emerging real-world graphs, even the state-of-the-art BC algorithm will introduce a number of redundancies, as suggested by the existence of articulation points. Articulation points imply some common sub-DAGs in the DAGs for different vertices, but existing algorithms do not leverage such information and miss the optimization opportunity. We propose a redundancy elimination approach, which identifies the common sub-DAGs shared between the DAGs for different vertices. Our approach leverages the articulation points and reuses the results of the common sub-DAGs in calculating the BC scores, which eliminates redundant computations. We implemented the approach as an algorithm with two-level parallelism and evaluated it on a multicore platform.
Compared to the state-of-the-art implementation using shared memory, our approach achieves an average speedup of 4.6x across a variety of real-world graphs, with the traversal rates up to 45--2400 MTEPS (Millions of Traversed Edges per Second).", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Bloemen:2016:MCF, author = "Vincent Bloemen and Alfons Laarman and Jaco van de Pol", title = "Multi-core on-the-fly {SCC} decomposition", journal = j-SIGPLAN, volume = "51", number = "8", pages = "8:1--8:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The main advantages of Tarjan's strongly connected component (SCC) algorithm are its linear time complexity and ability to return SCCs on-the-fly, while traversing or even generating the graph. Until now, most parallel SCC algorithms sacrifice both: they run in quadratic worst-case time and/or require the full graph in advance. The current paper presents a novel parallel, on-the-fly SCC algorithm. It preserves the linear-time property by letting workers explore the graph randomly while carefully communicating partially completed SCCs. We prove that this strategy is correct. For efficiently communicating partial SCCs, we develop a concurrent, iterable disjoint set structure (combining the union-find data structure with a cyclic list). We demonstrate scalability on a 64-core machine using 75 real-world graphs (from model checking and explicit data graphs), synthetic graphs (combinations of trees, cycles and linear graphs), and random graphs. Previous work did not show speedups for graphs containing a large SCC. We observe that our parallel algorithm is typically 10-30$ \times $ faster compared to Tarjan's algorithm for graphs containing a large SCC. Comparable performance (with respect to the current state-of-the-art) is obtained for graphs containing many small SCCs.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Kannan:2016:HPP, author = "Ramakrishnan Kannan and Grey Ballard and Haesun Park", title = "A high-performance parallel algorithm for nonnegative matrix factorization", journal = j-SIGPLAN, volume = "51", number = "8", pages = "9:1--9:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851152", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-negative matrix factorization (NMF) is the problem of determining two non-negative low rank factors W and H, for the given input matrix A, such that $A \approx WH$. NMF is a useful tool for many applications in different domains such as topic modeling in text mining, background separation in video analysis, and community detection in social networks. Despite its popularity in the data mining community, there is a lack of efficient distributed algorithms to solve the problem for big data sets.
We propose a high-performance distributed-memory parallel algorithm that computes the factorization by iteratively solving alternating non-negative least squares (NLS) subproblems for W and H. It maintains the data and factor matrices in memory (distributed across processors), uses MPI for interprocessor communication, and, in the dense case, provably minimizes communication costs (under mild assumptions). As opposed to previous implementations, our algorithm is also flexible: (1) it performs well for both dense and sparse matrices, and (2) it allows the user to choose any one of the multiple algorithms for solving the updates to low rank factors W and H within the alternating iterations. We demonstrate the scalability of our algorithm and compare it with baseline implementations, showing significant performance improvements.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Chowdhury:2016:AAD, author = "Rezaul Chowdhury and Pramod Ganapathi and Jesmin Jahan Tithi and Charles Bachmeier and Bradley C. Kuszmaul and Charles E. Leiserson and Armando Solar-Lezama and Yuan Tang", title = "{AUTOGEN}: automatic discovery of cache-oblivious parallel recursive algorithms for solving dynamic programs", journal = j-SIGPLAN, volume = "51", number = "8", pages = "10:1--10:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851167", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present AUTOGEN---an algorithm that for a wide class of dynamic programming (DP) problems automatically discovers highly efficient cache-oblivious parallel recursive divide-and-conquer algorithms from inefficient iterative descriptions of DP recurrences. AUTOGEN analyzes the set of DP table locations accessed by the iterative algorithm when run on a DP table of small size, and automatically identifies a recursive access pattern and a corresponding provably correct recursive algorithm for solving the DP recurrence. We use AUTOGEN to autodiscover efficient algorithms for several well-known problems. Our experimental results show that several autodiscovered algorithms significantly outperform parallel looping and tiled loop-based algorithms. Also these algorithms are less sensitive to fluctuations of memory and bandwidth compared with their looping counterparts, and their running times and energy profiles remain relatively more stable. To the best of our knowledge, AUTOGEN is the first algorithm that can automatically discover new nontrivial divide-and-conquer algorithms.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Wang:2016:GHP, author = "Yangzihao Wang and Andrew Davidson and Yuechao Pan and Yuduo Wu and Andy Riffel and John D. 
Owens", title = "{Gunrock}: a high-performance graph processing library on the {GPU}", journal = j-SIGPLAN, volume = "51", number = "8", pages = "11:1--11:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851145", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For large-scale graph analytics on the GPU, the irregularity of data access/control flow and the complexity of programming GPUs have been two significant challenges for developing a programmable high-performance graph library. ``Gunrock,'' our high-level bulk-synchronous graph-processing system targeting the GPU, takes a new approach to abstracting GPU graph analytics: rather than designing an abstraction around computation, Gunrock instead implements a novel data-centric abstraction centered on operations on a vertex or edge frontier. Gunrock achieves a balance between performance and expressiveness by coupling high-performance GPU computing primitives and optimization strategies with a high-level programming model that allows programmers to quickly develop new graph primitives with small code size and minimal GPU programming knowledge. We evaluate Gunrock on five graph primitives (BFS, BC, SSSP, CC, and PageRank) and show that Gunrock has on average at least an order of magnitude speedup over Boost and PowerGraph, comparable performance to the fastest GPU hardwired primitives, and better performance than any other GPU high-level graph library.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Ashkiani:2016:GM, author = "Saman Ashkiani and Andrew Davidson and Ulrich Meyer and John D. Owens", title = "{GPU} multisplit", journal = j-SIGPLAN, volume = "51", number = "8", pages = "12:1--12:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851169", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multisplit is a broadly useful parallel primitive that permutes its input data into contiguous buckets or bins, where the function that categorizes an element into a bucket is provided by the programmer. Due to the lack of an efficient multisplit on GPUs, programmers often choose to implement multisplit with a sort. However, sort does more work than necessary to implement multisplit, and is thus inefficient. In this work, we provide a parallel model and multiple implementations for the multisplit problem. Our principal focus is multisplit for a small number of buckets. In our implementations, we exploit the computational hierarchy of the GPU to perform most of the work locally, with minimal usage of global operations. We also use warp-synchronous programming models to avoid branch divergence and reduce memory usage, as well as hierarchical reordering of input elements to achieve better coalescing of global memory accesses. 
On an NVIDIA K40c GPU, for key-only (key-value) multisplit, we demonstrate a 3.0-6.7x (4.4-8.0x) speedup over radix sort, and achieve a peak throughput of 10.0 G keys/s.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Matteis:2016:KCR, author = "Tiziano {De Matteis} and Gabriele Mencagli", title = "Keep calm and react with foresight: strategies for low-latency and energy-efficient elastic data stream processing", journal = j-SIGPLAN, volume = "51", number = "8", pages = "13:1--13:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851148", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper addresses the problem of designing scaling strategies for elastic data stream processing. Elasticity allows applications to rapidly change their configuration on-the-fly (e.g., the amount of used resources) in response to dynamic workload fluctuations. In this work we face this problem by adopting the Model Predictive Control technique, a control-theoretic method aimed at finding the optimal application configuration along a limited prediction horizon in the future by solving an online optimization problem. Our control strategies are designed to address latency constraints, using Queueing Theory models, and energy consumption by changing the number of used cores and the CPU frequency through the Dynamic Voltage and Frequency Scaling (DVFS) support available in the modern multicore CPUs. The proactive capabilities, in addition to the latency- and energy-awareness, represent the novel features of our approach. To validate our methodology, we develop a thorough set of experiments on a high-frequency trading application. The results demonstrate the high-degree of flexibility and configurability of our approach, and show the effectiveness of our elastic scaling strategies compared with existing state-of-the-art techniques used in similar scenarios.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Li:2016:WSI, author = "Jing Li and Kunal Agrawal and Sameh Elnikety and Yuxiong He and I-Ting Angelina Lee and Chenyang Lu and Kathryn S. McKinley", title = "Work stealing for interactive services to meet target latency", journal = j-SIGPLAN, volume = "51", number = "8", pages = "14:1--14:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851151", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interactive web services increasingly drive critical business workloads such as search, advertising, games, shopping, and finance. Whereas optimizing parallel programs and distributed server systems have historically focused on average latency and throughput, the primary metric for interactive applications is instead consistent responsiveness, i.e., minimizing the number of requests that miss a target latency. 
This paper is the first to show how to generalize work-stealing, which is traditionally used to minimize the makespan of a single parallel job, to optimize for a target latency in interactive services with multiple parallel requests. We design a new adaptive work stealing policy, called tail-control, that reduces the number of requests that miss a target latency. It uses instantaneous request progress, system load, and a target latency to choose when to parallelize requests with stealing, when to admit new requests, and when to limit parallelism of large requests. We implement this approach in the Intel Thread Building Block (TBB) library and evaluate it on real-world workloads and synthetic workloads. The tail-control policy substantially reduces the number of requests exceeding the desired target latency and delivers up to 58\% relative improvement over various baseline policies. This generalization of work stealing for multiple requests effectively optimizes the number of requests that complete within a target latency, a key metric for interactive services.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Steele:2016:AAC, author = "Guy L. {Steele, Jr.} and Jean-Baptiste Tristan", title = "Adding approximate counters", journal = j-SIGPLAN, volume = "51", number = "8", pages = "15:1--15:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851147", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a general framework for adding the values of two approximate counters to produce a new approximate counter value whose expected estimated value is equal to the sum of the expected estimated values of the given approximate counters. (To the best of our knowledge, this is the first published description of any algorithm for adding two approximate counters.) We then work out implementation details for five different kinds of approximate counter and provide optimized pseudocode. For three of them, we present proofs that the variance of a counter value produced by adding two counter values in this way is bounded, and in fact is no worse, or not much worse, than the variance of the value of a single counter to which the same total number of increment operations have been applied. Addition of approximate counters is useful in massively parallel divide-and-conquer algorithms that use a distributed representation for large arrays of counters. 
We describe two machine-learning algorithms for topic modeling that use millions of integer counters, and confirm that replacing the integer counters with approximate counters is effective, speeding up a GPU-based implementation by over 65\% and a CPU-based implementation by nearly 50\%, as well as reducing memory requirements, without degrading their statistical effectiveness.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Yang:2016:WFQ, author = "Chaoran Yang and John Mellor-Crummey", title = "A wait-free queue as fast as fetch-and-add", journal = j-SIGPLAN, volume = "51", number = "8", pages = "16:1--16:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851168", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent data structures that have fast and predictable performance are of critical importance for harnessing the power of multicore processors, which are now ubiquitous. Although wait-free objects, whose operations complete in a bounded number of steps, were devised more than two decades ago, wait-free objects that can deliver scalable high performance are still rare. In this paper, we present the first wait-free FIFO queue based on fetch-and-add (FAA). While compare-and-swap (CAS) based non-blocking algorithms may perform poorly due to work wasted by CAS failures, algorithms that coordinate using FAA, which is guaranteed to succeed, can in principle perform better under high contention. Along with FAA, our queue uses a custom epoch-based scheme to reclaim memory; on x86 architectures, it requires no extra memory fences on our algorithm's typical execution path. An empirical study of our new FAA-based wait-free FIFO queue under high contention on four different architectures with many hardware threads shows that it outperforms prior queue designs that lack a wait-free progress guarantee. Surprisingly, at the highest level of contention, the throughput of our queue is often as high as that of a microbenchmark that only performs FAA. As a result, our fast wait-free queue implementation is useful in practice on most multi-core systems today. We believe that our design can serve as an example of how to construct other fast wait-free objects.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Haider:2016:LRA, author = "Syed Kamran Haider and William Hasenplaugh and Dan Alistarh", title = "Lease\slash release: architectural support for scaling contended data structures", journal = j-SIGPLAN, volume = "51", number = "8", pages = "17:1--17:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851155", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "High memory contention is generally agreed to be a worst-case scenario for concurrent data structures.
There has been a significant amount of research effort spent investigating designs which minimize contention, and several programming techniques have been proposed to mitigate its effects. However, there are currently few architectural mechanisms to allow scaling contended data structures at high thread counts. In this paper, we investigate hardware support for scalable contended data structures. We propose Lease/Release, a simple addition to standard directory-based MSI cache coherence protocols, allowing participants to lease memory, at the granularity of cache lines, by delaying coherence messages for a short, bounded period of time. Our analysis shows that Lease/Release can significantly reduce the overheads of contention for both non-blocking (lock-free) and lock-based data structure implementations, while ensuring that no deadlocks are introduced. We validate Lease/Release empirically on the Graphite multiprocessor simulator, on a range of data structures, including queue, stack, and priority queue implementations, as well as on transactional applications. Results show that Lease/Release consistently improves both throughput and energy usage, by up to 5x, both for lock-free and lock-based data structure designs.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Guerraoui:2016:OCO, author = "Rachid Guerraoui and Vasileios Trigonakis", title = "Optimistic concurrency with {OPTIK}", journal = j-SIGPLAN, volume = "51", number = "8", pages = "18:1--18:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851146", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce OPTIK, a new practical design pattern for designing and implementing fast and scalable concurrent data structures. OPTIK relies on the commonly-used technique of version numbers for detecting conflicting concurrent operations. We show how to implement the OPTIK pattern using the novel concept of OPTIK locks. These locks enable the use of version numbers for implementing very efficient optimistic concurrent data structures. Existing state-of-the-art lock-based data structures acquire the lock and then check for conflicts. In contrast, with OPTIK locks, we merge the lock acquisition with the detection of conflicting concurrency in a single atomic step, similarly to lock-free algorithms. We illustrate the power of our OPTIK pattern and its implementation by introducing four new algorithms and by optimizing four state-of-the-art algorithms for linked lists, skip lists, hash tables, and queues. 
Our results show that concurrent data structures built using OPTIK are more scalable than the state of the art.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Dice:2016:RTL, author = "Dave Dice and Alex Kogan and Yossi Lev", title = "Refined transactional lock elision", journal = j-SIGPLAN, volume = "51", number = "8", pages = "19:1--19:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851162", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional lock elision (TLE) is a well-known technique that exploits hardware transactional memory (HTM) to introduce concurrency into lock-based software. It achieves that by attempting to execute a critical section protected by a lock in an atomic hardware transaction, reverting to the lock if these attempts fail. One significant drawback of TLE is that it disables hardware speculation once there is a thread running under lock. In this paper we present two algorithms that rely on existing compiler support for transactional programs and allow threads to speculate concurrently on HTM along with a thread holding the lock. We demonstrate the benefit of our algorithms over TLE and other related approaches with an in-depth analysis of a number of benchmarks and a wide range of workloads, including an AVL tree-based micro-benchmark and ccTSA, a real sequence assembler application.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Cao:2016:DBG, author = "Man Cao and Minjia Zhang and Aritra Sengupta and Michael D. Bond", title = "Drinking from both glasses: combining pessimistic and optimistic tracking of cross-thread dependences", journal = j-SIGPLAN, volume = "51", number = "8", pages = "20:1--20:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851143", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is notoriously challenging to develop parallel software systems that are both scalable and correct. Runtime support for parallelism---such as multithreaded record {\&} replay, data race detectors, transactional memory, and enforcement of stronger memory models---helps achieve these goals, but existing commodity solutions slow programs substantially in order to track (i.e., detect or control) an execution's cross-thread dependences accurately. Prior work tracks cross-thread dependences either ``pessimistically,'' slowing every program access, or ``optimistically,'' allowing for lightweight instrumentation of most accesses but dramatically slowing accesses involved in cross-thread dependences. This paper seeks to hybridize pessimistic and optimistic tracking, which is challenging because there exists a fundamental mismatch between pessimistic and optimistic tracking. We address this challenge based on insights about how dependence tracking and program synchronization interact, and introduce a novel approach called hybrid tracking. 
Hybrid tracking is suitable for building efficient runtime support, which we demonstrate by building hybrid-tracking-based versions of a dependence recorder and a region serializability enforcer. An adaptive, profile-based policy makes runtime decisions about switching between pessimistic and optimistic tracking. Our evaluation shows that hybrid tracking enables runtime support to overcome the performance limitations of both pessimistic and optimistic tracking alone.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Wang:2016:MGM, author = "Tianzheng Wang and Milind Chabbi and Hideaki Kimura", title = "Be my guest: {MCS} lock now welcomes guests", journal = j-SIGPLAN, volume = "51", number = "8", pages = "21:1--21:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The MCS lock is one of the most prevalent queuing locks. It provides fair scheduling and high performance on massively parallel systems. However, the MCS lock mandates a bring-your-own-context policy: each lock user must provide an additional context (i.e., a queue node) to interact with the lock. This paper proposes MCSg, a variant of the MCS lock that relaxes this restriction. Our key observation is that not all lock users are created equal. We analyzed how locks are used in massively-parallel modern systems, such as NUMA-aware operating systems and databases. We found that such systems often have a small number of ``regular'' code paths that enter the lock very frequently. Such code paths are the primary beneficiary of the high scalability of MCS locks. However, there are also many ``guest'' code paths that infrequently enter the lock and do not need the same degree of fairness to access the lock (e.g., background tasks that only run periodically with lower priority). These guest users, which are typically spread out in various modules of the software, prefer context-free locks, such as ticket locks. MCSg provides these guests a context-free interface while regular users still enjoy the benefits provided by MCS. It can also be used as a drop-in replacement of MCS for more advanced locks, such as cohort locking. We also propose MCSg++, an extended version of MCSg, which avoids guest starvation and non-FIFO behaviors that might happen with MCSg. 
Our evaluation using microbenchmarks and the TPC-C database benchmark on a 16-socket, 240-core server shows that both MCSg and MCSg++ preserve the benefits of MCS for regular users while providing a context-free interface for guests.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Chabbi:2016:CCL, author = "Milind Chabbi and John Mellor-Crummey", title = "Contention-conscious, locality-preserving locks", journal = j-SIGPLAN, volume = "51", number = "8", pages = "22:1--22:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851166", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the last decade, the growing use of cache-coherent NUMA architectures has spurred the development of numerous locality-preserving mutual exclusion algorithms. NUMA-aware locks such as HCLH, HMCS, and cohort locks exploit locality of reference among nearby threads to deliver high lock throughput under high contention. However, the hierarchical nature of these locality-aware locks increases latency, which reduces the throughput of uncontended or lightly-contended critical sections. To date, no lock design for NUMA systems has delivered both low latency under low contention and high throughput under high contention. In this paper, we describe the design and evaluation of an adaptive mutual exclusion scheme (AHMCS lock), which employs several orthogonal strategies---a hierarchical MCS (HMCS) lock for high throughput under high contention, Lamport's fast path approach for low latency under low contention, an adaptation mechanism that employs hysteresis to balance latency and throughput under moderate contention, and hardware transactional memory for lowest latency in the absence of contention. The result is a top performing lock that has most properties of an ideal mutual exclusion algorithm. AHMCS exploits the strengths of multiple contention management techniques to deliver high performance over a broad range of contention levels. Our empirical evaluations demonstrate the effectiveness of AHMCS over prior art.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Kalikar:2016:DNM, author = "Saurabh Kalikar and Rupesh Nasre", title = "{DomLock}: a new multi-granularity locking technique for hierarchies", journal = j-SIGPLAN, volume = "51", number = "8", pages = "23:1--23:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851164", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present efficient locking mechanisms for hierarchical data structures. Several applications work on an abstract hierarchy of objects, and a parallel execution on this hierarchy necessitates synchronization across workers operating on different parts of the hierarchy. Existing synchronization mechanisms are either too coarse, too inefficient, or too ad hoc, resulting in reduced or unpredictable amount of concurrency. 
We propose a new locking approach based on the structural properties of the underlying hierarchy. We show that the developed techniques are efficient even when the hierarchy is an arbitrary graph, and are applicable even when the hierarchy involves mutation. Theoretically, we present our approach as a locking-cost-minimizing instance of a generic algebraic model of synchronization for hierarchical data structures. Using STMBench7, we illustrate considerable reduction in the locking cost, resulting in an average throughput improvement of 42\%.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Ritson:2016:BWM, author = "Carl G. Ritson and Scott Owens", title = "Benchmarking weak memory models", journal = j-SIGPLAN, volume = "51", number = "8", pages = "24:1--24:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851150", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To achieve good multi-core performance, modern microprocessors have weak memory models, rather than enforce sequential consistency. This gives the programmer a wide scope for choosing exactly how to implement various aspects of inter-thread communication through the system's shared memory. However, these choices come with both semantic and performance consequences, often in tension with each other. In this paper, we focus on the performance side, and define techniques for evaluating the impact of various choices in using weak memory models, such as where to put fences, and which fences to use. We make no attempt to judge certain strategies as best or most efficient, and instead provide the techniques that will allow the programmer to understand the performance implications when identifying and resolving any semantic/performance trade-offs. In particular, our technique supports the reasoned selection of macrobenchmarks to use in investigating trade-offs in using weak memory models. We demonstrate our technique on both synthetic benchmarks and real-world applications for the Linux Kernel and OpenJDK Hotspot Virtual Machine on the ARMv8 and POWERv7 architectures.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Narayanaswamy:2016:VCA, author = "Ganesh Narayanaswamy and Saurabh Joshi and Daniel Kroening", title = "The virtues of conflict: analysing modern concurrency", journal = j-SIGPLAN, volume = "51", number = "8", pages = "25:1--25:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851165", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern shared memory multiprocessors permit reordering of memory operations for performance reasons. These reorderings are often a source of subtle bugs in programs written for such architectures. Traditional approaches to verify weak memory programs often rely on interleaving semantics, which is prone to state space explosion, and thus severely limits the scalability of the analysis. 
In recent times, there has been a renewed interest in modelling dynamic executions of weak memory programs using partial orders. However, such an approach typically requires ad-hoc mechanisms to correctly capture the data and control-flow choices/conflicts present in real-world programs. In this work, we propose a novel, conflict-aware, composable, truly concurrent semantics for programs written using C/C++ for modern weak memory architectures. We exploit our symbolic semantics based on general event structures to build an efficient decision procedure that detects assertion violations in bounded multi-threaded programs. Using a large, representative set of benchmarks, we show that our conflict-aware semantics outperforms the state-of-the-art partial-order based approaches.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Perrin:2016:CCB, author = "Matthieu Perrin and Achour Mostefaoui and Claude Jard", title = "Causal consistency: beyond memory", journal = j-SIGPLAN, volume = "51", number = "8", pages = "26:1--26:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851170", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In distributed systems where strong consistency is costly when not impossible, causal consistency provides a valuable abstraction to represent program executions as partial orders. In addition to the sequential program order of each computing entity, causal order also contains the semantic links between the events that affect the shared objects --- messages emission and reception in a communication channel, reads and writes on a shared register. Usual approaches based on semantic links are very difficult to adapt to other data types such as queues or counters because they require a specific analysis of causal dependencies for each data type. This paper presents a new approach to define causal consistency for any abstract data type based on sequential specifications. 
It explores, formalizes and studies the differences between three variations of causal consistency and highlights them in the light of PRAM, eventual consistency and sequential consistency: weak causal consistency, that captures the notion of causality preservation when focusing on convergence; causal convergence that mixes weak causal consistency and convergence; and causal consistency, that coincides with causal memory when applied to shared memory.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Chatzopoulos:2016:EES, author = "Georgios Chatzopoulos and Aleksandar Dragojevi{\'c} and Rachid Guerraoui", title = "{ESTIMA}: extrapolating scalability of in-memory applications", journal = j-SIGPLAN, volume = "51", number = "8", pages = "27:1--27:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851159", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents ESTIMA, an easy-to-use tool for extrapolating the scalability of in-memory applications. ESTIMA is designed to perform a simple, yet important task: given the performance of an application on a small machine with a handful of cores, ESTIMA extrapolates its scalability to a larger machine with more cores, while requiring minimum input from the user. The key idea underlying ESTIMA is the use of stalled cycles (e.g. cycles that the processor spends waiting for various events, such as cache misses or waiting on a lock). ESTIMA measures stalled cycles on a few cores and extrapolates them to more cores, estimating the amount of waiting in the system. ESTIMA can be effectively used to predict the scalability of in-memory applications. For instance, using measurements of memcached and SQLite on a desktop machine, we obtain accurate predictions of their scalability on a server. Our extensive evaluation on a large number of in-memory benchmarks shows that ESTIMA has generally low prediction errors.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Muddukrishna:2016:GGO, author = "Ananya Muddukrishna and Peter A. Jonsson and Artur Podobas and Mats Brorsson", title = "Grain graphs: {OpenMP} performance analysis made easy", journal = j-SIGPLAN, volume = "51", number = "8", pages = "28:1--28:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851156", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Average programmers struggle to solve performance problems in OpenMP programs with tasks and parallel for-loops. Existing performance analysis tools visualize OpenMP task performance from the runtime system's perspective where task execution is interleaved with other tasks in an unpredictable order. Problems with OpenMP parallel for-loops are similarly difficult to resolve since tools only visualize aggregate thread-level statistics such as load imbalance without zooming into a per-chunk granularity. 
The runtime system/threads oriented visualization provides poor support for understanding problems with task and chunk execution time, parallelism, and memory hierarchy utilization, forcing average programmers to rely on experts or use tedious trial-and-error tuning methods for performance. We present grain graphs, a new OpenMP performance analysis method that visualizes grains --- computation performed by a task or a parallel for-loop chunk instance --- and highlights problems such as low parallelism, work inflation and poor parallelization benefit at the grain level. We demonstrate that grain graphs can quickly reveal performance problems that are difficult to detect and characterize in fine detail using existing visualizations in standard OpenMP programs, simplifying OpenMP performance analysis. This enables average programmers to make portable optimizations for poor performing OpenMP programs, reducing pressure on experts and removing the need for tedious trial-and-error tuning.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Machado:2016:PGC, author = "Nuno Machado and Brandon Lucia and Lu{\'\i}s Rodrigues", title = "Production-guided concurrency debugging", journal = j-SIGPLAN, volume = "51", number = "8", pages = "29:1--29:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851149", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrency bugs that stem from schedule-dependent branches are hard to understand and debug, because their root causes imply not only different event orderings, but also changes in the control-flow between failing and non-failing executions. We present Cortex: a system that helps exposing and understanding concurrency bugs that result from schedule-dependent branches, without relying on information from failing executions. Cortex preemptively exposes failing executions by perturbing the order of events and control-flow behavior in non-failing schedules from production runs of a program. By leveraging this information from production runs, Cortex synthesizes executions to guide the search for failing schedules. Production-guided search helps cope with the large execution search space by targeting failing executions that are similar to observed non-failing executions. Evaluation on popular benchmarks shows that Cortex is able to expose failing schedules with only a few perturbations to non-failing executions, and takes a practical amount of time.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Farooqui:2016:AAW, author = "Naila Farooqui and Rajkishore Barik and Brian T. 
Lewis and Tatiana Shpeisman and Karsten Schwan", title = "Affinity-aware work-stealing for integrated {CPU--GPU} processors", journal = j-SIGPLAN, volume = "51", number = "8", pages = "30:1--30:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent integrated CPU-GPU processors like Intel's Broadwell and AMD's Kaveri support hardware CPU-GPU shared virtual memory, atomic operations, and memory coherency. This enables fine-grained CPU-GPU work-stealing, but architectural differences between the CPU and GPU hurt the performance of traditionally-implemented work-stealing on such processors. These architectural differences include different clock frequencies, atomic operation costs, and cache and shared memory latencies. This paper describes a preliminary implementation of our work-stealing scheduler, Libra, which includes techniques to deal with these architectural differences in integrated CPU-GPU processors. Libra's affinity-aware techniques achieve significant performance gains over classically-implemented work-stealing. We show preliminary results using a diverse set of nine regular and irregular workloads running on an Intel Broadwell Core-M processor. Libra currently achieves up to a 2$ \times $ performance improvement over classical work-stealing, with a 20\% average improvement.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Gindraud:2016:ICM, author = "Fran{\c{c}}ois Gindraud and Fabrice Rastello and Albert Cohen and Fran{\c{c}}ois Broquedis", title = "An interval constrained memory allocator for the {Givy} {GAS} runtime", journal = j-SIGPLAN, volume = "51", number = "8", pages = "31:1--31:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851195", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The shared memory model helps parallel programming productivity, but it also has a high hardware cost and imposes scalability constraints. Ultimately, higher performance will use distributed memories, which scales better but requires programmers to manually transfer data between local memories, which is a complex task. 
Distributed memories are also more energy efficient than shared memories, and are used in a family of embedded computing solutions called multi processor system on chip (MPSoC).", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Chang:2016:PSF, author = "Li-Wen Chang and Izzat {El Hajj} and Hee-Seok Kim and Juan G{\'o}mez-Luna and Abdul Dakkak and Wen-mei Hwu", title = "A programming system for future proofing performance critical libraries", journal = j-SIGPLAN, volume = "51", number = "8", pages = "32:1--32:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851178", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Tangram, a programming system for writing performance-portable programs. The language enables programmers to write computation and composition codelets, supported by tuning knobs and primitives for expressing data parallelism and work decomposition. The compiler and runtime use a set of techniques such as hierarchical composition, coarsening, data placement, tuning, and runtime selection based on input characteristics and micro-profiling. The resulting performance is competitive with optimized vendor libraries.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Nielsen:2016:SLF, author = "Jesper Puge Nielsen and Sven Karlsson", title = "A scalable lock-free hash table with open addressing", journal = j-SIGPLAN, volume = "51", number = "8", pages = "33:1--33:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent data structures synchronized with locks do not scale well with the number of threads. As more scalable alternatives, concurrent data structures and algorithms based on widely available, however advanced, atomic operations have been proposed. These data structures allow for correct and concurrent operations without any locks. In this paper, we present a new fully lock-free open addressed hash table with a simpler design than prior published work. We split hash table insertions into two atomic phases: first inserting a value ignoring other concurrent operations, then in the second phase resolve any duplicate or conflicting values. Our hash table has a constant and low memory usage that is less than existing lock-free hash tables at a fill level of 33\% and above. The hash table exhibits good cache locality. Compared to prior art, our hash table results in 16\% and 15\% fewer L1 and L2 cache misses respectively, leading to 21\% fewer memory stall cycles. 
Our experiments show that our hash table scales close to linearly with the number of threads and outperforms, in throughput, other lock-free hash tables by 19\%.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Maier:2016:CHT, author = "Tobias Maier and Peter Sanders and Roman Dementiev", title = "Concurrent hash tables: fast and general?(!)", journal = j-SIGPLAN, volume = "51", number = "8", pages = "34:1--34:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851188", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent hash tables are one of the most important concurrent data structures with numerous applications. Since hash table accesses can dominate the execution time of the overall application, we need implementations that achieve good speedup. Unfortunately, currently available concurrent hashing libraries turn out to be far away from this requirement in particular when contention on some elements occurs. Our starting point for better performing data structures is a fast and simple lock-free concurrent hash table based on linear probing that is limited to word-sized key-value types and does not support dynamic size adaptation. We explain how to lift these limitations in a provably scalable way and demonstrate that dynamic growing has a performance overhead comparable to the same generalization in sequential hash tables. We perform extensive experiments comparing the performance of our implementations with six of the most widely used concurrent hash tables. Ours are considerably faster than the best algorithms with similar restrictions and an order of magnitude faster than the best more general tables. In some extreme cases, the difference even approaches four orders of magnitude.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Prades:2016:CAX, author = "Javier Prades and Carlos Rea{\~n}o and Federico Silla", title = "{CUDA} acceleration for {Xen} virtual machines in {InfiniBand} clusters with {rCUDA}", journal = j-SIGPLAN, volume = "51", number = "8", pages = "35:1--35:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851181", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many data centers currently use virtual machines (VMs) to achieve a more efficient usage of hardware resources. However, current virtualization solutions, such as Xen, do not easily provide graphics processing unit (GPU) accelerators to applications running in the virtualized domain with the flexibility usually required in data centers (i.e., managing virtual GPU instances and concurrently sharing them among several VMs). Remote GPU virtualization frameworks such as the rCUDA solution may address this problem. In this work we analyze the use of the rCUDA framework to accelerate scientific applications running inside Xen VMs. 
Results show that the use of the rCUDA framework is a feasible approach, featuring a very low overhead if an InfiniBand fabric is already present in the cluster.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Umar:2016:EPF, author = "Ibrahim Umar and Otto J. Anshus and Phuong H. Ha", title = "Effect of portable fine-grained locality on energy efficiency and performance in concurrent search trees", journal = j-SIGPLAN, volume = "51", number = "8", pages = "36:1--36:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851186", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent research has suggested that improving fine-grained data-locality is one of the main approaches to improving energy efficiency and performance. However, no previous research has investigated the effect of the approach on these metrics in the case of concurrent data structures. This paper investigates how fine-grained data locality influences energy efficiency and performance in concurrent search trees, a crucial data structure that is widely used in several important systems. We conduct a set of experiments on three lock-based concurrent search trees: DeltaTree, a portable fine-grained locality-aware concurrent search tree; CBTree, a coarse-grained locality-aware B+tree; and BST-TK, a locality-oblivious concurrent search tree. We run the experiments on a commodity x86 platform and an embedded ARM platform. The experimental results show that DeltaTree has 13--25\% better energy efficiency and 10--22\% more operations/second on the x86 and ARM platforms, respectively. The results confirm that portable fine-grained locality can improve energy efficiency and performance in concurrent search trees.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Parikh:2016:EDW, author = "Hrushit Parikh and Vinit Deodhar and Ada Gavrilovska and Santosh Pande", title = "Efficient distributed workstealing via matchmaking", journal = j-SIGPLAN, volume = "51", number = "8", pages = "37:1--37:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851175", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many classes of high-performance applications and combinatorial problems exhibit large degree of runtime load variability. One approach to achieving balanced resource use is to over-decompose the problem into fine-grained tasks that are then dynamically balanced using approaches such as workstealing. Existing work stealing techniques for such irregular applications, running on large clusters, exhibit high overheads due to potential untimely interruption of busy nodes, excessive communication messages and delays experienced by idle nodes in finding work due to repeated failed steals. We contend that the fundamental problem of distributed work-stealing is of rapidly bringing together work producers and consumers.
In response, we develop an algorithm that performs timely, lightweight and highly efficient matchmaking between work producers and consumers which results in accurate load balance. Experimental evaluations show that our scheduler is able to outperform other distributed work stealing schedulers, and to achieve scale beyond what is possible with current approaches.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Luo:2016:DCC, author = "Hao Luo and Guoyang Chen and Pengcheng Li and Chen Ding and Xipeng Shen", title = "Data-centric combinatorial optimization of parallel code", journal = j-SIGPLAN, volume = "51", number = "8", pages = "38:1--38:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851182", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory performance is one essential factor for tapping into the full potential of the massive parallelism of GPU. It has motivated some recent efforts in GPU cache modeling. This paper presents a new data-centric way to model the performance of a system with heterogeneous memory resources. The new model is composable, meaning it can predict the performance difference due to placing data differently by profiling the execution just once.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Maleki:2016:DSD, author = "Saeed Maleki and Donald Nguyen and Andrew Lenharth and Mar{\'\i}a Garzar{\'a}n and David Padua and Keshav Pingali", title = "{DSMR}: a shared and distributed memory algorithm for single-source shortest path problem", journal = j-SIGPLAN, volume = "51", number = "8", pages = "39:1--39:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851183", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The Single-Source Shortest Path (SSSP) problem is to find the shortest paths from a source vertex to all other vertices in a graph. In this paper, we introduce the Dijkstra Strip-Mined Relaxation (DSMR) algorithm, an efficient parallel SSSP algorithm for shared and distributed memory systems. 
Our results show that DSMR is faster than parallel $ \Delta $-Stepping by a factor of up to 1.66.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Salucci:2016:GMC, author = "Luca Salucci and Daniele Bonetta and Stefan Marr and Walter Binder", title = "Generic messages: capability-based shared memory parallelism for event-loop systems", journal = j-SIGPLAN, volume = "51", number = "8", pages = "40:1--40:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851184", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Systems based on event-loops have been popularized by Node.JS, and are becoming a key technology in the domain of cloud computing. Despite their popularity, such systems support only share-nothing parallelism via message passing between parallel entities usually called workers. In this paper, we introduce a novel parallel programming abstraction called Generic Messages (GEMs), which enables shared-memory parallelism for share-nothing event-based systems. A key characteristic of GEMs is that they enable workers to share state by specifying how the state can be accessed once it is shared. We call this aspect of the GEMs model capability-based parallelism.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Liu:2016:HCG, author = "Jianqiao Liu and Nikhil Hegde and Milind Kulkarni", title = "Hybrid {CPU--GPU} scheduling and execution of tree traversals", journal = j-SIGPLAN, volume = "51", number = "8", pages = "41:1--41:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851174", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "GPUs offer the promise of massive, power-efficient parallelism. However, exploiting this parallelism requires code to be carefully structured to deal with the limitations of the SIMT execution model. In recent years, there has been much interest in mapping irregular applications to GPUs: applications with unpredictable, data-dependent behaviors. While most of the work in this space has focused on ad hoc implementations of specific algorithms, recent work has looked at generic techniques for mapping a large class of tree traversal algorithms to GPUs, through careful restructuring of the tree traversal algorithms to make them behave more regularly. Unfortunately, even this general approach for GPU execution of tree traversal algorithms is reliant on ad hoc, handwritten, algorithm-specific scheduling (i.e., assignment of threads to warps) to achieve high performance. The key challenge of scheduling is that it is a highly irregular process, that requires the inspection of thread behavior and then careful sorting of the threads into warps. In this paper, we present a novel scheduling and execution technique for tree traversal algorithms that is both general and automatic.
The key novelty is a hybrid approach: the GPU partially executes tasks to inspect thread behavior and transmits information back to the CPU, which uses that information to perform the scheduling itself, before executing the remaining, carefully scheduled, portion of the traversals on the GPU. We applied this framework to five tree traversal algorithms, achieving significant speedups over optimized GPU code that does not perform application-specific scheduling. Further, we show that in many cases, our hybrid approach is able to deliver better performance even than GPU code that uses hand-tuned, application-specific scheduling.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Ramachandran:2016:IEI, author = "Arunmoezhi Ramachandran and Neeraj Mittal", title = "Improving efficacy of internal binary search trees using local recovery", journal = j-SIGPLAN, volume = "51", number = "8", pages = "42:1--42:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851173", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Binary Search Tree (BST) is an important data structure for managing ordered data. Many algorithms---blocking as well as non-blocking---have been proposed for concurrent manipulation of a binary search tree in an asynchronous shared memory system that supports search, insert and delete operations based on both external and internal representations of a search tree. An important step in executing an operation on a tree is to traverse the tree from top-to-down in order to locate the operation's window. A process may need to perform this traversal several times to handle any failures occurring due to other processes performing conflicting actions on the tree. Most concurrent algorithms that have been proposed so far use a na{\"\i}ve approach and simply restart the traversal from the root of the tree. In this work, we present a new approach to recover from such failures more efficiently in a concurrent binary search tree based on internal representation using local recovery by restarting the traversal from the ``middle'' of the tree in order to locate an operation's window. Our approach is sufficiently general in the sense that it can be applied to a variety of concurrent binary search trees based on both blocking and non-blocking approaches.
Using experimental evaluation, we demonstrate that our local recovery approach can yield significant speed-ups of up to 69\% for many concurrent algorithms.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Merrill:2016:MBS, author = "Duane Merrill and Michael Garland", title = "Merge-based sparse matrix-vector multiplication {(SpMV)} using the {CSR} storage format", journal = j-SIGPLAN, volume = "51", number = "8", pages = "43:1--43:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851190", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a perfectly balanced, ``merge-based'' parallel method for computing sparse matrix-vector products (SpMV). Our algorithm operates directly upon the Compressed Sparse Row (CSR) sparse matrix format, a predominant in-memory representation for general-purpose sparse linear algebra computations. Our CsrMV performs an equitable multi-partitioning of the input dataset, ensuring that no single thread can be overwhelmed by assignment to (a) arbitrarily-long rows or (b) an arbitrarily-large number of zero-length rows. This parallel decomposition requires neither offline preprocessing nor specialized/ancillary data formats. We evaluate our method on both CPU and GPU microarchitecture across an enormous corpus of diverse real world matrix datasets. We show that traditional CsrMV methods are inconsistent performers subject to order-of-magnitude slowdowns, whereas the performance response of our method is substantially impervious to row-length heterogeneity.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Drebes:2016:NAS, author = "Andi Drebes and Antoniu Pop and Karine Heydemann and Nathalie Drach and Albert Cohen", title = "{NUMA}-aware scheduling and memory allocation for data-flow task-parallel applications", journal = j-SIGPLAN, volume = "51", number = "8", pages = "44:1--44:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851193", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic task parallelism is a popular programming model on shared-memory systems. Compared to data parallel loop-based concurrency, it promises enhanced scalability, load balancing and locality. These promises, however, are undermined by non-uniform memory access (NUMA) systems. We show that it is possible to preserve the uniform hardware abstraction of contemporary task-parallel programming models, for both computing and memory resources, while achieving near-optimal data locality. Our run-time algorithms for NUMA-aware task and data placement are fully automatic, application-independent, performance-portable across NUMA machines, and adapt to dynamic changes. Placement decisions use information about inter-task data dependences and reuse. 
This information is readily available in the run-time systems of modern task-parallel programming frameworks, and from the operating system regarding the placement of previously allocated memory. Our algorithms take advantage of data-flow style task parallelism, where the privatization of task data enhances scalability through the elimination of false dependences and enables fine-grained dynamic control over the placement of application data. We demonstrate that the benefits of dynamically managing data placement outweigh the privatization cost, even when comparing with target-specific optimizations through static, NUMA-aware data interleaving. Our implementation and the experimental evaluation on a set of high-performance benchmarks executing on a 192-core system with 24 NUMA nodes show that the fraction of local memory accesses can be increased to more than 99\%, resulting in a speedup of up to 5$ \times $ compared to a NUMA-aware hierarchical work-stealing baseline.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Mohamedin:2016:DNA, author = "Mohamed Mohamedin and Roberto Palmieri and Sebastiano Peluso and Binoy Ravindran", title = "On designing {NUMA}-aware concurrency control for scalable transactional memory", journal = j-SIGPLAN, volume = "51", number = "8", pages = "45:1--45:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851189", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "NUMA architectures posed the challenge of rethinking parallel applications due to the non-homogeneity introduced by their design, and their real benefits are limited to the characteristics of the particular workload. We name as partitionable transactional workloads such workloads that may be able to exploit the distributed nature of NUMA, such as transactional workloads where data and accesses can be easily partitioned among the so called NUMA zones. However, in case those workloads require the synchronization on shared data, we have to face the issue of exploiting the NUMA architecture also in the concurrency control for their transactions. Therefore in this paper we present a NUMA-aware concurrency control for transactional memory that we designed for promoting scalability in scenarios where both the transactional workload is prone to scale, and the characteristics of the underlying memory model are inherently non-uniform, such as NUMA architectures.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Saad:2016:OTC, author = "Mohamed M. 
Saad and Roberto Palmieri and Binoy Ravindran", title = "On ordering transaction commit", journal = j-SIGPLAN, volume = "51", number = "8", pages = "46:1--46:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851191", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this poster paper, we briefly introduce an effective solution to address the problem of committing transactions enforcing a predefined order. To do that, we overview the design of two algorithms that deploy a cooperative transaction execution that circumvents the transaction isolation constraint in favor of propagating written values among conflicting transactions. A preliminary implementation shows that even in the presence of data conflicts, the proposed algorithms outperform other competitors, significantly.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Qian:2016:ODG, author = "Xuehai Qian and Koushik Sen and Paul Hargrove and Costin Iancu", title = "{OPR}: deterministic group replay for one-sided communication", journal = j-SIGPLAN, volume = "51", number = "8", pages = "47:1--47:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851179", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ability to reproduce a parallel execution is desirable for debugging and program reliability purposes. In debugging (13), the programmer needs to manually step back in time, while for resilience (6) this is automatically performed by the application upon failure. To be useful, replay has to faithfully reproduce the original execution. For parallel programs the main challenge is inferring and maintaining the order of conflicting operations (data races). Deterministic record and replay (R{\&}R) techniques have been developed for multithreaded shared memory programs (5), as well as distributed memory programs (14). Our main interest is techniques for large scale scientific (3; 4) programming models.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Rabozzi:2016:PAP, author = "Marco Rabozzi and Matteo Mazzucchelli and Roberto Cordone and Giovanni Matteo Fumarola and Marco D. Santambrogio", title = "Preemption-aware planning on big-data systems", journal = j-SIGPLAN, volume = "51", number = "8", pages = "48:1--48:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851187", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent developments in Big Data frameworks are moving towards reservation based approaches as a means to manage the increasingly complex mix of computations, whereas preemption techniques are employed to meet strict job deadlines. Within this work we propose and evaluate a new planning algorithm in the context of reservation based scheduling.
Our approach is able to achieve high cluster utilization while minimizing the need for preemption that causes system overheads and planning mispredictions.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Chen:2016:SPN, author = "Yifeng Chen and Kun Huang and Bei Wang and Guohui Li and Xiang Cui", title = "{Samsara Parallel}: a non-{BSP} parallel-in-time model", journal = j-SIGPLAN, volume = "51", number = "8", pages = "49:1--49:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851185", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many time-dependent problems like molecular dynamics of protein folding require a large number of time steps. The latencies and overheads of common-purpose clusters with accelerators are too big for high-frequency iteration. We introduce an algorithmic model called Samsara Parallel (or SP) which, unlike BSP, relies on asynchronous communications and can repeatedly return to earlier time steps to refine the precision of computation. This also extends a line of research called Parallel-in-Time in computational chemistry and physics.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Zhang:2016:SAN, author = "Mingzhe Zhang and Francis C. M. Lau and Cho-Li Wang and Luwei Cheng and Haibo Chen", title = "Scalable adaptive {NUMA}-aware lock: combining local locking and remote locking for efficient concurrency", journal = j-SIGPLAN, volume = "51", number = "8", pages = "50:1--50:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scalable locking is a key building block for scalable multi-threaded software. Its performance is especially critical in multi-socket, multi-core machines with non-uniform memory access (NUMA). Previous schemes such as local locking and remote locking only perform well under a certain level of contention, and often require non-trivial tuning for a particular configuration. Besides, for large NUMA systems, because of unmanaged lock server's nomination, current distance-first NUMA policies cannot perform satisfactorily. In this work, we propose SANL, a locking scheme that can deliver high performance under various contention levels by adaptively switching between the local and the remote lock scheme. Furthermore, we introduce a new NUMA policy for the remote lock that jointly considers node distances and server utilization when choosing lock servers. A comparison with seven representative locking schemes shows that SANL outperforms the others in most contention situations. 
In one group test, SANL is 3.7 times faster than RCL lock and 17 times faster than POSIX mutex.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Hegde:2016:SRS, author = "Nikhil Hegde and Jianqiao Liu and Milind Kulkarni", title = "{SPIRIT}: a runtime system for distributed irregular tree applications", journal = j-SIGPLAN, volume = "51", number = "8", pages = "51:1--51:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851177", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Repeated, depth-first traversal of trees is a common algorithmic pattern in an important set of applications from diverse domains such as cosmological simulations, data mining, and computer graphics. As these applications operate over massive data sets, it is often necessary to distribute the trees to process all of the data. In this work, we introduce SPIRIT, a runtime system to ease the writing of distributed tree applications. SPIRIT automates the challenging tasks of tree distribution, optimizing communication and parallelizing independent computations. The common algorithmic pattern in tree traversals is exploited to effectively schedule parallel computations and improve locality. As a result, pipeline parallelism in distributed traversals is identified, which is complemented by load-balancing, and locality-enhancing, message aggregation optimizations. Evaluation of SPIRIT on tree traversal in Point Correlation (PC) shows a scalable system, achieving speedups up to 38x on a 16-node, 64 process system compared to a 1-node, baseline configuration. We also find that SPIRIT results in substantially less communication and achieves significant performance improvements over implementations in other distributed graph systems.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Ramalhete:2016:TME, author = "Pedro Ramalhete and Andreia Correia", title = "{Tidex}: a mutual exclusion lock", journal = j-SIGPLAN, volume = "51", number = "8", pages = "52:1--52:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851171", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several basic mutual exclusion lock algorithms are known, with one of the simplest being the Ticket Lock. We present a new mutual exclusion lock with properties similar to the Ticket Lock but using atomic_exchange() instead of atomic_fetch_add() that can be more efficient on systems without a native instruction for atomic_fetch_add(), or in which the native instruction for atomic_exchange() is faster than the one for atomic_fetch_add().
Similarly to the Ticket Lock, our lock has a small memory footprint, is extremely simple, respects FIFO order, and provides starvation freedom in architectures that implement atomic_exchange() as a single instruction, like x86.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Mastoras:2016:UFC, author = "Aristeidis Mastoras and Thomas R. Gross", title = "Unifying fixed code and fixed data mapping of load-imbalanced pipelined loops", journal = j-SIGPLAN, volume = "51", number = "8", pages = "53:1--53:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851172", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Some loops with cross-iteration dependences can execute in parallel by pipelining. The loop body is partitioned into stages such that the data dependences are not violated and then the stages are mapped onto threads. Two well-known mapping techniques are fixed code and fixed data; they achieve high performance for load-balanced loops, but they fail to perform well for load-imbalanced loops. In this article, we present a novel hybrid mapping that eliminates drawbacks of both prior mapping techniques and enables dynamic scheduling of stages.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Kurt:2016:UAS, author = "Mehmet Can Kurt and Bin Ren and Sriram Krishnamoorthy and Gagan Agrawal", title = "User-assisted storage reuse determination for dynamic task graphs", journal = j-SIGPLAN, volume = "51", number = "8", pages = "54:1--54:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851180", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Models based on task graphs that operate on single-assignment data are attractive in several ways, but also require nuanced algorithms for scheduling and memory management for efficient execution.
In this paper, we consider memory-efficient dynamic scheduling of task graphs, and present a novel approach for dynamically recycling the memory locations assigned to data items as they are produced by tasks.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Rehman:2016:VMJ, author = "Waqas Ur Rehman and Muhammad Sohaib Ayub and Junaid Haroon Siddiqui", title = "Verification of {MPI} {Java} programs using software model checking", journal = j-SIGPLAN, volume = "51", number = "8", pages = "55:1--55:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851192", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Development of concurrent software requires the programmer to be aware of non-determinism, data races, and deadlocks. MPI (message passing interface) is a popular standard for writing message-oriented distributed applications. Some messages in MPI systems can be processed by one of the many machines and in many possible orders. This non-determinism can affect the result of an MPI application. The alternate results may or may not be correct. To verify MPI applications, we need to check all these possible orderings and use an application-specific oracle to decide if these orderings give correct output. MPJ Express is an open-source Java implementation of the MPI standard. We developed a Java-based model of MPJ Express, where processes are modeled as threads, and which can run unmodified MPI Java programs on a single system. This enabled us to adapt the Java PathFinder explicit-state software model checker (JPF) using a custom listener to verify our model running real MPI Java programs. We evaluated our approach using small examples where model checking revealed message orders that would result in incorrect system behavior.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '16 conference proceedings.", } @Article{Sarkar:2016:VEC, author = "Vivek Sarkar", title = "Virtualizing the Edge of the Cloud: the New Frontier", journal = j-SIGPLAN, volume = "51", number = "7", pages = "1--1", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892243", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the last two decades, virtualization technologies have turned datacenter infrastructure into a multitenant, dynamically provisionable, elastic resource, and formed the basis for the wide adoption of cloud computing. Many of today's cloud applications, however, are based on continuous interactions with end users and their devices, and the trend is only expected to intensify with the expansion of the Internet of Things. The consequent bandwidth and latency requirements of these emerging workloads push the cloud boundary outside of traditional datacenters, giving rise to an edge tier in the end-device-to-cloud-backend infrastructure.
Computational resources embedded in anything from standalone microservers to WiFi routers and small cell access points, and their open APIs, present opportunities for deploying application logic and state closer to where it is being used, addressing both latency and backhaul bandwidth problems. This talk will look at the role that existing virtualization technologies can play in providing in this edge tier the required flexibility, dynamic provisioning and isolation, and will outline open problems that require development of new solutions. We will also discuss the opportunities to leverage these technologies to further deal with the diversity in the end-user device and IoT space.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Huang:2016:BKB, author = "Yu-Ju Huang and Hsuan-Heng Wu and Yeh-Ching Chung and Wei-Chung Hsu", title = "Building a {KVM}-based Hypervisor for a Heterogeneous System Architecture Compliant System", journal = j-SIGPLAN, volume = "51", number = "7", pages = "3--15", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892246", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Heterogeneous System Architecture (HSA) is an architecture developed by the HSA foundation aiming at reducing programmability barriers as well as improving communication efficiency for heterogeneous computing. For example, HSA allows heterogeneous computing devices to share the same virtual address space. This feature allows programmers to bypass explicit data copying between devices, as was required in the past. HSA features such as job dispatching through user level queues and memory based signaling help to reduce communication latency between the host and other computing devices. While the new features in HSA enable more efficient heterogeneous computing, they also introduce new challenges to system virtualization, especially in memory virtualization and I/O virtualization. This work investigates the issues involved in HSA virtualization and implements a KVM-based hypervisor that supports the main features of HSA inside guest operating systems. Furthermore, this work shows that with the newly introduced hypervisor for HSA, system resources in HSA-compliant AMD Kaveri can be effectively shared between multiple guest operating systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Ouyang:2016:SUV, author = "Jiannan Ouyang and John R. 
Lange and Haoqiang Zheng", title = "{Shoot4U}: Using {VMM} Assists to Optimize {TLB} Operations on Preempted {vCPUs}", journal = j-SIGPLAN, volume = "51", number = "7", pages = "17--23", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892245", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Virtual Machine based approaches to workload consolidation, as seen in IaaS cloud as well as datacenter platforms, have long had to contend with performance degradation caused by synchronization primitives inside the guest environments. These primitives can be affected by virtual CPU preemptions by the host scheduler that can introduce delays that are orders of magnitude longer than those primitives were designed for. While a significant amount of work has focused on the behavior of spinlock primitives as a source of these performance issues, spinlocks do not represent the entirety of synchronization mechanisms that are susceptible to scheduling issues when running in a virtualized environment. In this paper we address the virtualized performance issues introduced by TLB shootdown operations. Our profiling study, based on the PARSEC benchmark suite, has shown that up to 64\% of a VM's CPU time can be spent on TLB shootdown operations under certain workloads. In order to address this problem, we present a paravirtual TLB shootdown scheme named Shoot4U. Shoot4U completely eliminates TLB shootdown preemptions by invalidating guest TLB entries from the VMM and allowing guest TLB shootdown operations to complete without waiting for remote virtual CPUs to be scheduled. Our performance evaluation using the PARSEC benchmark suite demonstrates that Shoot4U can reduce benchmark runtime by up to 85\% compared an unmodified Linux kernel, and up to 44\% over a state-of-the-art paravirtual TLB shootdown scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Merrifield:2016:PIE, author = "Timothy Merrifield and H. Reza Taheri", title = "Performance Implications of Extended Page Tables on Virtualized x86 Processors", journal = j-SIGPLAN, volume = "51", number = "7", pages = "25--35", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892258", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Managing virtual memory is an expensive operation, and becomes even more expensive on virtualized servers. Processing TLB misses on a virtualized x86 server requires a two-dimensional page walk that can have 6x more page table lookups, hence 6x more memory references, than a native page table walk. Thus much of the recent research on the subject starts from the assumption that TLB miss processing in virtual environments is significantly more expensive than on native servers. However, we will show that with the latest software stack on modern x86 processors, most of these page-table lookups are satisfied by internal paging structure caches and the L1/L2 data caches, and the actual virtualization overhead of TLB miss processing is a modest fraction of the overall time spent processing TLB misses. 
In this paper, we present a detailed accounting of the TLB miss processing costs on virtualized x86 servers for an exhaustive set of workloads, in particular, two very demanding industry-standard workloads. We show that an implementation of the TPC-C workload that actively uses 475 GB of memory on a 72-CPU Haswell-EP server spends 20\% of its time processing TLB misses when the application runs in a VM. Although this is a non-trivial amount, it is only 4.2\% higher than the TLB miss processing costs on bare metal. The multi-VM VMmark benchmark spends 12.3\% of its time in TLB miss processing, but only 4.3\% of that can be attributed to virtualization overheads. We show that even for the heaviest workloads, a well-tuned application that uses large pages on a recent OS release with a modern hypervisor running on the latest x86 processors sees only minimal degradation from the additional overhead of the two-dimensional page walks in a virtualized server.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Nathan:2016:SRO, author = "Senthil Nathan and Umesh Bellur and Purushottam Kulkarni", title = "On Selecting the Right Optimizations for Virtual Machine Migration", journal = j-SIGPLAN, volume = "51", number = "7", pages = "37--49", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892247", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "To reduce the migration time of a virtual machine and the network traffic generated during migration, existing works have proposed a number of optimizations to pre-copy live migration. These optimizations are delta compression, page skip, deduplication, and data compression. The cost-benefit analysis of these optimizations may preclude the use of certain optimizations in specific scenarios. However, no study has compared the performance {\&} cost of these optimizations and identified the impact of application behaviour on performance gain. Hence, for a given migration scenario and application, it is not clear which optimization one should employ. In this paper, we present a comprehensive empirical study using a large number of workloads to provide recommendations on selection of optimizations for pre-copy live migration. The empirical study reveals that page skip is an important optimization as it reduces network traffic by 20\% with negligible additional CPU cost. Data compression yields impressive gains in reducing network traffic (37\%) but at the cost of a significant increase in CPU consumption (5$ \times $). De-duplication needs to be applied with utmost care as the increase in CPU utilization might outweigh the benefits considerably.
The combination of page skip and data compression works the best across workloads and results in a significant reduction in network traffic (40\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Abe:2016:UVM, author = "Yoshihisa Abe and Roxana Geambasu and Kaustubh Joshi and Mahadev Satyanarayanan", title = "Urgent Virtual Machine Eviction with Enlightened Post-Copy", journal = j-SIGPLAN, volume = "51", number = "7", pages = "51--64", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892252", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Virtual machine (VM) migration demands distinct properties under resource oversubscription and workload surges. We present enlightened post-copy, a new mechanism for VMs under contention that evicts the target VM with fast execution transfer and short total duration. This design contrasts with common live migration, which uses the down time of the migrated VM as its primary metric; it instead focuses on recovering the aggregate performance of the VMs being affected. In enlightened post-copy, the guest OS identifies memory state that is expected to encompass the VM's working set. The hypervisor accordingly transfers its state, mitigating the performance impact on the migrated VM resulting from post-copy transfer. We show that our implementation, with modest instrumentation in guest Linux, resolves VM contention up to several times faster than live migration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Xu:2016:SHS, author = "Xin Xu and Bhavesh Davda", title = "{SRVM}: Hypervisor Support for Live Migration with Passthrough {SR-IOV} Network Devices", journal = j-SIGPLAN, volume = "51", number = "7", pages = "65--77", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892256", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Single-Root I/O Virtualization (SR-IOV) is a specification that allows a single PCI Express (PCIe) device (physical function or PF) to be used as multiple PCIe devices (virtual functions or VF). In a virtualization system, each VF can be directly assigned to a virtual machine (VM) in passthrough mode to significantly improve the network performance. However, VF passthrough mode is not compatible with live migration, which is an essential capability that enables many advanced virtualization features such as high availability and resource provisioning. To solve this problem, we design SRVM which provides hypervisor support to ensure the VF device can be correctly used by the migrated VM and the applications. SRVM is implemented in the hypervisor without modification in guest operating systems or guest VM drivers. Our experimental results show that SRVM can effectively migrate all memory state, and there is no data loss or corruption in applications after live migration. SRVM does not increase VM downtime. 
It only costs limited resources (an extra CPU core), and there is no significant runtime overhead in VM network performance. In fact, since the VF can continue to be used during the pre-copy phase, it offers network throughput that is 9.6 times higher and network latency that is 98\% lower compared to other solutions that switch to para-virtualization mode during live migration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Williams:2016:EEH, author = "Dan Williams and Yaohui Hu and Umesh Deshpande and Piush K. Sinha and Nilton Bila and Kartik Gopalan and Hani Jamjoom", title = "Enabling Efficient Hypervisor-as-a-Service Clouds with Ephemeral Virtualization", journal = j-SIGPLAN, volume = "51", number = "7", pages = "79--92", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892254", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When considering a hypervisor, cloud providers must balance conflicting requirements for simple, secure code bases with more complex, feature-filled offerings. This paper introduces Dichotomy, a new two-layer cloud architecture in which the roles of the hypervisor are split. The cloud provider runs a lean hyperplexor that has the sole task of multiplexing hardware and running more substantial hypervisors (called featurevisors) that implement features. Cloud users choose featurevisors from a selection of lightly-modified hypervisors potentially offered by third-parties in an ``as-a-service'' model for each VM. Rather than running the featurevisor directly on the hyperplexor using nested virtualization, Dichotomy uses a new virtualization technique called ephemeral virtualization which efficiently (and repeatedly) transfers control of a VM between the hyperplexor and featurevisor using memory mapping techniques. Nesting overhead is only incurred when the VM is accessed by the featurevisor. We have implemented Dichotomy in KVM/QEMU and demonstrate average switching times of 80 ms, two to three orders of magnitude faster than live VM migration. We show that, for the featurevisor applications we evaluated, VMs hosted in Dichotomy deliver up to 12\% better performance than those hosted on nested hypervisors, and continue to show benefit even when the featurevisor applications run as often as every 2.5~seconds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Burtsev:2016:APV, author = "Anton Burtsev and David Johnson and Mike Hibler and Eric Eide and John Regehr", title = "Abstractions for Practical Virtual Machine Replay", journal = j-SIGPLAN, volume = "51", number = "7", pages = "93--106", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892257", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient deterministic replay of whole operating systems is feasible and useful, so why isn't replay a default part of the software stack?
While implementing deterministic replay is hard, we argue that the main reason is the lack of general abstractions for understanding and addressing the significant engineering challenges involved in the development of a replay engine for a modern VMM. We present a design blueprint---a set of abstractions, general principles, and low-level implementation details---for efficient deterministic replay in a modern hypervisor. We build and evaluate our architecture in Xen, a full-featured hypervisor. Our architecture can be readily followed and adopted, enabling replay as a ubiquitous part of a modern virtualization stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{McKinley:2016:NGV, author = "Kathryn S. McKinley", title = "Next Generation Virtual Memory Management", journal = j-SIGPLAN, volume = "51", number = "7", pages = "107--107", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892244", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The goal of virtual memory is an abstraction of infinite and private memory for every process. Unfortunately, the insatiable memory demands of modern applications increasingly violate this abstraction by exposing capacity, bandwidth, and performance limitations of modern hardware. Furthermore, emerging memory technologies are likely to exacerbate this problem. For instance, non-volatile memory differs from DRAM due to its asymmetric read/write performance and thus will likely be an addition rather than a drop-in replacement for DRAM. This talk will describe these problems and recent architecture and software innovations that address some of them. If adopted, these solutions will impose substantial challenges for operating system memory management, which has evolved very slowly over the past 30 years. I will draw lessons from the past 15 years of garbage collection advances to suggest some promising directions for innovation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Qian:2016:EFS, author = "Junjie Qian and Witawas Srisa-an and Sharad Seth and Hong Jiang and Du Li and Pan Yi", title = "Exploiting {FIFO} Scheduler to Improve Parallel Garbage Collection Performance", journal = j-SIGPLAN, volume = "51", number = "7", pages = "109--121", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892248", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent studies have found that parallel garbage collection performs worse with more CPUs and more collector threads. As part of this work, we further investigate this phenomenon and find that poor scalability is worst in highly scalable Java applications. Our investigation to find the causes clearly reveals that efficient multi-threading in an application can prolong the average object lifespan, which results in less effective garbage collection.
We also find that prolonging lifespan is the direct result of Linux's Completely Fair Scheduler due to its round-robin-like behavior that can increase the heap contention between the application threads. Instead, if we use pseudo first-in-first-out to schedule application threads in large multicore systems, the garbage collection scalability is significantly improved while the time spent in garbage collection is reduced by as much as 21\%. The average execution time of the 24 Java applications used in our study is also reduced by 11\%. Based on this observation, we propose two approaches to optimally select scheduling policies based on application scalability profile. Our first approach uses the profile information from one execution to tune the subsequent executions. Our second approach dynamically collects profile information and performs policy selection during execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Yu:2016:PAO, author = "Yang Yu and Tianyang Lei and Weihua Zhang and Haibo Chen and Binyu Zang", title = "Performance Analysis and Optimization of Full Garbage Collection in Memory-hungry Environments", journal = j-SIGPLAN, volume = "51", number = "7", pages = "123--130", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892251", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Garbage collection (GC), especially full GC, would nontrivially impact overall application performance, especially for those memory-hungry ones handling large data sets. This paper presents an in-depth performance analysis on the full GC performance of Parallel Scavenge (PS), a state-of-the-art and the default garbage collector in the HotSpot JVM, using traditional and big-data applications running atop JVM on CPU (e.g., Intel Xeon) and many-integrated cores (e.g., Intel Xeon Phi). The analysis uncovers that unnecessary memory accesses and calculations during reference updating in the compaction phase are the main causes of lengthy full GC. To this end, this paper describes an incremental query model for reference calculation, which is further embodied with three schemes (namely optimistic, sort-based and region-based) for different query patterns.
Performance evaluation shows that the incremental query model leads to an average speedup of 1.9X (up to 2.9X) in full GC and a 19.3\% (up to 57.2\%) improvement in application throughput, as well as a 31.2\% reduction in pause time over the vanilla PS collector on CPU, and the corresponding numbers are 2.1X (up to 3.4X), 11.1\% (up to 41.2\%) and 34.9\% for Xeon Phi.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Smith:2016:LMR, author = "Rebecca Smith and Scott Rixner", title = "Leveraging Managed Runtime Systems to Build, Analyze, and Optimize Memory Graphs", journal = j-SIGPLAN, volume = "51", number = "7", pages = "131--143", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892253", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Optimizing memory management is a major challenge of embedded systems programming, as memory is scarce. Further, embedded systems often have heterogeneous memory architectures, complicating the task of memory allocation during both compilation and migration. However, new opportunities for addressing these challenges have been created by the recent emergence of managed runtimes for embedded systems. By imposing structure on memory, these systems have opened the doors for new techniques for analyzing and optimizing memory usage within embedded systems. This paper presents GEM (Graphs of Embedded Memory), a tool which capitalizes on the structure that managed runtime systems provide in order to build memory graphs which facilitate memory analysis and optimization. At GEM's core is a set of fundamental graph transformations which can be layered to support a wide range of use cases, including interactive memory visualization, de-duplication of objects and code, compilation for heterogeneous memory architectures, and transparent migration. Moreover, since the same underlying infrastructure supports all of these orthogonal functionalities, they can easily be applied together to complement each other.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Ben-Yehuda:2016:NPM, author = "Muli Ben-Yehuda and Orna Agmon Ben-Yehuda and Dan Tsafrir", title = "The nom Profit-Maximizing Operating System", journal = j-SIGPLAN, volume = "51", number = "7", pages = "145--160", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892250", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the near future, cloud providers will sell their users virtual machines with CPU, memory, network, and storage resources whose prices constantly change according to market-driven supply and demand conditions. Running traditional operating systems in these virtual machines is a poor fit: traditional operating systems are not aware of changing resource prices and their sole aim is to maximize performance with no consideration of costs. Consequently, they yield low profits.
We present nom, a profit-maximizing operating system designed for cloud computing platforms with dynamic resource prices. Applications running on nom aim to maximize profits by optimizing simultaneously for performance and resource costs. The nom kernel provides them with direct access to the underlying hardware and full control over their private software stacks. Since nom applications know there is no single ``best'' software stack, they adapt their stacks' behavior on the fly according to the current price of available resources and their private utility from them, which differs between applications. We show that in addition to achieving up to 3.9x better throughput and up to 9.1x better latency, nom applications yield up to 11.1x higher profits when compared with the same applications running on Linux and OSv.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Hale:2016:EHP, author = "Kyle C. Hale and Peter A. Dinda", title = "Enabling Hybrid Parallel Runtimes Through Kernel and Virtualization Support", journal = j-SIGPLAN, volume = "51", number = "7", pages = "161--175", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892255", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In our hybrid runtime (HRT) model, a parallel runtime system and the application are together transformed into a specialized OS kernel that operates entirely in kernel mode and can thus implement exactly its desired abstractions on top of fully privileged hardware access. We describe the design and implementation of two new tools that support the HRT model. The first, the Nautilus Aerokernel, is a kernel framework specifically designed to enable HRTs for x64 and Xeon Phi hardware. Aerokernel primitives are specialized for HRT creation and thus can operate much faster, up to two orders of magnitude faster, than related primitives in Linux. Aerokernel primitives also exhibit much lower variance in their performance, an important consideration for some forms of parallelism. We have realized several prototype HRTs, including one based on the Legion runtime, and we provide application macrobenchmark numbers for our Legion HRT. The second tool, the hybrid virtual machine (HVM), is an extension to the Palacios virtual machine monitor that allows a single virtual machine to simultaneously support a traditional OS and software stack alongside an HRT with specialized hardware access. 
The HRT can be booted in a time comparable to a Linux user process startup, and functions in the HRT, which operate over the user process's memory, can be invoked by the process with latencies not much higher than those of a function call.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Waldspurger:2016:SSL, author = "Carl Waldspurger and Emery Berger and Abhishek Bhattacharjee and Kevin Pedretti and Simon Peter and Chris Rossbach", title = "Sweet Spots and Limits for Virtualization", journal = j-SIGPLAN, volume = "51", number = "7", pages = "177--177", month = jul, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3007611.2892249", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This year at VEE, we added a panel to discuss the state of virtualization: What problems are solved? What problems are important? And what problems may not be worth solving? The panelists are experts in areas ranging from hardware virtualization up to language-level virtualization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '16 conference proceedings.", } @Article{Nitu:2017:SBQ, author = "Vlad Nitu and Pierre Olivier and Alain Tchana and Daniel Chiba and Antonio Barbalace and Daniel Hagimont and Binoy Ravindran", title = "Swift Birth and Quick Death: Enabling Fast Parallel Guest Boot and Destruction in the {Xen} Hypervisor", journal = j-SIGPLAN, volume = "52", number = "7", pages = "1--14", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050758", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The ability to quickly set up and tear down a virtual machine is critical for today's cloud elasticity, as well as in numerous other scenarios: guest migration/consolidation, event-driven invocation of micro-services, dynamically adaptive unikernel-based applications, micro-reboots for security or stability, etc. In this paper, we focus on the process of setting up/freeing the hypervisor and host control layer data structures at boot/destruction time, showing that it does not scale in current virtualization solutions. In addition to the direct overhead of long VM set-up/destruction times, we demonstrate by experimentation the indirect costs on real-world auto-scaling systems. Focusing on the popular Xen hypervisor, we identify three critical issues hindering the scalability of the boot and destruction processes: serialized boot, unscalable interactions with the Xenstore at guest creation time, and remote NUMA memory scrubbing at destruction time. For each of these issues we present the design and implementation of a solution in the Xen infrastructure: parallel boot with fine-grained locking, caching of Xenstore data, and local NUMA scrubbing. We evaluate these solutions using micro-benchmarks, macro-benchmarks, and real-world datacenter traces.
Results show that our work improves the current Xen implementation by a significant factor; for example, macro-benchmarks indicate a speedup of more than 4X in high-load scenarios.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Kuenzer:2017:UEC, author = "Simon Kuenzer and Anton Ivanov and Filipe Manco and Jose Mendes and Yuri Volchkov and Florian Schmidt and Kenichi Yasukata and Michio Honda and Felipe Huici", title = "Unikernels Everywhere: The Case for Elastic {CDNs}", journal = j-SIGPLAN, volume = "52", number = "7", pages = "15--29", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050757", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Video streaming dominates the Internet's overall traffic mix, with reports stating that it will constitute 90\% of all consumer traffic by 2019. Most of this video is delivered by Content Delivery Networks (CDNs), and, while they optimize QoE metrics such as buffering ratio and start-up time, no single CDN provides optimal performance. In this paper we make the case for elastic CDNs, the ability to build virtual CDNs on-the-fly on top of shared, third-party infrastructure at scale. To bring this idea closer to reality, we begin with large-scale simulations to quantify the effects that elastic CDNs would have if deployed, and build and evaluate MiniCache, a specialized, minimalistic virtualized content cache that runs on the Xen hypervisor. MiniCache is able to serve content at rates of up to 32 Gb/s and handle up to 600K reqs/sec on a single CPU core, as well as boot in about 90 milliseconds on x86 and around 370 milliseconds on ARM32.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Zhang:2017:MAP, author = "Jinshi Zhang and Eddie Dong and Jian Li and Haibing Guan", title = "{MigVisor}: Accurate Prediction of {VM} Live Migration Behavior using a Working-Set Pattern Model", journal = j-SIGPLAN, volume = "52", number = "7", pages = "30--43", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Live migration of a virtual machine (VM) is a powerful technique with benefits of server maintenance, resource management, dynamic workload re-balance, etc. Modern research has effectively reduced the VM live migration (VMLM) time to dozens of milliseconds, but live migration still exhibits failures if it cannot terminate within the given time constraint. The ability to predict this type of failure can avoid wasting networking and computing resources on the VM migration, and the associated system performance degradation caused by wasting these resources. The cost of VM live migration highly depends on the application workload of the VM, which may undergo frequent changes. At the same time, the available system resources for VM migration can also change substantially and frequently.
To account for these issues, we present a solution called MigVisor, which can accurately predict the behaviour of VM migration using a working-set model. This can enable system managers to predict the migration cost and enhance the system management efficacy. The experimental results prove the design suitability and show that MigVisor has high prediction accuracy, since the average relative error between the predicted value and the measured value is only 6.2\%--9\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Garg:2017:CGA, author = "Anshuj Garg and Debadatta Mishra and Purushottam Kulkarni", title = "{Catalyst}: {GPU}-assisted rapid memory deduplication in virtualization environments", journal = j-SIGPLAN, volume = "52", number = "7", pages = "44--59", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050760", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Content based page sharing techniques improve memory efficiency in virtualized systems by identifying and merging identical pages. Kernel Same-page Merging (KSM), a Linux kernel utility for page sharing, sequentially scans memory pages of virtual machines to deduplicate pages. Sequential scanning of pages has several undesirable side effects---wasted CPU cycles when no sharing opportunities exist, and rate of discovery of sharing being dependent on the scanning rate and corresponding CPU availability. In this work, we exploit the presence of GPUs on modern systems to enable rapid memory sharing through targeted scanning of pages. Our solution, Catalyst, works in two phases, the first where pages of virtual machines are processed by the GPU to identify likely pages for sharing and a second phase that performs page-level similarity checks on a targeted set of shareable pages. Opportunistic usage of the GPU to produce sharing hints enables rapid and low-overhead duplicate detection, and sharing of memory pages in virtualization environments. We evaluate Catalyst against various benchmarks and workloads to demonstrate that Catalyst can achieve higher memory sharing in less time compared to different scan rate configurations of KSM, at lower or comparable compute costs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Fumero:2017:JTG, author = "Juan Fumero and Michel Steuwer and Lukas Stadler and Christophe Dubach", title = "Just-In-Time {GPU} Compilation for Interpreted Languages with Partial Evaluation", journal = j-SIGPLAN, volume = "52", number = "7", pages = "60--73", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050761", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computer systems are increasingly featuring powerful parallel devices with the advent of many-core CPUs and GPUs. This offers the opportunity to solve computationally-intensive problems at a fraction of the time traditional CPUs need.
However, exploiting heterogeneous hardware requires the use of low-level programming language approaches such as OpenCL, which is incredibly challenging, even for advanced programmers. On the application side, interpreted dynamic languages are increasingly becoming popular in many domains due to their simplicity, expressiveness and flexibility. However, this creates a wide gap between the high-level abstractions offered to programmers and the low-level hardware-specific interface. Currently, programmers must rely on high-performance libraries or they are forced to write parts of their application in a low-level language like OpenCL. Ideally, nonexpert programmers should be able to exploit heterogeneous hardware directly from their interpreted dynamic languages. In this paper, we present a technique to transparently and automatically offload computations from interpreted dynamic languages to heterogeneous devices. Using just-in-time compilation, we automatically generate OpenCL code at runtime which is specialized to the actual observed data types using profiling information. We demonstrate our technique using R, which is a popular interpreted dynamic language predominantly used in big data analytics. Our experimental results show that execution on a GPU yields speedups of over 150x compared to the sequential FastR implementation and that the obtained performance is competitive with manually written GPU code. We also show that when taking into account start-up time, large speedups are achievable, even when the applications run for as little as a few seconds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Kotselidis:2017:HMR, author = "Christos Kotselidis and James Clarkson and Andrey Rodchenko and Andy Nisbet and John Mawer and Mikel Luj{\'a}n", title = "Heterogeneous Managed Runtime Systems: a Computer Vision Case Study", journal = j-SIGPLAN, volume = "52", number = "7", pages = "74--82", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050764", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Real-time 3D space understanding is becoming prevalent across a wide range of applications and hardware platforms. To meet the desired Quality of Service (QoS), computer vision applications tend to be heavily parallelized and exploit any available hardware accelerators. Current approaches to achieving real-time computer vision revolve around programming languages typically associated with High Performance Computing along with binding extensions for OpenCL or CUDA execution. Such implementations, although high performing, lack portability across the wide range of diverse hardware resources and accelerators. In this paper, we showcase how a complex computer vision application can be implemented within a managed runtime system. We discuss the complexities of achieving high-performing and portable execution across embedded and desktop configurations.
Furthermore, we demonstrate that it is possible to achieve the QoS target of over 30 frames per second (FPS) by exploiting FPGA and GPGPU acceleration transparently through the managed runtime system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Deng:2017:DWT, author = "Liang Deng and Peng Liu and Jun Xu and Ping Chen and Qingkai Zeng", title = "Dancing with Wolves: Towards Practical Event-driven {VMM} Monitoring", journal = j-SIGPLAN, volume = "52", number = "7", pages = "83--96", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050750", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel framework that enables practical event-driven monitoring for untrusted virtual machine monitors (VMMs) in cloud computing. Unlike previous approaches for VMM monitoring, our framework neither relies on a higher privilege level nor requires any special hardware support. Instead, we place the trusted monitor at the same privilege level and in the same address space with the untrusted VMM to achieve superior efficiency, while proposing a unique mutual-protection mechanism to ensure the integrity of the monitor. Our security analysis demonstrates that our framework can provide high-assurance for event-driven VMM monitoring, even if the highest-privilege VMM is fully compromised. The experimental results show that our framework only incurs trivial performance overhead for enforcing event-driven monitoring policies, exhibiting tremendous performance improvement on previous approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Suneja:2017:SIL, author = "Sahil Suneja and Ricardo Koller and Canturk Isci and Eyal de Lara and Ali Hashemi and Arnamoy Bhattacharyya and Cristiana Amza", title = "Safe Inspection of Live Virtual Machines", journal = j-SIGPLAN, volume = "52", number = "7", pages = "97--111", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050766", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With DevOps automation and an everything-as-code approach to lifecycle management for cloud-native applications, challenges emerge from an operational visibility and control perspective. Once a VM is deployed in production it typically becomes a hands-off entity in terms of restrictions towards inspecting or tuning it, for the fear of negatively impacting its operation. We present CIVIC (Cloning and Injection based VM Inspection for Cloud), a new mechanism that enables safe inspection of unmodified production VMs on-the-fly. CIVIC restricts all impact and side-effects of inspection or analysis operations inside a live clone of the production VM. New functionality over the replicated VM state is introduced using code injection. In this paper, we describe the design and implementation of our solution over KVM/QEMU. 
We demonstrate four of its use cases: (i) safe reuse of system monitoring agents, (ii) impact-heavy problem diagnostics and troubleshooting, (iii) attaching an intrusive anomaly detector to a live service, and (iv) live tuning of a webserver's configuration parameters. Our evaluation shows CIVIC is nimble and lightweight in terms of memory footprint as well as clone activation time (6.5s), and has a low impact on the original VM ({$<$} 10\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Arulraj:2017:IVS, author = "Leo Arulraj and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau", title = "Improving Virtualized Storage Performance with Sky", journal = j-SIGPLAN, volume = "52", number = "7", pages = "112--128", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050755", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce Sky, an extension to the VMM that gathers insights and information by intercepting system calls made by guest applications. We show how Sky gains three specific insights --- guest file-size information, metadata-data distinction, and file-content hints --- and uses said information to enhance virtualized-storage performance. By caching small files and metadata with higher priority, Sky reduces the runtime by 2.3 to 8.8 times for certain workloads. Sky also achieves 4.5 to 18.7 times reduction in the runtime of an open-source block-layer deduplication system by exploiting hints about file contents. Sky works underneath both Linux and FreeBSD guests, as well as under a range of file systems, thus enabling portable and general VMM-level optimization underneath a wide range of storage stacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Hetzelt:2017:SAE, author = "Felicitas Hetzelt and Robert Buhren", title = "Security Analysis of Encrypted Virtual Machines", journal = j-SIGPLAN, volume = "52", number = "7", pages = "129--142", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050763", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cloud computing has become indispensable in today's computer landscape. The flexibility it offers for customers as well as for providers has become a crucial factor for large parts of the computer industry. Virtualization is the key technology that allows for sharing of hardware resources among different customers. The controlling software component, called hypervisor, provides a virtualized view of the computer resources and ensures separation of different guest virtual machines. However, this important cornerstone of cloud computing is not necessarily trustworthy or bug-free. To mitigate this threat, AMD introduced Secure Encrypted Virtualization, short SEV, which transparently encrypts a virtual machine's memory. In this paper we analyse to what extent the proposed features can resist a malicious hypervisor and discuss the tradeoffs imposed by additional protection mechanisms.
To do so, we developed a model of SEV's security capabilities based on the available documentation, as actual silicon implementations are not yet on the market. We found that the first proposed version of SEV is not up to the task owing to three design shortcomings. First, the virtual machine control block is not encrypted and is handled directly by the hypervisor, allowing it to bypass VM memory encryption by executing conveniently chosen gadgets. Secondly, the general purpose registers are not encrypted upon vmexit, leaking potentially sensitive data. Finally, the control over the nested page tables allows a malicious hypervisor to closely monitor the execution state of a VM and attack it with memory replay attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Wang:2017:RLW, author = "Zhe Wang and Chenggang Wu and Jianjun Li and Yuanming Lai and Xiangyu Zhang and Wei-Chung Hsu and Yueqiang Cheng", title = "{ReRanz}: a Light-Weight Virtual Machine to Mitigate Memory Disclosure Attacks", journal = j-SIGPLAN, volume = "52", number = "7", pages = "143--156", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050752", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent code reuse attacks are able to circumvent various address space layout randomization (ASLR) techniques by exploiting memory disclosure vulnerabilities. To mitigate sophisticated code reuse attacks, we proposed a light-weight virtual machine, ReRanz, which deployed a novel continuous binary code re-randomization to mitigate memory disclosure oriented attacks. In order to meet security and performance goals, costly code randomization operations were outsourced to a separate process, called the ``shuffling process''. The shuffling process continuously flushed the old code and replaced it with a fine-grained randomized code variant. ReRanz repeated the process each time an adversary might obtain the information and upload a payload. Our performance evaluation shows that the ReRanz Virtual Machine incurs a very low performance overhead. The security evaluation shows that ReRanz successfully protects the Nginx web server against the Blind-ROP attack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Estrada:2017:UDP, author = "Zachary J. Estrada and Read Sprabery and Lok Yan and Zhongzhi Yu and Roy Campbell and Zbigniew Kalbarczyk and Ravishankar K. Iyer", title = "Using {OS} Design Patterns to Provide Reliability and Security as-a-Service for {VM-based} Clouds", journal = j-SIGPLAN, volume = "52", number = "7", pages = "157--170", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050759", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper extends the concepts behind cloud services to offer hypervisor-based reliability and security monitors for cloud virtual machines.
Cloud VMs can be heterogeneous and as such guest OS parameters needed for monitoring can vary across different VMs and must be obtained in some way. Past work involves running code inside the VM, which is unacceptable for a cloud environment. We solve this problem by recognizing that there are common OS design patterns that can be used to infer monitoring parameters from the guest OS. We extract information about the cloud user's guest OS with the user's existing VM image and knowledge of OS design patterns as the only inputs to analysis. To demonstrate the range of monitoring functionality possible with this technique, we implemented four sample monitors: a guest OS process tracer, an OS hang detector, a return-to-user attack detector, and a process-based keylogger detector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Hussein:2017:OPR, author = "Ahmed Hussein and Mathias Payer and Antony L. Hosking and Chris Vick", title = "One Process to Reap Them All: Garbage Collection as-a-Service", journal = j-SIGPLAN, volume = "52", number = "7", pages = "171--186", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Ubiquitous mobile platforms such as Android rely on managed language run-time environments, also known as language virtual machines (VMs), to run a diverse range of user applications (apps). Each app runs in its own private VM instance, and each VM makes its own private local decisions in managing its use of processor and memory resources. Moreover, the operating system and the hardware do not communicate their low-level decisions regarding power management with the high-level app environment. This lack of coordination across layers and across apps restricts more effective global use of resources on the device. We address this problem by devising and implementing a global memory manager service for Android that optimizes memory usage, run-time performance, and power consumption globally across all apps running on the device. The service focuses on the impact of garbage collection (GC) along these dimensions, since GC poses a significant overhead within managed run-time environments. Our prototype collects system-wide statistics from all running VMs, makes centralized decisions about memory management across apps and across software layers, and also collects garbage centrally. Furthermore, the global memory manager coordinates with the power manager to tune collector scheduling. In our evaluation, we illustrate the impact of such a central memory management service in reducing total energy consumption (up to 18\%) and increasing throughput (up to 12\%), and improving memory utilization and adaptability to user activities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Zhang:2017:DLN, author = "Jie Zhang and Xiaoyi Lu and Dhabaleswar K. 
(DK) Panda", title = "Designing Locality and {NUMA} Aware {MPI} Runtime for Nested Virtualization based {HPC} Cloud with {SR--IOV} Enabled {InfiniBand}", journal = j-SIGPLAN, volume = "52", number = "7", pages = "187--200", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050765", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hypervisor-based virtualization solutions reveal good security and isolation, while container-based solutions make applications and workloads more portable and distributed in an effective, standardized and repeatable way. Therefore, nested virtualization based computing environments (e.g., container over virtual machine), which inherit the capabilities from both solutions, are becoming more and more attractive in clouds (e.g., running Docker over Amazon EC2 VMs). Recent studies have shown that running applications in either VMs or containers still has significant overhead, especially for I/O intensive workloads. This motivates us to investigate whether the nested virtualization based solution can be adopted to build high-performance computing (HPC) clouds for running MPI applications efficiently and where the bottlenecks lie. To eliminate performance bottlenecks, we propose a high-performance two-layer locality and NUMA aware MPI library, which is able to dynamically detect co-resident containers inside one VM as well as detect co-resident VM inside one host at MPI runtime. Thus the MPI processes across different containers and VMs can communicate to each other by shared memory or Cross Memory Attach (CMA) channels instead of network channel if they are co-resident. We further propose an enhanced NUMA aware hybrid design to utilize InfiniBand loopback based channel to optimize large message transfer across containers when they are running on different sockets. Performance evaluations show that compared with the performance of the state-of-art (1Layer) design, our proposed enhance-hybrid design can bring up to 184\%, 81\% and 12\% benefit on point-to-point, collective operations, and end applications. Compared with the default performance, our enhanced-hybrid design delivers up to 184\%, 85\% and 16\% performance improvement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Lu:2017:FPL, author = "Kai Lu and Wenzhe Zhang and Xiaoping Wang and Mikel Luj{\'a}n and Andy Nisbet", title = "Flexible Page-level Memory Access Monitoring Based on Virtualization Hardware", journal = j-SIGPLAN, volume = "52", number = "7", pages = "201--213", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050751", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Page protection is often used to achieve memory access monitoring in many applications, dealing with program-analysis, checkpoint-based failure recovery, and garbage collection in managed runtime systems. Typically, low overhead access monitoring is limited by the relatively large page-level granularity of memory management unit hardware support for virtual memory protection. 
In this paper, we improve upon traditional page-level mechanisms by additionally using hardware support for virtualization in order to achieve fine and flexible granularities that can be smaller than a page. We first introduce a memory allocator based on page protection that can achieve fine-grained monitoring. Second, we explain how virtualization hardware support can be used to achieve dynamic adjustment of the monitoring granularity. In all, we propose a process-level virtual machine to achieve dynamic and fine-grained monitoring. Any application can run on our process-level virtual machine without modification. Experimental results for an incremental checkpoint tool provide a use-case to demonstrate our work. Comparing with traditional page-based checkpoint, our work can effectively reduce the amount of checkpoint data and improve performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Yang:2017:CLA, author = "Chun Yang and Xianhua Liu and Xu Cheng", title = "Content Look-Aside Buffer for Redundancy-Free Virtual Disk {I/O} and Caching", journal = j-SIGPLAN, volume = "52", number = "7", pages = "214--227", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050762", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Storage consolidation in a virtualized environment introduces numerous duplications in virtual disks and imposes considerable pressure on disk I/O and caching. In this paper, we present a content look-aside buffer (CLB) approach for simultaneously providing redundancy-free virtual disk I/O and caching. CLB attaches persistent fingerprints to virtual disk blocks, which enables detection of I/O redundancy before disk access. At run time, CLB exploits content pages already present in the guest disk caches to service the redundant reads through page sharing, thus eliminating both redundant I/O requests and redundant disk cache copies. For write requests, CLB uses a group invalidating writeback protocol for updating fingerprints to support crash consistency while minimizing disk write overhead. By implementing and evaluating a CLB prototype on KVM hypervisor, we demonstrate that CLB delivers considerably improved I/O performance with realistic workloads. Our CLB prototype improves the throughput of sequential and random read on duplicate data by 4.1x and 26.2x, respectively. For typical read-intensive workloads, such as booting VM and launching application, CLB's I/O deduplication and cache deduplication eliminates 94.9\%--98.5\% of read requests and saves 50\%--100\% cache memory in each VM, respectively. Compared with the QEMU's raw virtual disk format, CLB improves the per-disk VM density by 8x--16x. 
For mixed read-write workloads, the cost of on-line fingerprint updating offsets the read benefit; nevertheless, CLB substantially improves overall performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{dAntras:2017:HXU, author = "Amanieu d'Antras and Cosmin Gorgovan and Jim Garside and John Goodacre and Mikel Luj{\'a}n", title = "{HyperMAMBO-X64}: Using Virtualization to Support High-Performance Transparent Binary Translation", journal = j-SIGPLAN, volume = "52", number = "7", pages = "228--241", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050756", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Current computer architectures --- ARM, MIPS, PowerPC, SPARC, x86 --- have evolved from a 32-bit architecture to a 64-bit one. Computer architects often consider whether it could be possible to eliminate hardware support for a subset of the instruction set so as to reduce hardware complexity, which could improve performance, reduce power usage and accelerate processor development. This paper considers the scenario where we want to eliminate 32-bit hardware support from the ARMv8 architecture. Dynamic binary translation can be used for this purpose and generally comes in one of two forms: application-level translators that translate a single user mode process on top of a native operating system, and system-level translators that translate an entire operating system and all its processes. Application-level translators can have good performance but are not totally transparent; system-level translators may be 100\% compatible but performance suffers. HyperMAMBO-X64 uses a new approach that gets the best of both worlds, being able to run the translator as an application under the hypervisor but still react to the behavior of guest operating systems. It works with complete transparency with regard to the virtualized system whilst delivering performance close to that provided by hardware execution. A key factor in the low overhead of HyperMAMBO-X64 is its deep integration with the virtualization and memory management features of ARMv8. These are exploited to support caching of translations across multiple address spaces while ensuring that translated code remains consistent with the source instructions it is based on.
We show how these attributes are achieved without sacrificing either performance or accuracy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Zhu:2017:VLV, author = "Min Zhu and Bibo Tu and Wei Wei and Dan Meng", title = "{HA-VMSI}: a Lightweight Virtual Machine Isolation Approach with Commodity Hardware for {ARM}", journal = j-SIGPLAN, volume = "52", number = "7", pages = "242--256", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050767", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Once compromising the hypervisor, remote or local adversaries can easily access other customers' sensitive data in the memory and context of guest virtual machines (VMs). VM isolation is an efficient mechanism for protecting the memory of guest VMs from unauthorized access. However, previous VM isolation systems either modify hardware architecture or introduce a software module without being protected, and most of them focus on the x86 architecture. This paper proposes HA-VMSI, a lightweight hardware-assisted VM isolation approach for ARM, to provide runtime protection of guest VMs, even with a compromised hypervisor. In the ARM TrustZone secure world, a thin security monitor is introduced as HA-VMSI's entire TCB. Hence, the security monitor is much less vulnerable and safe from attacks that can compromise the hypervisor. The key of HA-VMSI is decoupling the functions of memory isolation among VMs from the hypervisor into the security monitor. As a result, the hypervisor can only update the Stage-2 page tables of VMs via the security monitor, which inspects and approves each new mapping. It is worth noting that HA-VMSI is more secure and effective than current software approaches, and more flexible and compatible than hardware approaches. We have implemented a prototype for KVM hypervisor with multiple Linux as guest OSes on Juno board. The security assessment and performance evaluation show that HA-VMSI is effective, efficient and practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '17 conference proceedings.", } @Article{Steele:2017:TNO, author = "Guy L. {Steele, Jr.}", title = "It's Time for a New Old Language", journal = j-SIGPLAN, volume = "52", number = "8", pages = "1--1", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018773", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The most popular programming language in computer science has no compiler or interpreter. Its definition is not written down in any one place. It has changed a lot over the decades, and those changes have introduced ambiguities and inconsistencies. Today, dozens of variations are in use, and its complexity has reached the point where it needs to be re-explained, at least in part, every time it is used. Much effort has been spent in hand-translating between this language and other languages that do have compilers. The language is quite amenable to parallel computation, but this fact has gone unexploited. 
In this talk we will summarize the history of the language, highlight the variations and some of the problems that have arisen, and propose specific solutions. We suggest that it is high time that this language be given a complete formal specification, and that compilers, IDEs, and proof-checkers be created to support it, so that all the best tools and techniques of our trade may be applied to it also.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Chen:2017:ESF, author = "Guoyang Chen and Yue Zhao and Xipeng Shen and Huiyang Zhou", title = "{EffiSha}: a Software Framework for Enabling Efficient Preemptive Scheduling of {GPU}", journal = j-SIGPLAN, volume = "52", number = "8", pages = "3--16", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018748", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern GPUs are broadly adopted in many multitasking environments, including data centers and smartphones. However, the current support for the scheduling of multiple GPU kernels (from different applications) is limited, forming a major barrier for GPU to meet many practical needs. This work for the first time demonstrates that on existing GPUs, efficient preemptive scheduling of GPU kernels is possible even without special hardware support. Specifically, it presents EffiSha, a pure software framework that enables preemptive scheduling of GPU kernels with very low overhead. The enabled preemptive scheduler offers flexible support of kernels of different priorities, and demonstrates significant potential for reducing the average turnaround time and improving the system overall throughput of programs that time share a modern GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Cohen:2017:LLS, author = "Nachshon Cohen and Arie Tal and Erez Petrank", title = "Layout Lock: a Scalable Locking Paradigm for Concurrent Data Layout Modifications", journal = j-SIGPLAN, volume = "52", number = "8", pages = "17--29", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data-structures can benefit from dynamic data layout modifications when the size or the shape of the data structure changes during the execution, or when different phases in the program execute different workloads. However, in a modern multi-core environment, layout modifications involve costly synchronization overhead. In this paper we propose a novel layout lock that incurs a negligible overhead for reads and a small overhead for updates of the data structure. We then demonstrate the benefits of layout changes and also the advantages of the layout lock as its supporting synchronization mechanism for two data structures. In particular, we propose a concurrent binary search tree, and a concurrent array set, that benefit from concurrent layout modifications using the proposed layout lock. 
Experience demonstrates performance advantages and integration simplicity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Zhang:2017:UGM, author = "Xiuxia Zhang and Guangming Tan and Shuangbai Xue and Jiajia Li and Keren Zhou and Mingyu Chen", title = "Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning", journal = j-SIGPLAN, volume = "52", number = "8", pages = "31--43", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018755", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we present a methodology to understand GPU microarchitectural features and improve performance for compute-intensive kernels. The methodology relies on a reverse engineering approach to crack the GPU ISA encodings in order to build a GPU assembler. An assembly microbenchmark suite correlates microarchitectural features with their performance factors to uncover instruction-level and memory hierarchy preferences. We use SGEMM as a running example to show the ways to achieve bare-metal performance tuning. The performance boost is achieved by tuning FFMA throughput by activating dual-issue, eliminating register bank conflicts, adding non-FFMA instructions with little penalty, and choosing proper width of global/shared load instructions. On NVIDIA Kepler K20m, we develop a faster SGEMM with 3.1Tflop/s performance and 88\% efficiency; the performance is 15\% higher than cuBLAS7.0. Applying these optimizations to convolution, the implementation gains 39\%--62\% performance improvement compared with cuDNN4.0. The toolchain is an attempt to automatically crack different GPU ISA encodings and build an assembler adaptively for the purpose of performance enhancements to applications on GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Ou:2017:CCD, author = "Peizhao Ou and Brian Demsky", title = "Checking Concurrent Data Structures Under the {C\slash C++11} Memory Model", journal = j-SIGPLAN, volume = "52", number = "8", pages = "45--59", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018749", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent data structures often provide better performance on multi-core processors but are significantly more difficult to design and test than their sequential counterparts. The C/C++11 standard introduced a weak memory model with support for low-level atomic operations such as compare and swap (CAS). While low-level atomic operations can significantly improve the performance of concurrent data structures, they introduce non-intuitive behaviors that can increase the difficulty of developing code. In this paper, we develop a correctness model for concurrent data structures that make use of atomic operations. Based on this correctness model, we present CDSSPEC, a specification checker for concurrent data structures under the C/C++11 memory model. 
We have evaluated CDSSPEC on 10 concurrent data structures, among which CDSSPEC detected 3 known bugs and 93\% of the injected bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Chabbi:2017:EAL, author = "Milind Chabbi and Abdelhalim Amer and Shasha Wen and Xu Liu", title = "An Efficient Abortable-locking Protocol for Multi-level {NUMA} Systems", journal = j-SIGPLAN, volume = "52", number = "8", pages = "61--74", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018768", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The popularity of Non-Uniform Memory Access (NUMA) architectures has led to numerous locality-preserving hierarchical lock designs, such as HCLH, HMCS, and cohort locks. Locality-preserving locks trade fairness for higher throughput. Hence, some instances of acquisitions can incur long latencies, which may be intolerable for certain applications. Few locks admit a waiting thread to abandon its protocol on a timeout. State-of-the-art abortable locks are not fully locality aware, introduce high overheads, and unsuitable for frequent aborts. Enhancing locality-aware locks with lightweight timeout capability is critical for their adoption. In this paper, we design and evaluate the HMCS-T lock, a Hierarchical MCS (HMCS) lock variant that admits a timeout. HMCS-T maintains the locality benefits of HMCS while ensuring aborts to be lightweight. HMCS-T offers the progress guarantee missing in most abortable queuing locks. Our evaluations show that HMCS-T offers the timeout feature at a moderate overhead over its HMCS analog. HMCS-T, used in an MPI runtime lock, mitigated the poor scalability of an MPI+OpenMP BFS code and resulted in 4.3x superior scaling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Acar:2017:CSC, author = "Umut A. Acar and Naama Ben-David and Mike Rainey", title = "Contention in Structured Concurrency: Provably Efficient Dynamic Non-Zero Indicators for Nested Parallelism", journal = j-SIGPLAN, volume = "52", number = "8", pages = "75--88", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018762", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past two decades, many concurrent data structures have been designed and implemented. Nearly all such work analyzes concurrent data structures empirically, omitting asymptotic bounds on their efficiency, partly because of the complexity of the analysis needed, and partly because of the difficulty of obtaining relevant asymptotic bounds: when the analysis takes into account important practical factors, such as contention, it is difficult or even impossible to prove desirable bounds. In this paper, we show that considering structured concurrency or relaxed concurrency models can enable establishing strong bounds, also for contention. 
To this end, we first present a dynamic relaxed counter data structure that indicates the non-zero status of the counter. Our data structure extends a recently proposed data structure, called SNZI, allowing our structure to grow dynamically in response to the increasing degree of concurrency in the system. Using the dynamic SNZI data structure, we then present a concurrent data structure for series-parallel directed acyclic graphs (sp-dags), a key data structure widely used in the implementation of modern parallel programming languages. The key component of sp-dags is an in-counter data structure that is an instance of our dynamic SNZI. We analyze the efficiency of our concurrent sp-dags and in-counter data structures under the nested-parallel computing paradigm. This paradigm offers a structured model for concurrency. Under this model, we prove that our data structures require amortized $O(1)$ shared memory steps, including contention. We present an implementation and an experimental evaluation that suggests that the sp-dags data structure is practical and can perform well in practice.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Sato:2017:NIT, author = "Kento Sato and Dong H. Ahn and Ignacio Laguna and Gregory L. Lee and Martin Schulz and Christopher M. Chambreau", title = "Noise Injection Techniques to Expose Subtle and Unintended Message Races", journal = j-SIGPLAN, volume = "52", number = "8", pages = "89--101", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018767", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging intermittently occurring bugs within MPI applications is challenging, and message races, a condition in which two or more sends race to match with a receive, are one of the common root causes. Many debugging tools have been proposed to help programmers resolve them, but their runtime interference perturbs the timing such that subtle races often cannot be reproduced with debugging tools. We present novel noise injection techniques to expose message races even under a tool's control. We first formalize this race problem in the context of non-deterministic parallel applications and use this analysis to determine an effective noise-injection strategy to uncover them. We codified these techniques in NINJA (Noise INJection Agent) that exposes these races without modification to the application.
Our evaluations on synthetic cases as well as a real-world bug in Hypre-2.10.1 show that NINJA significantly helps expose races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Luo:2017:TDS, author = "Hao Luo and Pengcheng Li and Chen Ding", title = "Thread Data Sharing in Cache: Theory and Measurement", journal = j-SIGPLAN, volume = "52", number = "8", pages = "103--115", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018759", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "On modern multi-core processors, independent workloads often interfere with each other by competing for shared cache space. However, for multi-threaded workloads, where a single copy of data can be accessed by multiple threads, the threads can cooperatively share cache. Because data sharing consolidates the collective working set of threads, the effective size of shared cache becomes larger than it would have been when data are not shared. This paper presents a new theory of data sharing. It includes (1) a new metric called the shared footprint to mathematically compute the amount of data shared by any group of threads in any size cache, and (2) a linear-time algorithm to measure shared footprint by scanning the memory trace of a multi-threaded program. The paper presents the practical implementation and evaluates the new theory using 14 PARSEC and SPEC OMP benchmarks, including an example use of shared footprint in program optimization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Ren:2017:EVM, author = "Bin Ren and Sriram Krishnamoorthy and Kunal Agrawal and Milind Kulkarni", title = "Exploiting Vector and Multicore Parallelism for Recursive, Data- and Task-Parallel Programs", journal = j-SIGPLAN, volume = "52", number = "8", pages = "117--130", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018763", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern hardware contains parallel execution resources that are well-suited for data parallelism (vector units) and task parallelism (multicores). However, most work on parallel scheduling focuses on one type of hardware or the other. In this work, we present a scheduling framework that allows for a unified treatment of task- and data-parallelism. Our key insight is an abstraction, task blocks, that uniformly handles data-parallel iterations and task-parallel tasks, allowing them to be scheduled on vector units or executed independently as multicores. Our framework allows us to define schedulers that can dynamically select between executing task-blocks on vector units or multicores. We show that these schedulers are asymptotically optimal, and deliver the maximum amount of parallelism available in computation trees. To evaluate our schedulers, we develop program transformations that can convert mixed data- and task-parallel programs into task block-based programs.
Using a prototype instantiation of our scheduling framework, we show that, on an 8-core system, we can simultaneously exploit vector and multicore parallelism to achieve $ 14 \times $--$ 108 \times $ speedup over sequential baselines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Shudler:2017:IPC, author = "Sergei Shudler and Alexandru Calotoiu and Torsten Hoefler and Felix Wolf", title = "Isoefficiency in Practice: Configuring and Understanding the Performance of Task-based Applications", journal = j-SIGPLAN, volume = "52", number = "8", pages = "131--143", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018770", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Task-based programming offers an elegant way to express units of computation and the dependencies among them, making it easier to distribute the computational load evenly across multiple cores. However, this separation of problem decomposition and parallelism requires a sufficiently large input problem to achieve satisfactory efficiency on a given number of cores. Unfortunately, finding a good match between input size and core count usually requires significant experimentation, which is expensive and sometimes even impractical. In this paper, we propose an automated empirical method for finding the isoefficiency function of a task-based program, binding efficiency, core count, and the input size in one analytical expression. This allows the latter two to be adjusted according to given (realistic) efficiency objectives. Moreover, we not only find (i) the actual isoefficiency function but also (ii) the function one would yield if the program execution was free of resource contention and (iii) an upper bound that could only be reached if the program was able to maintain its average parallelism throughout its execution. The difference between the three helps to explain low efficiency, and in particular, it helps to differentiate between resource contention and structural conflicts related to task dependencies or scheduling. The insights gained can be used to co-design programs and shared system resources.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Utterback:2017:POR, author = "Robert Utterback and Kunal Agrawal and I-Ting Angelina Lee and Milind Kulkarni", title = "Processor-Oblivious Record and Replay", journal = j-SIGPLAN, volume = "52", number = "8", pages = "145--161", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018764", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Record-and-replay systems are useful tools for debugging non-deterministic parallel programs by first recording an execution and then replaying that execution to produce the same access pattern. 
Existing record-and-replay systems generally target thread-based execution models, and record the behaviors and interleavings of individual threads. Dynamic multithreaded languages and libraries, such as the Cilk family, OpenMP, TBB, etc., do not have a notion of threads. Instead, these languages provide a processor-oblivious model of programming, where programs expose task-parallelism using high-level constructs such as spawn/sync without regard to the number of threads/cores available to run the program. Thread-based record-and-replay would violate the processor-oblivious nature of these programs, as they incorporate the number of threads into the recorded information, constraining the replayed execution to the same number of threads. In this paper, we present a processor-oblivious record-and-replay scheme for such languages where record and replay can use different numbers of processors and both are scheduled using work stealing. We provide theoretical guarantees for our record and replay scheme --- namely that record is optimal for programs with one lock and replay is near-optimal for all cases. In addition, we implemented this scheme in the Cilk Plus runtime system and our evaluation indicates that processor-obliviousness does not cause substantial overheads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Prajapati:2017:SAA, author = "Nirmal Prajapati and Waruna Ranasinghe and Sanjay Rajopadhye and Rumen Andonov and Hristo Djidjev and Tobias Grosser", title = "Simple, Accurate, Analytical Time Modeling and Optimal Tile Size Selection for {GPGPU} Stencils", journal = j-SIGPLAN, volume = "52", number = "8", pages = "163--177", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018744", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Stencil computations are an important class of compute and data intensive programs that occur widely in scientific and engineering applications. A number of tools use sophisticated tiling, parallelization, and memory mapping strategies, and generate code that relies on vendor-supplied compilers. This code has a number of parameters, such as tile sizes, that are then tuned via empirical exploration. We develop a model that guides such a choice. Our model is a simple set of analytical functions that predict the execution time of the generated code. It is deliberately optimistic, since we are targeting modeling and parameter selections, such as tile sizes, that yield highly tuned codes. We experimentally validate the model on a number of 2D and 3D stencil codes, and show that the root mean square error in the execution time is less than 10\% for the subset of the codes that achieve performance within 20\% of the best.
Furthermore, based on using our model, we are able to predict tile sizes that achieve a further improvement of 9\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Jiang:2017:CSM, author = "Peng Jiang and Gagan Agrawal", title = "Combining {SIMD} and Many\slash Multi-core Parallelism for Finite State Machines with Enumerative Speculation", journal = j-SIGPLAN, volume = "52", number = "8", pages = "179--191", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018760", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Finite State Machine (FSM) is the key kernel behind many popular applications, including regular expression matching, text tokenization, and Huffman decoding. Parallelizing FSMs is extremely difficult because of the strong dependencies and unpredictable memory accesses. Previous efforts have largely focused on multi-core parallelization, and used different approaches, including {\em speculative\/} and {\em enumerative\/} execution, both of which have been effective but also have limitations. With increasing width and improving flexibility in SIMD instruction sets, this paper focuses on combining SIMD and multi/many-core parallelism for FSMs. We have developed a novel strategy, called {\em enumerative speculation}. Instead of speculating on a single state as in speculative execution or enumerating all possible states as in enumerative execution, our strategy speculates transitions from several possible states, reducing the prediction overheads of speculation approach and the large amount of redundant work in the enumerative approach. A simple lookback approach produces a set of guessed states to achieve high speculation success rates in our enumerative speculation. We evaluate our method with four popular FSM applications: Huffman decoding, regular expression matching, HTML tokenization, and Div7. We obtain up to 2.5x speedup using SIMD on one core and up to 95x combining SIMD with 60 cores of an Intel Xeon Phi. On a single core, we outperform the best single-state speculative execution version by an average of 1.6x, and in combining SIMD and many-core parallelism, outperform enumerative execution by an average of 2x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Awan:2017:CCD, author = "Ammar Ahmad Awan and Khaled Hamidouche and Jahanzeb Maqbool Hashmi and Dhabaleswar K. 
Panda", title = "{S-Caffe}: Co-designing {MPI} Runtimes and {Caffe} for Scalable Deep Learning on Modern {GPU} Clusters", journal = j-SIGPLAN, volume = "52", number = "8", pages = "193--205", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018769", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Availability of large data sets like ImageNet and massively parallel computation support in modern HPC devices like NVIDIA GPUs have fueled a renewed interest in Deep Learning (DL) algorithms. This has triggered the development of DL frameworks like Caffe, Torch, TensorFlow, and CNTK. However, most DL frameworks have been limited to a single node. In order to scale out DL frameworks and bring HPC capabilities to the DL arena, we propose, S-Caffe; a scalable and distributed Caffe adaptation for modern multi-GPU clusters. With an in-depth analysis of new requirements brought forward by the DL frameworks and limitations of current communication runtimes, we present a co-design of the Caffe framework and the MVAPICH2-GDR MPI runtime. Using the co-design methodology, we modify Caffe's workflow to maximize the overlap of computation and communication with multi-stage data propagation and gradient aggregation schemes. We bring DL-Awareness to the MPI runtime by proposing a hierarchical reduction design that benefits from CUDA-Aware features and provides up to a massive 133x speedup over OpenMPI and 2.6x speedup over MVAPICH2 for 160 GPUs. S-Caffe successfully scales up to 160 K-80 GPUs for GoogLeNet (ImageNet) with a speedup of 2.5x over 32 GPUs. To the best of our knowledge, this is the first framework that scales up to 160 GPUs. Furthermore, even for single node training, S-Caffe shows an improvement of 14\% and 9\% over Nvidia's optimized Caffe for 8 and 16 GPUs, respectively. In addition, S-Caffe achieves up to 1395 samples per second for the AlexNet model, which is comparable to the performance of Microsoft CNTK.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Sabne:2017:MBI, author = "Amit Sabne and Xiao Wang and Sherman J. Kisner and Charles A. Bouman and Anand Raghunathan and Samuel P. Midkiff", title = "Model-based Iterative {CT} Image Reconstruction on {GPUs}", journal = j-SIGPLAN, volume = "52", number = "8", pages = "207--220", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018765", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computed Tomography (CT) Image Reconstruction is an important technique used in a variety of domains, including medical imaging, electron microscopy, non-destructive testing and transportation security. Model-based Iterative Reconstruction (MBIR) using Iterative Coordinate Descent (ICD) is a CT algorithm that produces state-of-the-art results in terms of image quality. However, MBIR is highly computationally intensive and challenging to parallelize, and has traditionally been viewed as impractical in applications where reconstruction time is critical. 
We present the first GPU-based algorithm for ICD-based MBIR. The algorithm leverages the recently-proposed concept of SuperVoxels, and efficiently exploits the three levels of parallelism available in MBIR to better utilize the GPU hardware resources. We also explore data layout transformations to obtain more coalesced accesses and several GPU-specific optimizations for MBIR that boost performance. Across a suite of 3200 test cases, our GPU implementation obtains a geometric mean speedup of 4.43X over a state-of-the-art multi-core implementation on a 16-core iso-power CPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Yeh:2017:PFG, author = "Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and Rudolf Eigenmann and Timothy G. Rogers", title = "{Pagoda}: Fine-Grained {GPU} Resource Virtualization for Narrow Tasks", journal = j-SIGPLAN, volume = "52", number = "8", pages = "221--234", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Massively multithreaded GPUs achieve high throughput by running thousands of threads in parallel. To fully utilize the hardware, workloads spawn work to the GPU in bulk by launching large tasks, where each task is a kernel that contains thousands of threads that occupy the entire GPU. GPUs face severe underutilization and their performance benefits vanish if the tasks are narrow, i.e., they contain {$<$} 500 threads. Latency-sensitive applications in network, signal, and image processing that generate a large number of tasks with relatively small inputs are examples of such limited parallelism. This paper presents Pagoda, a runtime system that virtualizes GPU resources, using an OS-like daemon kernel called MasterKernel. Tasks are spawned from the CPU onto Pagoda as they become available, and are scheduled by the MasterKernel at the warp granularity. Experimental results demonstrate that Pagoda achieves a geometric mean speedup of 5.70x over PThreads running on a 20-core CPU, 1.51x over CUDA-HyperQ, and 1.69x over GeMTC, the state-of-the-art runtime GPU task scheduling system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Ben-Nun:2017:GAM, author = "Tal Ben-Nun and Michael Sutton and Sreepathi Pai and Keshav Pingali", title = "{Groute}: an Asynchronous Multi-{GPU} Programming Model for Irregular Computations", journal = j-SIGPLAN, volume = "52", number = "8", pages = "235--248", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018756", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nodes with multiple GPUs are becoming the platform of choice for high-performance computing.
However, most applications are written using bulk-synchronous programming models, which may not be optimal for irregular algorithms that benefit from low-latency, asynchronous communication. This paper proposes constructs for asynchronous multi-GPU programming, and describes their implementation in a thin runtime environment called Groute. Groute also implements common collective operations and distributed work-lists, enabling the development of irregular applications without substantial programming effort. We demonstrate that this approach achieves state-of-the-art performance and exhibits strong scaling for a suite of irregular applications on 8-GPU and heterogeneous systems, yielding over 7x speedup for some algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Schardl:2017:TEF, author = "Tao B. Schardl and William S. Moses and Charles E. Leiserson", title = "{Tapir}: Embedding Fork-Join Parallelism into {LLVM}'s Intermediate Representation", journal = j-SIGPLAN, volume = "52", number = "8", pages = "249--265", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018758", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper explores how fork-join parallelism, as supported by concurrency platforms such as Cilk and OpenMP, can be embedded into a compiler's intermediate representation (IR). Mainstream compilers typically treat parallel linguistic constructs as syntactic sugar for function calls into a parallel runtime. These calls prevent the compiler from performing optimizations across parallel control constructs. Remedying this situation is generally thought to require an extensive reworking of compiler analyses and code transformations to handle parallel semantics. Tapir is a compiler IR that represents logically parallel tasks asymmetrically in the program's control flow graph. Tapir allows the compiler to optimize across parallel control constructs with only minor changes to its existing analyses and code transformations. To prototype Tapir in the LLVM compiler, for example, we added or modified about 6000 lines of LLVM's 4-million-line codebase. Tapir enables LLVM's existing compiler optimizations for serial code --- including loop-invariant-code motion, common-subexpression elimination, and tail-recursion elimination --- to work with parallel control constructs such as spawning and parallel loops. 
Tapir also supports parallel optimizations such as loop scheduling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Matveev:2017:MPC, author = "Alexander Matveev and Yaron Meirovitch and Hayk Saribekyan and Wiktor Jakubiuk and Tim Kaler and Gergely Odor and David Budden and Aleksandar Zlateski and Nir Shavit", title = "A Multicore Path to Connectomics-on-Demand", journal = j-SIGPLAN, volume = "52", number = "8", pages = "267--281", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018766", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The current design trend in large scale machine learning is to use distributed clusters of CPUs and GPUs with MapReduce-style programming. Some have been led to believe that this type of horizontal scaling can reduce or even eliminate the need for traditional algorithm development, careful parallelization, and performance engineering. This paper is a case study showing the contrary: that the benefits of algorithms, parallelization, and performance engineering, can sometimes be so vast that it is possible to solve ``cluster-scale'' problems on a single commodity multicore machine. Connectomics is an emerging area of neurobiology that uses cutting edge machine learning and image processing to extract brain connectivity graphs from electron microscopy images. It has long been assumed that the processing of connectomics data will require mass storage, farms of CPU/GPUs, and will take months (if not years) of processing time. We present a high-throughput connectomics-on-demand system that runs on a multicore machine with less than 100 cores and extracts connectomes at the terabyte per hour pace of modern electron microscopes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Vollmer:2017:SHS, author = "Michael Vollmer and Ryan G. Scott and Madanlal Musuvathi and Ryan R. Newton", title = "{SC-Haskell}: Sequential Consistency in Languages That Minimize Mutable Shared Heap", journal = j-SIGPLAN, volume = "52", number = "8", pages = "283--298", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018746", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A core, but often neglected, aspect of a programming language design is its memory (consistency) model. Sequential consistency~(SC) is the most intuitive memory model for programmers as it guarantees sequential composition of instructions and provides a simple abstraction of shared memory as a single global store with atomic read and writes. Unfortunately, SC is widely considered to be impractical due to its associated performance overheads. Perhaps contrary to popular opinion, this paper demonstrates that SC is achievable with acceptable performance overheads for mainstream languages that minimize mutable shared heap. In particular, we modify the Glasgow Haskell Compiler to insert fences on all writes to shared mutable memory accessed in nonfunctional parts of the program. 
For a benchmark suite containing 1,279 programs, SC adds a geomean overhead of less than 0.4\% on an x86 machine. The efficiency of SC arises primarily due to the isolation provided by the Haskell type system between purely functional and thread-local imperative computations on the one hand, and imperative computations on the global heap on the other. We show how to use new programming idioms to further reduce the SC overhead; these create a virtuous cycle of less overhead and even stronger semantic guarantees (static data-race freedom).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Battig:2017:SDC, author = "Martin B{\"a}ttig and Thomas R. Gross", title = "Synchronized-by-Default Concurrency for Shared-Memory Systems", journal = j-SIGPLAN, volume = "52", number = "8", pages = "299--312", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018747", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We explore a programming approach for concurrency that synchronizes all accesses to shared memory by default. Synchronization takes place by ensuring that all program code runs inside atomic sections even if the program code has external side effects. Threads are mapped to atomic sections that a programmer must explicitly split to increase concurrency. A naive implementation of this approach incurs a large amount of overhead. We show how to reduce this overhead to make the approach suitable for realistic application programs on existing hardware. We present an implementation technique based on a special-purpose software transactional memory system. To reduce the overhead, the technique exploits properties of managed, object-oriented programming languages as well as intraprocedural static analyses and uses field-level granularity locking in combination with transactional I/O to provide good scaling properties. We implemented the synchronized-by-default (SBD) approach for the Java language and evaluate its performance for six programs from the DaCapo benchmark suite. The evaluation shows that, compared to explicit synchronization, the SBD approach has an overhead between 0.4\% and 102\% depending on the benchmark and the number of threads, with a mean (geom.) of 23.9\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Moreira:2017:FCR, author = "Rubens E. A. Moreira and Sylvain Collange and Fernando Magno Quint{\~a}o Pereira", title = "Function Call Re-Vectorization", journal = j-SIGPLAN, volume = "52", number = "8", pages = "313--326", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018751", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Programming languages such as C for CUDA, OpenCL or ISPC have contributed to increase the programmability of SIMD accelerators and graphics processing units. 
However, these languages still lack the flexibility offered by low-level SIMD programming on explicit vectors. To close this expressiveness gap while preserving performance, this paper introduces the notion of Function Call Re-Vectorization (CREV). CREV allows changing the dimension of vectorization during the execution of a kernel, exposing it as a nested parallel kernel call. CREV affords programmability close to dynamic parallelism, a feature that allows the invocation of kernels from inside kernels, but at much lower cost. In this paper, we present a formal semantics of CREV, and an implementation of it on the ISPC compiler. We have used CREV to implement some classic algorithms, including string matching, depth first search and Bellman-Ford, with minimum effort. These algorithms, once compiled by ISPC to Intel-based vector instructions, are as fast as state-of-the-art implementations, yet much simpler. Thus, CREV gives developers the elegance of dynamic programming, and the performance of explicit SIMD programming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Rajbhandari:2017:OFI, author = "Samyam Rajbhandari and Fabrice Rastello and Karol Kowalski and Sriram Krishnamoorthy and P. Sadayappan", title = "Optimizing the Four-Index Integral Transform Using Data Movement Lower Bounds Analysis", journal = j-SIGPLAN, volume = "52", number = "8", pages = "327--340", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018771", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The four-index integral transform is a fundamental and computationally demanding calculation used in many computational chemistry suites such as NWChem. It transforms a four-dimensional tensor from one basis to another. This transformation is most efficiently implemented as a sequence of four tensor contractions that each contract a four-dimensional tensor with a two-dimensional transformation matrix. Differing degrees of permutation symmetry in the intermediate and final tensors in the sequence of contractions cause intermediate tensors to be much larger than the final tensor and limit the number of electronic states in the modeled systems. Loop fusion, in conjunction with tiling, can be very effective in reducing the total space requirement, as well as data movement. However, the large number of possible choices for loop fusion and tiling, and data/computation distribution across a parallel system, make it challenging to develop an optimized parallel implementation for the four-index integral transform. We develop a novel approach to address this problem, using lower bounds modeling of data movement complexity. We establish relationships between available aggregate physical memory in a parallel computer system and ineffective fusion configurations, enabling their pruning and consequent identification of effective choices and a characterization of optimality criteria.
This work has resulted in the development of a significantly improved implementation of the four-index transform that enables higher performance and the ability to model larger electronic systems than the current implementation in the NWChem quantum chemistry software suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Steele:2017:UBP, author = "Guy L. {Steele, Jr.} and Jean-Baptiste Tristan", title = "Using Butterfly-Patterned Partial Sums to Draw from Discrete Distributions", journal = j-SIGPLAN, volume = "52", number = "8", pages = "341--355", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018757", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a SIMD technique for drawing values from multiple discrete distributions, such as sampling from the random variables of a mixture model, that avoids computing a complete table of partial sums of the relative probabilities. A table of alternate (``butterfly-patterned'') form is faster to compute, making better use of coalesced memory accesses; from this table, complete partial sums are computed on the fly during a binary search. Measurements using CUDA 7.5 on an NVIDIA Titan Black GPU show that this technique makes an entire machine-learning application that uses a Latent Dirichlet Allocation topic model with 1024 topics about 13\% faster (when using single-precision floating-point data) or about 35\% faster (when using double-precision floating-point data) than doing a straightforward matrix transposition after using coalesced accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Basin:2017:KKV, author = "Dmitry Basin and Edward Bortnikov and Anastasia Braginsky and Guy Golan-Gueta and Eshcar Hillel and Idit Keidar and Moshe Sulamy", title = "{KiWi}: a Key--Value Map for Scalable Real-Time Analytics", journal = j-SIGPLAN, volume = "52", number = "8", pages = "357--369", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018761", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern big data processing platforms employ huge in-memory key--value (KV) maps. Their applications simultaneously drive high-rate data ingestion and large-scale analytics. These two scenarios expect KV-map implementations that scale well with both real-time updates and large atomic scans triggered by range queries. We present KiWi, the first atomic KV-map to efficiently support simultaneous large scans and real-time access. The key to achieving this is treating scans as first class citizens, and organizing the data structure around them. KiWi provides wait-free scans, whereas its put operations are lightweight and lock-free. It optimizes memory management jointly with data structure access. We implement KiWi and compare it to state-of-the-art solutions.
Compared to other KV-maps providing atomic scans, KiWi performs either long scans or concurrent puts an order of magnitude faster. Its scans are twice as fast as non-atomic ones implemented via iterators in the Java skiplist.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Jiang:2017:GAP, author = "Lin Jiang and Zhijia Zhao", title = "Grammar-aware Parallelization for Scalable {XPath} Querying", journal = j-SIGPLAN, volume = "52", number = "8", pages = "371--383", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018772", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Semi-structured data emerge in many domains, especially in web analytics and business intelligence. However, querying such data is inherently sequential due to the nested structure of input data. Existing solutions pessimistically enumerate all execution paths to circumvent dependencies, yielding sub-optimal performance and limited scalability. This paper presents GAP, a parallelization scheme that, for the first time, leverages the grammar of the input data to boost the parallelization efficiency. GAP leverages static analysis to infer feasible execution paths for specific contexts based on the grammar of the semi-structured data. It can eliminate unnecessary paths without compromising the correctness. In the absence of a pre-defined grammar, GAP switches into a speculative execution mode and takes potentially incomplete grammar extracted either from prior inputs. Together, the dual-mode GAP reduces the execution paths from all paths to a minimum, therefore maximizing the parallelization efficiency and scalability. The benefits of path elimination go beyond reducing extra computation --- it also enables the use of more efficient data structures, which further improves the efficiency. An evaluation on a large set of standard benchmarks with diverse queries shows that GAP yields significant efficiency increase and boosts the speedup of the state-of-the-art from 2.9X to 17.6X on a 20-core machine for a set of 200 queries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Wang:2017:ESC, author = "Xin Wang and Weihua Zhang and Zhaoguo Wang and Ziyun Wei and Haibo Chen and Wenyun Zhao", title = "{Eunomia}: Scaling Concurrent Search Trees under Contention Using {HTM}", journal = j-SIGPLAN, volume = "52", number = "8", pages = "385--399", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018752", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While hardware transactional memory (HTM) has recently been adopted to construct efficient concurrent search tree structures, such designs fail to deliver scalable performance under contention. In this paper, we first conduct a detailed analysis on an HTM-based concurrent B+Tree, which uncovers several reasons for excessive HTM aborts induced by both false and true conflicts under contention. 
Based on the analysis, we advocate Eunomia, a design pattern for search trees which contains several principles to reduce HTM aborts, including splitting HTM regions with version-based concurrency control to reduce HTM working sets, partitioned data layout to reduce false conflicts, proactively detecting and avoiding true conflicts, and adaptive concurrency control. To validate their effectiveness, we apply such designs to construct a scalable concurrent B+Tree using HTM. Evaluation using key--value store benchmarks on a 20-core HTM-capable multi-core machine shows that Eunomia leads to 5x--11x speedup under high contention, while incurring small overhead under low contention.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Tang:2017:SCM, author = "Xiongchao Tang and Jidong Zhai and Bowen Yu and Wenguang Chen and Weimin Zheng", title = "Self-Checkpoint: an In-Memory Checkpoint Method Using Less Space and Its Practice on Fault-Tolerant {HPL}", journal = j-SIGPLAN, volume = "52", number = "8", pages = "401--413", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018745", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fault tolerance is increasingly important in high performance computing due to the substantial growth of system scale and decreasing system reliability. In-memory/diskless checkpoint has gained extensive attention as a solution to avoid the IO bottleneck of traditional disk-based checkpoint methods. However, applications using previous in-memory checkpoint suffer from little available memory space. To provide high reliability, previous in-memory checkpoint methods either need to keep two copies of checkpoints to tolerate failures while updating old checkpoints or trade performance for space by flushing in-memory checkpoints into disk. In this paper, we propose a novel in-memory checkpoint method, called self-checkpoint, which can not only achieve the same reliability of previous in-memory checkpoint methods, but also increase the available memory space for applications by almost 50\%. To validate our method, we apply the self-checkpoint to an important problem, fault tolerant HPL. We implement a scalable and fault tolerant HPL based on this new method, called SKT-HPL, and validate it on two large-scale systems. Experimental results with 24,576 processes show that SKT-HPL achieves over 95\% of the performance of the original HPL. 
Compared to the state-of-the-art in-memory checkpoint method, it improves the available memory size by 47\% and the performance by 5\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Wu:2017:SDC, author = "Panruo Wu and Nathan DeBardeleben and Qiang Guan and Sean Blanchard and Jieyang Chen and Dingwen Tao and Xin Liang and Kaiming Ouyang and Zizhong Chen", title = "Silent Data Corruption Resilient Two-sided Matrix Factorizations", journal = j-SIGPLAN, volume = "52", number = "8", pages = "415--427", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018750", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an algorithm based fault tolerance method to harden three two-sided matrix factorizations against soft errors: reduction to Hessenberg form, tridiagonal form, and bidiagonal form. These two sided factorizations are usually the prerequisites to computing eigenvalues/eigenvectors and singular value decomposition. Algorithm based fault tolerance has been shown to work on three main one-sided matrix factorizations: LU, Cholesky, and QR, but extending it to cover two sided factorizations is non-trivial because there are no obvious {\it offline, problem} specific maintenance of checksums. We thus develop an {\it online, algorithm} specific checksum scheme and show how to systematically adapt the two sided factorization algorithms used in LAPACK and ScaLAPACK packages to introduce the algorithm based fault tolerance. The resulting ABFT scheme can detect and correct arithmetic errors {\it continuously} during the factorizations that allow timely error handling. Detailed analysis and experiments are conducted to show the cost and the gain in resilience. We demonstrate that our scheme covers a significant portion of the operations of the factorizations. Our checksum scheme achieves high error detection coverage and error correction coverage compared to the state of the art, with low overhead and high scalability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Arbel-Raviv:2017:PRD, author = "Maya Arbel-Raviv and Trevor Brown", title = "{Poster}: Reuse, don't Recycle: Transforming Algorithms that Throw Away Descriptors", journal = j-SIGPLAN, volume = "52", number = "8", pages = "429--430", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019035", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lock-free algorithms guarantee progress by having threads help one another. Complex lock-free operations facilitate helping by creating descriptor objects that describe how other threads should help them. In many lock-free algorithms, a new descriptor is allocated for each operation. After an operation completes, its descriptor must be reclaimed by a memory reclamation scheme. Allocating and reclaiming descriptors introduces significant space and time overhead. 
We present a transformation for a class of lock-free algorithms that allows each thread to efficiently reuse a single descriptor. Experiments on a variety of workloads show that our transformation yields significant improvements over implementations that reclaim descriptors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Balaji:2017:PAP, author = "Vignesh Balaji and Dhruva Tirumala and Brandon Lucia", title = "{Poster}: an Architecture and Programming Model for Accelerating Parallel Commutative Computations via Privatization", journal = j-SIGPLAN, volume = "52", number = "8", pages = "431--432", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019030", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Synchronization and data movement are the key impediments to an efficient parallel execution. To ensure that data shared by multiple threads remain consistent, the programmer must use synchronization (e.g., mutex locks) to serialize threads' accesses to data. This limits parallelism because it forces threads to sequentially access shared resources. Additionally, systems use cache coherence to ensure that processors always operate on the most up-to-date version of a value even in the presence of private caches. Coherence protocol implementations cause processors to serialize their accesses to shared data, further limiting parallelism and performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Bhattacharyya:2017:PHE, author = "Arnamoy Bhattacharyya and Mike Dai Wang and Mihai Burcea and Yi Ding and Allen Deng and Sai Varikooty and Shafaaf Hossain and Cristiana Amza", title = "{Poster}: {HythTM}: Extending the Applicability of {Intel TSX} Hardware Transactional Support", journal = j-SIGPLAN, volume = "52", number = "8", pages = "433--434", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019027", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this work, we introduce and experimentally evaluate a new hybrid software-hardware Transactional Memory prototype based on Intel's Haswell TSX architecture. Our prototype extends the applicability of the existing hardware support for TM by interposing a hybrid fall-back layer before the sequential, big-lock fall-back path, used by standard TSX-supported solutions in order to guarantee progress. In our experimental evaluation we use SynQuake, a realistic game benchmark modeled after Quake. Our results show that our hybrid transactional system,which we call HythTM, is able to reduce the number of transactions that go to the sequential software layer, hence avoiding hardware transaction aborts and loss of parallelism. 
HythTM optimizes application throughput and scalability up to 5.05x, when compared to the hardware TM with sequential fall-back path.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Chowdhury:2017:PPE, author = "Rezaul Chowdhury and Pramod Ganapathi and Yuan Tang and Jesmin Jahan Tithi", title = "{Poster}: Provably Efficient Scheduling of Cache-Oblivious Wavefront Algorithms", journal = j-SIGPLAN, volume = "52", number = "8", pages = "435--436", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019031", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Standard cache-oblivious recursive divide-and-conquer algorithms for evaluating dynamic programming recurrences have optimal serial cache complexity but often have lower parallelism compared with iterative wavefront algorithms due to artificial dependencies among subtasks. Very recently cache-oblivious recursive wavefront (COW) algorithms have been introduced which do not have any artificial dependencies. Though COW algorithms are based on fork-join primitives, they extensively use atomic operations, and as a result, performance guarantees provided by state-of-the-art schedulers for programs with fork-join primitives do not apply. In this work, we show how to systematically transform standard cache-oblivious recursive divide-and-conquer algorithms into recursive wavefront algorithms to achieve optimal parallel cache complexity and high parallelism under state-of-the-art schedulers for fork-join programs. Unlike COW algorithms these new algorithms do not use atomic operations. Instead, they use closed-form formulas to compute at what time each recursive function must be launched in order to achieve high parallelism without losing cache performance. The resulting implementations are arguably much simpler than implementations of known COW algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Cohen:2017:PST, author = "Nachshon Cohen and Maurice Herlihy and Erez Petrank and Elias Wald", title = "{Poster}: State Teleportation via Hardware Transactional Memory", journal = j-SIGPLAN, volume = "52", number = "8", pages = "437--438", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019026", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State teleportation is a new technique for exploiting hardware transactional memory (HTM) to improve existing synchronization and memory management schemes for highly-concurrent data structures. When applied to fine-grained locking, a thread holding the lock for a node launches a hardware transaction that traverses multiple successor nodes, acquires the lock for the last node reached, and releases the lock on the starting node, skipping lock acquisitions for intermediate nodes. 
When applied to lock-free data structures, a thread visiting a node protected by a hazard pointer launches a hardware transaction that traverses multiple successor nodes, and publishes the hazard pointer only for the last node reached, skipping the memory barriers needed to publish intermediate hazard pointers. Experimental results show that these applications of state teleportation can substantially increase the performance of both lock-based and lock-free data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Dai:2017:PII, author = "Dong Dai and Wei Zhang and Yong Chen", title = "{Poster}: {IOGP}: an Incremental Online Graph Partitioning for Large-Scale Distributed Graph Databases", journal = j-SIGPLAN, volume = "52", number = "8", pages = "439--440", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019037", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large-scale graphs are becoming critical in various domains such as social networks, scientific applications, knowledge discovery, and even system software. Many of those use cases require large-scale high-performance graph databases, which are designed for serving continuous updates from the clients, and at the same time, answering complex queries towards the current graph in an on-line manner. Those operations in graph databases, also referred to as OLTP (online transaction processing) operations, need specific design and implementation in graph partitioning algorithms. In this study, we designed an incremental online graph partitioning (IOGP), optimized for OLTP workloads. It is designed to achieve better locality, generate balanced partitions, and increase the parallelism for accessing hotspots of the graph. Our evaluation results on both real-world and synthetic graphs in both simulation and a real system confirm better performance on graph queries (as much as 2X) with small overheads during graph insertion (less than 10\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Firoz:2017:PDC, author = "Jesun Shariar Firoz and Thejaka Amila Kanewala and Marcin Zalewski and Martina Barnas and Andrew Lumsdaine", title = "{Poster}: Distributed Control: The Benefits of Eliminating Global Synchronization via Effective Scheduling", journal = j-SIGPLAN, volume = "52", number = "8", pages = "441--442", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019036", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In distributed computing, parallel overheads such as \emph{synchronization overhead} may hinder performance. We introduce the idea of \emph{Distributed Control} (DC) where global synchronization is reduced to \emph{termination detection} and each worker proceeds ahead optimistically, based on the local knowledge of the global computation. To avoid ``wasted'' work, DC relies on local work prioritization.
However, the work order obtained by local prioritization is susceptible to interference from the runtime. We show that employing effective scheduling policies and optimizations in the runtime, in conjunction with eliminating global barriers, improves performance in two graph applications: single-source shortest paths and connected components.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Jo:2017:PMA, author = "Gangwon Jo and Jaehoon Jung and Jiyoung Park and Jaejin Lee", title = "{Poster}: {MAPA}: an Automatic Memory Access Pattern Analyzer for {GPU} Applications", journal = j-SIGPLAN, volume = "52", number = "8", pages = "443--444", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019034", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Various existing optimization and memory consistency management techniques for GPU applications rely on memory access patterns of kernels. However, they suffer from poor practicality because they require explicit user interventions to extract kernel memory access patterns. This paper proposes an automatic memory-access-pattern analysis framework called MAPA. MAPA is based on a source-level analysis technique derived from traditional symbolic analyses and a run-time pattern selection technique. The experimental results show that MAPA properly analyzes 116 real-world OpenCL kernels from Rodinia and Parboil.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Li:2017:PCO, author = "Shigang Li and Yunquan Zhang and Torsten Hoefler", title = "{Poster}: Cache-Oblivious {MPI} All-to-All Communications on Many-Core Architectures", journal = j-SIGPLAN, volume = "52", number = "8", pages = "445--446", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019025", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the many-core era, the performance of MPI collectives is more dependent on the intra-node communication component. However, the communication algorithms generally inherit from the inter-node version and ignore the cache complexity. We propose cache-oblivious algorithms for MPI all-to-all operations, in which data blocks are copied into the receive buffers in Morton order to exploit data locality. Experimental results on different many-core architectures show that our cache-oblivious implementations significantly outperform the naive implementations based on shared heap and the highly optimized MPI libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Menon:2017:PAL, author = "Harshitha Menon and Kavitha Chandrasekar and Laxmikant V. 
Kale", title = "{Poster}: Automated Load Balancer Selection Based on Application Characteristics", journal = j-SIGPLAN, volume = "52", number = "8", pages = "447--448", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019033", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many HPC applications require dynamic load balancing to achieve high performance and system utilization. Different applications have different characteristics and hence require different load balancing strategies. Invocation of a suboptimal load balancing strategy can lead to inefficient execution. We propose Meta-Balancer, a framework to automatically decide the best load balancing strategy. It employs randomized decision forests, a machine learning method, to learn a model for choosing the best load balancing strategy for an application represented by a set of features that capture the application characteristics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Moscovici:2017:PGF, author = "Nurit Moscovici and Nachshon Cohen and Erez Petrank", title = "{Poster}: a {GPU}-Friendly Skiplist Algorithm", journal = j-SIGPLAN, volume = "52", number = "8", pages = "449--450", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019032", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a design for a fine-grained lock-based skiplist optimized for Graphics Processing Units (GPUs). While GPUs are often used to accelerate streaming parallel computations, it remains a significant challenge to efficiently offload concurrent computations with more complicated data-irregular access and fine-grained synchronization. Natural building blocks for such computations would be concurrent data structures, such as skiplists, which are widely used in general purpose computations. Our design utilizes array-based nodes which are accessed and updated by warp-cooperative functions, thus taking advantage of the fact that GPUs are most efficient when memory accesses are coalesced and execution divergence is minimized. 
The proposed design has been implemented, and measurements demonstrate improved performance of up to 2.6x over existing skiplist designs for the GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Ramalhete:2017:PPM, author = "Pedro Ramalhete and Andreia Correia", title = "{Poster}: Poor Man's {URCU}", journal = j-SIGPLAN, volume = "52", number = "8", pages = "451--452", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019021", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "RCU is, among other things, a well known mechanism for memory reclamation that is meant to be used in languages without an automatic Garbage Collector. Unfortunately, it requires operating system support, which is currently provided only in Linux. An alternative is to use Userspace RCU (URCU), which has two variants that can be deployed on other operating systems, named \emph{Memory Barrier} and \emph{Bullet Proof}. We present a novel algorithm that implements the three core APIs of RCU: \texttt{rcu\_read\_lock()}, \texttt{rcu\_read\_unlock()}, and \texttt{synchronize\_rcu()}. Our algorithm uses one mutual exclusion lock and two reader-writer locks with \texttt{trylock()} capabilities, which means it does not need a language with a memory model or atomics API, and as such, it can be easily implemented in almost any language, regardless of the underlying CPU architecture or operating system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Ramalhete:2017:PWF, author = "Pedro Ramalhete and Andreia Correia", title = "{Poster}: a Wait-Free Queue with Wait-Free Memory Reclamation", journal = j-SIGPLAN, volume = "52", number = "8", pages = "453--454", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019022", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Queues are a widely deployed data structure. They are used extensively in many multithreaded applications, or as a communication mechanism between threads or processes. We propose a new linearizable multi-producer-multi-consumer queue we named Turn queue, with wait-free progress bounded by the number of threads, and with wait-free bounded memory reclamation.
Its main characteristics are: a simple algorithm that does no memory allocation apart from creating the node that is placed in the queue, a new wait-free consensus algorithm using only the atomic instruction compare-and-swap (CAS), and easy integration with other algorithms for either the enqueue or dequeue method.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Tang:2017:PSS, author = "Yuan Tang and Ronghui You", title = "{Poster}: {STAR} (Space-Time Adaptive and Reductive) Algorithms for Real-World Space-Time Optimality", journal = j-SIGPLAN, volume = "52", number = "8", pages = "455--456", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019029", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is important to strike a space-time balance for a real-world algorithm to achieve high performance on modern shared-memory multi-core or many-core systems. However, a large class of dynamic programs with more than $ O(1) $ dependency achieve optimality either in space or time, but not both. In the literature, the problem is known as the fundamental space-time tradeoff. By properly exploiting the runtime system, we show that our STAR (Space-Time Adaptive and Reductive) technique can help these dynamic programs to achieve sublinear parallel time bounds while still maintaining work-, space-, and cache-optimality in a processor- and cache-oblivious fashion.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Wu:2017:PRP, author = "Mingyu Wu and Haibing Guan and Binyu Zang and Haibo Chen", title = "{Poster}: Recovering Performance for Vector-based Machine Learning on Managed Runtime", journal = j-SIGPLAN, volume = "52", number = "8", pages = "457--458", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019039", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Zhang:2017:PPC, author = "Minjia Zhang and Swarnendu Biswas and Michael D. Bond", title = "{Poster}: On the Problem of Consistency Exceptions in the Context of Strong Memory Models", journal = j-SIGPLAN, volume = "52", number = "8", pages = "459--460", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019024", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This work considers the problem of availability for memory models that throw consistency exceptions. We define a new memory model called RIx based on isolation of synchronization-free regions and a new approach called Avalon that provides RIx. Our evaluation shows that Avalon and RIx substantially reduce consistency exceptions by 1--3 orders of magnitude, and sometimes eliminate them completely.
Furthermore, our exploration provides new, compelling points in the performance-availability tradeoff space.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Zhao:2017:PIH, author = "Yue Zhao and Chunhua Liao and Xipeng Shen", title = "{Poster}: an Infrastructure for {HPC} Knowledge Sharing and Reuse", journal = j-SIGPLAN, volume = "52", number = "8", pages = "461--462", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3019023", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a prototype infrastructure for addressing the barriers for effective accumulation, sharing, and reuse of the various types of knowledge for high performance parallel computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '17 conference proceedings.", } @Article{Shen:2017:BGB, author = "Xipeng Shen", title = "Bridging the gap between memory performance and massive parallelism: the critical role of programming systems innovations (keynote)", journal = j-SIGPLAN, volume = "52", number = "9", pages = "1--1", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This talk examines some trends in the modern developments of memory systems and their relations with the massive parallelism in processors and applications. It then draws on some recent work on GPU to explain the important role of programming systems in bridging the gap; it particularly emphasizes the importance of innovations for enabling better software controllability, more software elasticity, and inter-thread data locality enhancements. The talk further discusses the implications brought to programming systems by the increasingly blurred boundaries among memory, storage, and processing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Bruno:2017:NPG, author = "Rodrigo Bruno and Lu{\'\i}s Picciochi Oliveira and Paulo Ferreira", title = "{NG2C}: pretenuring garbage collection with dynamic generations for {HotSpot} big data applications", journal = j-SIGPLAN, volume = "52", number = "9", pages = "2--13", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092272", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Big Data applications suffer from unpredictable and unacceptably high pause times due to Garbage Collection (GC). This is the case in latency-sensitive applications such as on-line credit-card fraud detection, graph-based computing for analysis on social networks, etc. 
Such pauses compromise latency requirements of the whole application stack and result from applications' aggressive buffering/caching of data, exposing an ill-suited GC design, which assumes that most objects will die young and does not consider that applications hold large amounts of middle-lived data in memory. To avoid such pauses, we propose NG2C, a new GC algorithm that combines pretenuring with user-defined dynamic generations. By being able to allocate objects into different generations, NG2C is able to group objects with similar lifetime profiles in the same generation. By allocating objects with similar lifetime profiles close to each other, i.e. in the same generation, we avoid object promotion (copying between generations) and heap fragmentation (which leads to heap compactions), both responsible for most of the duration of HotSpot GC pause times. NG2C is implemented for the OpenJDK 8 HotSpot Java Virtual Machine, as an extension of the Garbage First GC. We evaluate NG2C using Cassandra, Lucene, and GraphChi with three different GCs: Garbage First (G1), Concurrent Mark Sweep (CMS), and NG2C. Results show that NG2C decreases the worst observable GC pause time by up to 94.8\% for Cassandra, 85.0\% for Lucene and 96.45\% for GraphChi, when compared to current collectors (G1 and CMS). In addition, NG2C has no negative impact on application throughput or memory usage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Yang:2017:TAA, author = "Albert Mingkun Yang and Tobias Wrigstad", title = "Type-assisted automatic garbage collection for lock-free data structures", journal = j-SIGPLAN, volume = "52", number = "9", pages = "14--24", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092274", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce Isolde, an automatic garbage collection scheme designed specifically for managing memory in lock-free data structures, such as stacks, lists, maps and queues. Isolde exists as a plug-in memory manager, designed to sit on top of another memory manager and use its allocator and reclaimer (if one exists). Isolde treats a lock-free data structure as a logical heap, isolated from the rest of the program. This allows garbage collection outside of Isolde to take place without affecting the lock-free data structure.
Isolde further manages objects allocated on a Isolde heap in a fully concurrent manner, allowing garbage collection to incrementally remove garbage without stopping other threads doing work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Vrvilo:2017:MDF, author = "Nick Vrvilo and Lechen Yu and Vivek Sarkar", title = "A marshalled data format for pointers in relocatable data blocks", journal = j-SIGPLAN, volume = "52", number = "9", pages = "25--35", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092276", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As future computing hardware progresses towards extreme-scale technology, new challenges arise for addressing heterogeneous compute and memory resources, for providing application resilience in the presence of more frequent failures, and for working within strict energy constraints. While C++ has gained popularity in recent years within the HPC community, some concepts of object-oriented program design may be at odds with the techniques we use to address the challenges of extreme-scale computing. In this work, we focus on the challenges related to using aggregate data structures that include pointer values within a programming model where the runtime may frequently relocate data, and traditional serialization techniques are not practical. We propose and evaluate a marshalled encoding for relocatable data blocks, and present a C++ library and other tools to simplify the work of the application programmer developing new applications or porting existing applications to such emerging programming models.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Liu:2017:FEM, author = "Zhengyang Liu and John Criswell", title = "Flexible and efficient memory object metadata", journal = j-SIGPLAN, volume = "52", number = "9", pages = "36--46", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092268", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Compiler-based tools can protect software from attack and find bugs within programs. To support programs written in type-unsafe languages such as C, such tools need to add code into a program that must, at run-time, take a pointer into a memory object and locate metadata for that memory object. Current methods of locating metadata are either flexible (supporting metadata of varying sizes) at the expense of speed and scalability or are fast (e.g., by using shadow tables) at the cost of flexibility (metadata is small and must always be the same size). This paper presents a new method of attaching metadata to memory objects, named Padding Area MetaData (PAMD), that is both flexible and efficient. Metadata can be any size, and different memory objects can have different sized metadata. While flexible, the algorithm for finding the metadata given a pointer into the memory object takes constant time. 
Our method extends Baggy Bounds with Accurate Checking (BBAC) which attaches constant-sized metadata to memory objects for performing precise dynamic bounds checks. Our design supports variable-sized metadata, and our implementation supports larger programs. We evaluated the performance and scalability of PAMD using dynamic bounds checking as an exemplar of our method. Our results show that our method adds at most 33\% overhead to an identical dynamic bounds checking tool that trades precision for performance by using a simple shadow table. Our results also show that our method, while having the same flexibility as splay trees, performs significantly faster and scales better as a program allocates more memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Vorobyov:2017:SSE, author = "Kostyantyn Vorobyov and Julien Signoles and Nikolai Kosmatov", title = "Shadow state encoding for efficient monitoring of block-level properties", journal = j-SIGPLAN, volume = "52", number = "9", pages = "47--58", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092269", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Memory shadowing associates addresses from an application's memory to values stored in a disjoint memory space called shadow memory. At runtime shadow values store metadata about application memory locations they are mapped to. Shadow state encodings --- the structure of shadow values and their interpretation --- vary across different tools. Encodings used by the state-of-the-art monitoring tools have been proven useful for tracking memory at a byte-level, but cannot address properties related to memory block boundaries. Tracking block boundaries is however crucial for spatial memory safety analysis, where a spatial violation such as out-of-bounds access, may dereference an allocated location belonging to an adjacent block or a different struct member. This paper describes two novel shadow state encodings which capture block-boundary-related properties. These encodings have been implemented in E-ACSL --- a runtime verification tool for C programs. Initial experiments involving checking validity of pointer and array accesses in computationally intensive runs of programs selected from SPEC CPU benchmarks demonstrate runtime and memory overheads comparable to state-of-the-art memory debuggers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Dashti:2017:AMM, author = "Mohammad Dashti and Alexandra Fedorova", title = "Analyzing memory management methods on integrated {CPU--GPU} systems", journal = j-SIGPLAN, volume = "52", number = "9", pages = "59--69", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092256", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous systems that integrate a multicore CPU and a GPU on the same die are ubiquitous. 
On these systems, both the CPU and GPU share the same physical memory as opposed to using separate memory dies. Although integration eliminates the need to copy data between the CPU and the GPU, arranging transparent memory sharing between the two devices can carry large overheads. Memory on CPU/GPU systems is typically managed by a software framework such as OpenCL or CUDA, which includes a runtime library, and communicates with a GPU driver. These frameworks offer a range of memory management methods that vary in ease of use, consistency guarantees and performance. In this study, we analyze some of the common memory management methods of the most widely used software frameworks for heterogeneous systems: CUDA, OpenCL 1.2, OpenCL 2.0, and HSA, on NVIDIA and AMD hardware. We focus on performance/functionality trade-offs, with the goal of exposing their performance impact and simplifying the choice of memory management methods for programmers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Giles:2017:CCH, author = "Ellis Giles and Kshitij Doshi and Peter Varman", title = "Continuous checkpointing of {HTM} transactions in {NVM}", journal = j-SIGPLAN, volume = "52", number = "9", pages = "70--81", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092270", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper addresses the challenges of coupling byte-addressable non-volatile memory (NVM) and hardware transactional memory (HTM) in high-performance transaction processing. We first show that HTM transactions can be ordered using existing processor instructions without any hardware changes. In contrast, existing solutions posit changes to HTM mechanisms in the form of special instructions or modified functionality. We exploit the ordering mechanism to design a novel persistence method that decouples HTM concurrency from back-end NVM operations. Failure atomicity is achieved using redo logging coupled with aliasing to guard against mistimed cache evictions. Our algorithm uses efficient lock-free mechanisms with bounded static memory requirements. We evaluated our approach using both micro-benchmarks and benchmarks in the STAMP suite, and showed that it compares well with standard (volatile) HTM transactions.
We also showed that it yields significant gains in throughput and latency in comparison with persistent transactional locking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Peng:2017:RTD, author = "Ivy Bo Peng and Roberto Gioiosa and Gokcen Kestor and Pietro Cicotti and Erwin Laure and Stefano Markidis", title = "{RTHMS}: a tool for data placement on hybrid memory system", journal = j-SIGPLAN, volume = "52", number = "9", pages = "82--91", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092273", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Traditional scientific and emerging data analytics applications require fast, power-efficient, large, and persistent memories. Combining all these characteristics within a single memory technology is expensive and hence future supercomputers will feature different memory technologies side-by-side. However, it is a complex task to program hybrid-memory systems and to identify the best object-to-memory mapping. We envision that programmers will probably resort to use default configurations that only require minimal interventions on the application code or system settings. In this work, we argue that intelligent, fine-grained data placement can achieve higher performance than default setups. We present an algorithm for data placement on hybrid-memory systems. Our algorithm is based on a set of single-object allocation rules and global data placement decisions. We also present RTHMS, a tool that implements our algorithm and provides recommendations about the object-to-memory mapping. Our experiments on a hybrid memory system, an Intel Knights Landing processor with DRAM and HBM, show that RTHMS is able to achieve higher performance than the default configuration. We believe that RTHMS will be a valuable tool for programmers working on complex hybrid-memory systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Kanvar:2017:WNG, author = "Vini Kanvar and Uday P. Khedker", title = "``{What}'s in a name?'' going beyond allocation site names in heap analysis", journal = j-SIGPLAN, volume = "52", number = "9", pages = "92--103", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092267", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A points-to analysis computes a sound abstraction of heap memory conventionally using a name-based abstraction that summarizes runtime memory by grouping locations using the names of allocation sites: All concrete heap locations allocated by the same statement are grouped together. The locations in the same group are treated alike i.e., a pointer to any one location of the group is assumed to point to every location in the group leading to an over-approximation of points-to relations. 
We propose an access-based abstraction that partitions each name-based group of locations into equivalence classes at every program point using an additional criterion of the sets of access paths (chains of pointer indirections) reaching the locations in the memory. The intuition is that the locations that are both allocated and accessed alike should be grouped into the same equivalence class. Since the access paths in the memory could reach different locations at different program points, our groupings change flow sensitively unlike the name-based groupings. This creates a more precise view of the memory. Theoretically, it is strictly more precise than the name-based abstraction except in some trivial cases; practically it is far more precise. Our empirical measurements show the benefits of our tool Access-Based Heap Analyzer (ABHA) on SPEC CPU 2006 and heap manipulating SV-COMP benchmarks. ABHA, which is field-, flow-, and context-sensitive, scales to 20 kLoC and can improve the precision even up to 99\% (in terms of the number of aliases). Additionally, ABHA allows any user-defined summarization of an access path to be plugged in; we have implemented and evaluated four summarization techniques. ABHA can also act as a front-end to TVLA, a parametrized shape analyzer, in order to automate its parametrization by generating predicates that capture the program behaviour more accurately.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Fang:2017:RHF, author = "Bin Fang and Mihaela Sighireanu", title = "A refinement hierarchy for free list memory allocators", journal = j-SIGPLAN, volume = "52", number = "9", pages = "104--114", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092275", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Existing implementations of dynamic memory allocators (DMA) employ a large spectrum of policies and techniques. The formal specifications of these techniques are quite complicated in isolation and very complex when combined. Therefore, the formal reasoning on a specific DMA implementation is difficult for automatic tools and mostly single-use. This paper proposes a solution to this problem by providing formal models for a full class of DMA, the free list class. To obtain manageable formal reasoning and reusable formal models, we organize these models in a hierarchy ranked by refinement relations. We prove the soundness of models and refinement relations using an off-the-shelf theorem prover. We demonstrate that our hierarchy is a basis for an algorithm theory for the class of free list DMA: it abstracts various existing implementations of DMA and leads to new DMA implementations. We illustrate its application to model-based code generation, testing, run-time verification, and static analysis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Zhang:2017:ACE, author = "Minjia Zhang and Swarnendu Biswas and Michael D. 
Bond", title = "Avoiding consistency exceptions under strong memory models", journal = j-SIGPLAN, volume = "52", number = "9", pages = "115--127", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092271", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Shared-memory languages and systems generally provide weak or undefined semantics for executions with data races. Prior work has proposed memory consistency models that ensure well-defined, easy-to-understand semantics based on region serializability (RS), but the resulting system may throw a consistency exception in the presence of a data race. Consistency exceptions can occur unexpectedly even in well-tested programs, hurting availability and thus limiting the practicality of RS-based memory models. To our knowledge, this paper is the first to consider the problem of availability for memory consistency models that throw consistency exceptions. We first extend existing approaches that enforce RSx, a memory model based on serializability of synchronization-free regions (SFRs), to avoid region conflicts and thus consistency exceptions. These new approaches demonstrate both the potential for and limitations of avoiding consistency exceptions under RSx. To improve availability further, we introduce (1) a new memory model called RIx based on isolation of SFRs and (2) a new approach called Avalon that provides RIx. We demonstrate two variants of Avalon that offer different performance--availability tradeoffs for RIx. An evaluation on real Java programs shows that this work's novel approaches are able to reduce consistency exceptions, thereby improving the applicability of strong memory consistency models. Furthermore, the approaches provide compelling points in the performance--availability tradeoff space for memory consistency enforcement. RIx and Avalon thus represent a promising direction for tackling the challenge of availability under strong consistency models that throw consistency exceptions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '17 conference proceedings.", } @Article{Remy:2017:OEP, author = "Didier R{\'e}my", title = "{Ornaments}: exploiting parametricity for safer, more automated code refactorization and code reuse (invited talk)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "1--1", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3127333", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Inductive datatypes and parametric polymorphism are two key features introduced in the ML family of languages, which have already been widely exploited for structuring programs: Haskell and ML programs are often more elegant and more correct by construction. Still, we sometimes need code to be refactored or adapted to be reused in a slightly different context. 
While the type system is considerably helpful in these situations, by automatically locating type-inconsistent program points or incomplete pattern matchings, this process could be made safer and more automated by further exploiting parametricity. We propose a posteriori program abstraction as a principle for such code transformations. We apply this principle to ornamentation which is a way to describe changes in datatype definitions reorganizing, adding, or dropping some pieces of data so that functions operating on the bare definition can be partially and sometimes totally lifted into functions operating on the ornamented structure. We view ornamentation as an a posteriori abstraction of the bare code, called a generic lifting, which can then be instantiated into a concrete lifting, meta-reduced, and simplified. Both the source and target code live in core ML while the lifted code lives in a meta-language above ML equipped with a limited form of dependent types needed to capture some invariants of the generic lifting so that the concrete lifting can be simplified back into an ML program. Importantly, the lifted code can be closely related to the bare code, using logical relations thanks to the generic lifting detour. Different, typical use cases of ornaments will be shown and the approach will be mainly illustrated on examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Mokhov:2017:AGC, author = "Andrey Mokhov", title = "Algebraic graphs with class (functional pearl)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "2--13", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122956", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The paper presents a minimalistic and elegant approach to working with graphs in Haskell. It is built on a rigorous mathematical foundation --- an algebra of graphs --- that allows us to apply equational reasoning for proving the correctness of graph transformation algorithms. Algebraic graphs let us avoid partial functions typically caused by `malformed graphs' that contain an edge referring to a non-existent vertex. This helps to liberate APIs of existing graph libraries from partial functions. The algebra of graphs can represent directed, undirected, reflexive and transitive graphs, as well as hypergraphs, by appropriately choosing the set of underlying axioms. 
The flexibility of the approach is demonstrated by developing a library for constructing and transforming polymorphic graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Blazevic:2017:PPP, author = "Mario Blazevi{\'c} and Jacques L{\'e}gar{\'e}", title = "Packrats parse in packs", journal = j-SIGPLAN, volume = "52", number = "10", pages = "14--25", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122958", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel but remarkably simple formulation of formal language grammars in Haskell as functions mapping a record of production parsers to itself. Thus formulated grammars are first-class objects, composable and reusable. We also provide a simple parser implementation for them, based on an improved packrat algorithm. In order to make the grammar manipulation code reusable, we introduce a set of type classes mirroring the existing type classes from Haskell base library, but whose methods have rank-2 types.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Lampropoulos:2017:ORU, author = "Leonidas Lampropoulos and Antal Spector-Zabusky and Kenneth Foner", title = "Ode on a random urn (functional pearl)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "26--37", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122959", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the urn, a simple tree-based data structure that supports sampling from and updating discrete probability distributions in logarithmic time. We avoid the usual complexity of traditional self-balancing binary search trees by not keeping values in a specific order. Instead, we keep the tree maximally balanced at all times using a single machine word of overhead: its size. Urns provide an alternative interface for the frequency combinator from the QuickCheck library that allows for asymptotically more efficient sampling from dynamically-updated distributions. 
They also facilitate backtracking in property-based random testing, and can be applied to such complex examples from the literature as generating well-typed lambda terms or information flow machine states, demonstrating significant speedups.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Algehed:2017:QLT, author = "Maximilian Algehed and Koen Claessen and Moa Johansson and Nick Smallbone", title = "{QuickSpec}: a lightweight theory exploration tool for programmers (system demonstration)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "38--39", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This document gives the outline of a system demonstration for the QuickSpec theory exploration tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Braquehais:2017:SDC, author = "Rudy Braquehais and Colin Runciman", title = "{Speculate}: discovering conditional equations and inequalities about black-box functions by reasoning from test results", journal = j-SIGPLAN, volume = "52", number = "10", pages = "40--51", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122961", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents Speculate, a tool that automatically conjectures laws involving conditional equations and inequalities about Haskell functions. Speculate enumerates expressions involving a given collection of Haskell functions, testing to separate those expressions into apparent equivalence classes. Expressions in the same equivalence class are used to conjecture equations. Representative expressions of different equivalence classes are used to conjecture conditional equations and inequalities. Speculate uses lightweight equational reasoning based on term rewriting to discard redundant laws and to avoid needless testing. Several applications demonstrate the effectiveness of Speculate.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Wiegley:2017:UCW, author = "John Wiegley and Benjamin Delaware", title = "Using {Coq} to write fast and correct {Haskell}", journal = j-SIGPLAN, volume = "52", number = "10", pages = "52--62", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Correctness and performance are often at odds in the field of systems engineering, either because correct programs are too costly to write or impractical to execute, or because well-performing code involves so many tricks of the trade that formal analysis is unable to isolate the main properties of the algorithm. 
As a prime example of this tension, Coq is an established proof environment that allows writing correct, dependently-typed code, but it has been criticized for exorbitant development times, forcing the developer to choose between optimal code or tractable proofs. On the other side of the divide, Haskell has proven itself to be a capable, well-typed programming environment, yet easy-to-read, straightforward code must all too often be replaced by highly optimized variants that obscure the author's original intention. This paper builds on the existing Fiat refinement framework to bridge this divide, demonstrating how to derive a correct-by-construction implementation that meets (or exceeds) the performance characteristics of highly optimized Haskell, starting from a high-level Coq specification. To achieve this goal, we extend Fiat with a stateful notion of refinement of abstract data types and add support for extracting stateful code via a free monad equipped with an algebra of heap-manipulating operations. As a case study, we reimplement a subset of the popular bytestring library, with little to no loss of performance, while retaining a high guarantee of program correctness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Vazou:2017:TTP, author = "Niki Vazou and Leonidas Lampropoulos and Jeff Polakow", title = "A tale of two provers: verifying monoidal string matching in liquid {Haskell} and {Coq}", journal = j-SIGPLAN, volume = "52", number = "10", pages = "63--74", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122963", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "We demonstrate for the first time that Liquid Haskell, a refinement type checker for Haskell programs, can be used for arbitrary theorem proving by verifying a parallel, monoidal string matching algorithm implemented in Haskell. We use refinement types to specify correctness properties, Haskell terms to express proofs of these properties, and Liquid Haskell to check the proofs. We evaluate Liquid Haskell as a theorem prover by replicating our 1428 LoC proof in a dependently-typed language (Coq --- 1136 LoC). Finally, we compare both proofs, uncovering the relative advantages and disadvantages of the two provers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Ekblad:2017:MED, author = "Anton Ekblad", title = "A meta-{EDSL} for distributed web applications", journal = j-SIGPLAN, volume = "52", number = "10", pages = "75--85", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122969", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a domain-specific language for constructing and configuring web applications distributed across any number of networked, heterogeneous systems. 
Our language is embedded in Haskell, provides a common framework for integrating components written in third-party EDSLs, and enables type-safe, access-controlled communication between nodes, as well as effortless sharing and movement of functionality between application components. We give an implementation of our language and demonstrate its applicability by using it to implement several important components of distributed web applications, including RDBMS integration, load balancing, and fine-grained sandboxing of untrusted third party code. The rising popularity of cloud computing and heterogeneous computer architectures is putting a strain on conventional programming models, which commonly assume that one application executes on one machine, or at best on one out of several identical machines. With our language, we take the first step towards a programming model better suited for a computationally multicultural future.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Dawson:2017:CNS, author = "Justin Dawson and Mark Grebe and Andy Gill", title = "Composable network stacks and remote monads", journal = j-SIGPLAN, volume = "52", number = "10", pages = "86--97", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122968", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Monads and applicative functors are two ways that Haskell programmers bundle effectful primitives into effectful program fragments. In this paper, we investigate using monads and applicative functors to bundle remote effectful primitives, specifically aiming to amortize the cost of remote communications using bundling. We look at several ways of maximizing the bundling of primitives, drawing from the remote monad design pattern and Haxl system, and provide a taxonomy of mechanism for amortization, with examples. The result of this investigation is that monadic fragments can be efficiently bundled into packets, almost for free, when given a user-supplied packet transportation mechanism, and the primitives obey some simple pre- and post-conditions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Quick:2017:AMH, author = "Donya Quick", title = "Algorithmic music in {Haskell} (invited talk)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "98--98", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3127334", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional programming is becoming increasingly popular in artistic areas such as algorithmic music composition. Euterpea and Kulitta are two libraries for working with music in Haskell. Euterpea is a library for representing and manipulating basic musical structures, and is useful both in a pedagogical setting to teach functional programming through the arts and as a tool to create complex pieces of algorithmic music. 
Kulitta is a framework for automated composition that addresses music at a more abstract level than Euterpea, capturing aspects of musical style through geometric models and probabilistic grammars. Both of these libraries leverage Haskell's pure functional nature and strong type system to achieve versatile, yet concise designs that allow the creation of diverse and interesting music. Features from these libraries have also been integral in the design of newer systems for natural language processing and artificial intelligence in the musical domain. This talk will explore challenges presented by creating these kinds of domain-specific embedded languages for working with music, and how taking functional approaches to them yields elegant solutions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Szamozvancev:2017:WTM, author = "Dmitrij Szamozvancev and Michael B. Gale", title = "Well-typed music does not sound wrong (experience report)", journal = j-SIGPLAN, volume = "52", number = "10", pages = "99--104", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122964", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Music description and generation are popular use cases for Haskell, ranging from live coding libraries to automatic harmonisation systems. Some approaches use probabilistic methods, others build on the theory of Western music composition, but there has been little work done on checking the correctness of musical pieces in terms of voice leading, harmony, and structure. Haskell's recent additions to the type-system now enable us to perform such analysis statically. We present our experience of implementing a type-level model of classical music and an accompanying EDSL which enforce the rules of classical music at compile-time, turning composition mistakes into compiler errors. Along the way, we discuss the strengths and limitations of doing this in Haskell and demonstrate that the type system of the language is fully capable of expressing non-trivial and practical logic specific to a particular domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Perez:2017:BFT, author = "Ivan Perez", title = "Back to the future: time travel in {FRP}", journal = j-SIGPLAN, volume = "52", number = "10", pages = "105--116", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122957", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional Reactive Programming (FRP) allows interactive applications to be modelled in a declarative manner using time-varying values. For practical reasons, however, operational constraints are often imposed, such as having a fixed time domain, time always flowing forward, and limiting the exploration of the past. In this paper we show how these constraints can be overcome, giving local control over the time domain, the direction of time and the sampling step. 
We study the behaviour of FRP expressions when time flows backwards, and demonstrate how to synchronize subsystems running asynchronously and at different sampling rates. We have verified the practicality of our approach with two non-trivial games in which time control is central to the gameplay.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Paykin:2017:LM, author = "Jennifer Paykin and Steve Zdancewic", title = "The Linearity Monad", journal = j-SIGPLAN, volume = "52", number = "10", pages = "117--132", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122965", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce a technique for programming with domain-specific linear languages using the monad that arises from the theory of linear/non-linear logic. In this work we interpret the linear/non-linear model as a simple, effectful linear language embedded inside an existing non-linear host language. We implement a modular framework for defining these linear EDSLs in Haskell, allowing both shallow and deep embeddings. To demonstrate the effectiveness of the framework and the linearity monad, we implement languages for file handles, mutable arrays, session types, and quantum computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Karachalias:2017:EFD, author = "Georgios Karachalias and Tom Schrijvers", title = "Elaboration on functional dependencies: functional dependencies are dead, long live functional dependencies!", journal = j-SIGPLAN, volume = "52", number = "10", pages = "133--147", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Functional dependencies are a popular extension to Haskell's type-class system because they provide fine-grained control over type inference, resolve ambiguities and even enable type-level computations. Unfortunately, several aspects of Haskell's functional dependencies are ill-understood. In particular, the GHC compiler does not properly enforce the functional dependency property, and rejects well-typed programs because it does not know how to elaborate them into its core language, System F$_C$. This paper presents a novel formalization of functional dependencies that addresses these issues: We explicitly capture the functional dependency property in the type system, in the form of explicit type equalities. We also provide a type inference algorithm and an accompanying elaboration strategy which allows all well-typed programs to be elaborated into System F$_C$.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Bottu:2017:QCC, author = "Gert-Jan Bottu and Georgios Karachalias and Tom Schrijvers and Bruno C. d. S. 
Oliveira and Philip Wadler", title = "Quantified class constraints", journal = j-SIGPLAN, volume = "52", number = "10", pages = "148--161", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Quantified class constraints have been proposed many years ago to raise the expressive power of type classes from Horn clauses to the universal fragment of Hereditary Harrop logic. Yet, while it has been much asked for over the years, the feature was never implemented or studied in depth. Instead, several workarounds have been proposed, all of which are ultimately stopgap measures. This paper revisits the idea of quantified class constraints and elaborates it into a practical language design. We show the merit of quantified class constraints in terms of more expressive modeling and in terms of terminating type class resolution. In addition, we provide a declarative specification of the type system as well as a type inference algorithm that elaborates into System F. Moreover, we discuss termination conditions of our system and also provide a prototype implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Aronsson:2017:HSC, author = "Markus Aronsson and Mary Sheeran", title = "Hardware software co-design in {Haskell}", journal = j-SIGPLAN, volume = "52", number = "10", pages = "162--173", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122970", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a library in Haskell for programming Field Programmable Gate Arrays (FPGAs), including hardware software co-design. Code for software (in C) and hardware (in VHDL) is generated from a single program, along with the code to support communication between hardware and software. We present type-based techniques for the simultaneous implementation of more than one embedded domain specific language (EDSL). We build upon a generic representation of imperative programs that is loosely coupled to instruction and expression types, allowing the individual parts to be developed and improved separately. Code generation is implemented as a series of translations between progressively smaller, typed EDSLs, safeguarding against errors that arise in untyped translations. Initial case studies show promising performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Clifton-Everest:2017:SIA, author = "Robert Clifton-Everest and Trevor L. McDonell and Manuel M. T. 
Chakravarty and Gabriele Keller", title = "Streaming irregular arrays", journal = j-SIGPLAN, volume = "52", number = "10", pages = "174--185", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122971", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Previous work has demonstrated that it is possible to generate efficient and highly parallel code for multicore CPUs and GPUs from combinator-based array languages for a range of applications. That work, however, has been limited to operating on flat, rectangular structures without any facilities for irregularity or nesting. In this paper, we show that even a limited form of nesting provides substantial benefits both in terms of the expressiveness of the language (increasing modularity and providing support for simple irregular structures) and the portability of the code (increasing portability across resource-constrained devices, such as GPUs). Specifically, we generalise Blelloch's flattening transformation along two lines: (1) we explicitly distinguish between definitely regular and potentially irregular computations; and (2) we handle multidimensional arrays. We demonstrate the utility of this generalisation by an extension of the embedded array language Accelerate to include irregular streams of multidimensional arrays. We discuss code generation, optimisation, and irregular stream scheduling as well as a range of benchmarks on both multicore CPUs and GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Yates:2017:ISP, author = "Ryan Yates and Michael L. Scott", title = "Improving {STM} performance with transactional structs", journal = j-SIGPLAN, volume = "52", number = "10", pages = "186--196", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122972", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software transactional memory (STM) has made it significantly easier to write correct concurrent programs in Haskell. Its performance, however, is limited by several inefficiencies. While safe concurrent computations are easy to express in Haskell's STM, concurrent data structures suffer unfortunate bloat in the implementation due to an extra level of indirection for mutable references as well as the inability to express unboxed mutable transactional values. We address these deficiencies by introducing TStruct to the GHC run-time system, allowing strict unboxed transactional values as well as mutable references without an extra indirection. Using TStruct we implement several data structures, discuss their design, and provide benchmark results on a large multicore machine.
Our benchmarks show that concurrent data structures built with TStruct out-scale and out-perform their TVar-based equivalents.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Chen:2017:ALF, author = "Chao-Hong Chen and Vikraman Choudhury and Ryan R. Newton", title = "Adaptive lock-free data structures in {Haskell}: a general method for concurrent implementation swapping", journal = j-SIGPLAN, volume = "52", number = "10", pages = "197--211", month = oct, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156695.3122973", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A key part of implementing high-level languages is providing built-in and default data structures. Yet selecting good defaults is hard. A mutable data structure's workload is not known in advance, and it may shift over its lifetime --- e.g., between read-heavy and write-heavy, or from heavy contention by multiple threads to single-threaded or low-frequency use. One idea is to switch implementations adaptively, but it is nontrivial to switch the implementation of a concurrent data structure at runtime. Performing the transition requires a concurrent snapshot of data structure contents, which normally demands special engineering in the data structure's design. However, in this paper we identify and formalize a relevant property of lock-free algorithms. Namely, lock-freedom is sufficient to guarantee that freezing memory locations in an arbitrary order will result in a valid snapshot. Several functional languages have data structures that freeze and thaw, transitioning between mutable and immutable, such as Haskell vectors and Clojure transients, but these enable only single-threaded writers. We generalize this approach to augment an arbitrary lock-free data structure with the ability to gradually freeze and optionally transition to a new representation. This augmentation doesn't require changing the algorithm or code for the data structure, only replacing its datatype for mutable references with a freezable variant. In this paper, we present an algorithm for lifting plain to adaptive data and prove that the resulting hybrid data structure is itself lock-free, linearizable, and simulates the original. We also perform an empirical case study in the context of heating up and cooling down concurrent maps.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '17 conference proceedings.", } @Article{Pizlo:2017:JVM, author = "Filip Pizlo", title = "The {JavaScriptCore} virtual machine (invited talk)", journal = j-SIGPLAN, volume = "52", number = "11", pages = "1--1", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3148567", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "JavaScriptCore (JSC) is an open-source high-performance implementation of JavaScript.
JSC is used in the WebKit open source browser engine as well as a system framework on macOS and iOS. This talk will give a broad high-level overview of JSC's performance-oriented architecture, including specific details about the object model, garbage collector, optimizing compilers, type inference, and deoptimization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Siek:2017:CPT, author = "Jeremy Siek", title = "Challenges and progress toward efficient gradual typing (invited talk)", journal = j-SIGPLAN, volume = "52", number = "11", pages = "2--2", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3148570", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mixing static and dynamic type checking in the same language is catching on, with the TypeScript and Flow variants of JavaScript, the MyPy and Reticulated variants of Python, the Strongtalk and Gradualtalk variants of Smalltalk, as well as Typed Racket, Typed Clojure, and Perl 6. The gradual typing approach to such mixing seeks to protect the statically typed code from the dynamically typed code, allowing compilers to leverage type information when optimizing the static code. Unfortunately, ensuring soundness requires runtime checking at the boundaries of typed and untyped code, and the cost of this checking can drown out the performance benefits of optimization. For example, in Typed Racket, some partially typed programs are 1000X slower than the untyped or fully typed version of the same program. But all is not lost! In this talk I present the results of ongoing research to tame the runtime overheads of gradual typing in the context of a prototype compiler, named Grift, that we are developing at Indiana University.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Marr:2017:CAP, author = "Stefan Marr and Carmen Torres Lopez and Dominik Aumayr and Elisa Gonzalez Boix and Hanspeter M{\"o}ssenb{\"o}ck", title = "A concurrency-agnostic protocol for multi-paradigm concurrent debugging tools", journal = j-SIGPLAN, volume = "52", number = "11", pages = "3--14", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133842", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today's complex software systems combine high-level concurrency models. Each model is used to solve a specific set of problems. Unfortunately, debuggers support only the low-level notions of threads and shared memory, forcing developers to reason about these notions instead of the high-level concurrency models they chose. This paper proposes a concurrency-agnostic debugger protocol that decouples the debugger from the concurrency models employed by the target application. 
As a result, the underlying language runtime can define custom breakpoints, stepping operations, and execution events for each concurrency model it supports, and a debugger can expose them without having to be specifically adapted. We evaluated the generality of the protocol by applying it to SOMns, a Newspeak implementation, which supports a diversity of concurrency models including communicating sequential processes, communicating event loops, threads and locks, fork/join parallelism, and software transactional memory. We implemented 21 breakpoints and 20 stepping operations for these concurrency models. For none of these, the debugger needed to be changed. Furthermore, we visualize all concurrent interactions independently of a specific concurrency model. To show that tooling for a specific concurrency model is possible, we visualize actor turns and message sends separately.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Ungar:2017:DAO, author = "David Ungar and David Grove and Hubertus Franke", title = "Dynamic atomicity: optimizing swift memory management", journal = j-SIGPLAN, volume = "52", number = "11", pages = "15--26", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133843", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Swift is a modern multi-paradigm programming language with an extensive developer community and open source ecosystem. Swift 3's memory management strategy is based on Automatic Reference Counting (ARC) augmented with unsafe APIs for manually-managed memory. We have seen ARC consume as much as 80\% of program execution time. A significant portion of ARC's direct performance cost can be attributed to its use of atomic machine instructions to protect reference count updates from data races. Consequently, we have designed and implemented dynamic atomicity, an optimization which safely replaces atomic reference-counting operations with nonatomic ones where feasible. The optimization introduces a store barrier to detect possibly intra-thread references, compiler-generated recursive reference-tracers to find all affected objects, and a bit of state in each reference count to encode its atomicity requirements. Using a suite of 171 microbenchmarks, 9 programs from the Computer Language Benchmarks Game, and the Richards benchmark, we performed a limit study by unsafely making all reference counting operations nonatomic. We measured potential speedups of up to 220\% on the microbenchmarks, 120\% on the Benchmarks Game and 70\% on Richards. By automatically reducing ARC overhead, our optimization both improves Swift 3's performance and reduces the temptation for performance-oriented programmers to resort to unsafe manual memory management. 
Furthermore, the machinery implemented for dynamic atomicity could also be employed to obtain cheaper thread-safe Swift data structures, or to augment ARC with optional cycle detection or a backup tracing garbage collector.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Infante:2017:OER, author = "Alejandro Infante and Alexandre Bergel", title = "Object equivalence: revisiting object equality profiling (an experience report)", journal = j-SIGPLAN, volume = "52", number = "11", pages = "27--38", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133844", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern object-oriented programming languages greatly alleviate memory management for programmers. Despite the efficiency of garbage collection and Just-In-Time program analyses, memory still remains prone to being wasted. A bloated memory may have severe consequences, including frequent execution lags due to a high pressure on the garbage collector and suboptimal object dependencies. We found that dynamically monitoring object production sites and the equivalence of the produced objects is key to identifying wasted memory consumption caused by redundant objects. We implemented optimizations for reducing the memory consumption of six applications, achieving a reduction of over 40\% in half of the applications without having any prior knowledge of these applications. Our results partially replicate the results obtained by Marinov and O'Callahan and explore new ways to identify redundant objects.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Pimas:2017:GCE, author = "Javier Pim{\'a}s and Javier Burroni and Jean Baptiste Arnaud and Stefan Marr", title = "Garbage collection and efficiency in dynamic metacircular runtimes: an experience report", journal = j-SIGPLAN, volume = "52", number = "11", pages = "39--50", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133845", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "In dynamic object-oriented languages, low-level mechanisms such as just-in-time compilation, object allocation, garbage collection (GC) and method dispatch are often handled by virtual machines (VMs). VMs are typically implemented using static languages, allowing only a few changes at run time. In such systems, the VM is not part of the language and interfaces to memory management or method dispatch are fixed, not allowing for arbitrary adaptation. Furthermore, the implementation can typically not be inspected or debugged with standard tools used to work on application code. This paper reports on our experience building Bee, a dynamic Smalltalk runtime, written in Smalltalk.
Bee is a Dynamic Metacircular Runtime (DMR) and seamlessly integrates the VM into the application and thereby overcomes many restrictions of classic VMs, for instance by allowing arbitrary code modifications of the VM at run time. Furthermore, the approach enables developers to use their standard tools for application code also for the VM, allowing them to inspect, debug, understand, and modify a DMR seamlessly. We detail our experience of implementing GC, compilation, and optimizations in a DMR. We discuss examples where we found that DMRs can improve understanding of the system, provide tighter control of the software stack, and facilitate research. We also show that the Bee DMR matches and surpasses the performance of a widely used Smalltalk VM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Loring:2017:SAJ, author = "Matthew C. Loring and Mark Marron and Daan Leijen", title = "Semantics of asynchronous {JavaScript}", journal = j-SIGPLAN, volume = "52", number = "11", pages = "51--62", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133846", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript code running in the Node.js runtime is a major platform for developers building cloud, mobile, or IoT applications. A fundamental concept in Node.js programming is the use of asynchronous callbacks and event loops to provide highly responsive applications. While conceptually simple, this programming model contains numerous subtleties and behaviors that are defined implicitly by the current Node.js implementation. This paper presents the first comprehensive formalization of the Node.js asynchronous execution model and defines a high-level notion of async-contexts to formalize fundamental relationships between asynchronous executions in an application. These formalizations provide a foundation for the construction of static or dynamic program analysis tools, support the exploration of alternative Node.js event loop implementations, and provide a high-level conceptual framework for reasoning about relationships between the execution of asynchronous callbacks in a Node.js application.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Vergu:2017:SNR, author = "Vlad Vergu and Michiel Haisma and Eelco Visser", title = "The semantics of name resolution in {Grace}", journal = j-SIGPLAN, volume = "52", number = "11", pages = "63--74", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133847", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Grace is a dynamic object-oriented programming language designed to aid programming education. We present a formal model of and give an operational semantics for its object model and name resolution algorithm.
Our main contributions are a systematic model of Grace's name resolution using scope graphs, relating linguistic features to other languages, and an operationalization of this model in the form of an operational semantics which is readable and executable. The semantics are extensively tested against a reference Grace implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Soldevila:2017:DLF, author = "Mallku Soldevila and Beta Ziliani and Bruno Silvestre and Daniel Fridlender and Fabio Mascarenhas", title = "Decoding {Lua}: formal semantics for the developer and the semanticist", journal = j-SIGPLAN, volume = "52", number = "11", pages = "75--86", month = nov, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170472.3133848", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We provide formal semantics for a large subset of the Lua programming language, in its version 5.2. We validate our model by mechanizing it and testing it against the test suite of the reference interpreter of Lua, obtaining evidence that our model accurately represents the language. We target both a PL semanticist --- not necessarily versed in Lua --- , and a Lua developer --- not necessarily versed in semantic frameworks. To the former, we present the peculiarities of the language, and how we model them in a modular small-step operational semantics, using concepts from Felleisen-Hieb's reduction semantics with evaluation contexts. Moreover, we mechanize and test the model in PLT Redex, the de facto tool for reduction semantics. To the reader unfamiliar with such concepts, we provide a gentle introduction to the model. It is our hope that developers of the different Lua implementations and dialects understand the model and consider it both for testing their work and for experimenting with new language features.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "DLS '17 conference proceedings.", } @Article{Dig:2017:LRR, author = "Danny Dig", title = "The landscape of refactoring research in the last decade (keynote)", journal = j-SIGPLAN, volume = "52", number = "12", pages = "1--1", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3148040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In the last decade refactoring research has seen an exponential growth. I will attempt to map this vast landscape and the advances that the community has made by answering questions such as who does what, when, where, with who, why, and how. I will muse on some of the factors contributing to the growth of the field, the adoption of research into industry, and the lessons that we learned along this journey. 
This will inspire and equip you so that you can make a difference, with people who make a difference, at a time when it makes a difference.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Ge:2017:RSM, author = "Rui Ge and Ronald Garcia", title = "Refining semantics for multi-stage programming", journal = j-SIGPLAN, volume = "52", number = "12", pages = "2--14", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136047", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The multi-stage programming paradigm supports runtime code generation and execution. Though powerful, its potential is impeded by the lack of static analysis support. Van Horn and Might proposed a general-purpose approach to systematically develop static analyses by transforming an environmental abstract machine, which evolves a control string, an environment and a continuation as a program evaluates. To the best of our knowledge, no such semantics exists for a multi-stage language like MetaML. We develop and prove correct an environmental abstract machine semantics for MetaML by gradually refining the reference substitutional structural operational semantics. Highlights of our approach include leveraging explicit substitutions to bridge the gap between substitutional and environmental semantics, and devising meta-environments to model the complexities of variable bindings in multi-stage environmental semantics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Ofenbeck:2017:SGP, author = "Georg Ofenbeck and Tiark Rompf and Markus P{\"u}schel", title = "Staging for generic programming in space and time", journal = j-SIGPLAN, volume = "52", number = "12", pages = "15--28", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136060", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Metaprogramming is among the most promising candidates to solve the abstraction vs performance trade-off that plagues software engineering through specialization. Metaprogramming has been used to enable low-overhead generic programming for a long time, with C++ templates being one of the most prominent examples. But often a single, fixed pattern of specialization is not enough, and more flexibility is needed. Hence, this paper seeks to apply generic programming techniques to challenges in metaprogramming, in particular to abstract over the execution stage of individual program expressions. We thus extend the scope of generic programming into the dimension of time. The resulting notion of stage polymorphism enables novel abstractions in the design of program generators, which we develop and explore in this paper. 
We present one possible implementation, in Scala using the lightweight modular staging (LMS) framework, and apply it to two important case studies: convolution on images and the fast Fourier transform (FFT).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Oishi:2017:SCT, author = "Junpei Oishi and Yukiyoshi Kameyama", title = "Staging with control: type-safe multi-stage programming with control operators", journal = j-SIGPLAN, volume = "52", number = "12", pages = "29--40", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136049", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Staging allows a programmer to write domain-specific, custom code generators. Ideally, a programming language for staging provides all necessary features for staging, and at the same time, gives static guarantee for the safety properties of generated code including well typedness and well scopedness. We address this classic problem for the language with control operators, which allow code optimizations in a modular and compact way. Specifically, we design a staged programming language with the expressive control operators shift0 and reset0, which let us express, for instance, multi-layer let-insertion, while keeping the static guarantee of well typedness and well scopedness. For this purpose, we extend our earlier work on refined environment classifiers which were introduced for the staging language with state. We show that our language is expressive enough to express interesting code generation techniques, and that the type system enjoys type soundness. We also mention a type inference algorithm for our language under reasonable restriction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Courtes:2017:CSG, author = "Ludovic Court{\`e}s", title = "Code staging in {GNU Guix}", journal = j-SIGPLAN, volume = "52", number = "12", pages = "41--48", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136045", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "GNU Guix is a ``functional'' package manager that borrows from earlier work on Nix by Dolstra et al.. Guix implements high-level abstractions such as packages and operating system services as domain-specific languages (DSL) embedded in Scheme, and it also implements build actions and operating system orchestration in Scheme. This leads to a multi-tier programming environment where embedded code snippets are staged for eventual execution. In this paper we present G-expressions or ``gexps''. We explain our journey from traditional Lisp S-expressions to G-expressions, which augment the former with contextual information, and we discuss the implementation of gexps. We report on our experience using gexps in a variety of operating system use cases --- from package build processes to system services. 
Gexps provide a novel way to cover many aspects of OS configuration in a single, multi-tier language while facilitating code reuse and code sharing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Linsbauer:2017:CVC, author = "Lukas Linsbauer and Thorsten Berger and Paul Gr{\"u}nbacher", title = "A classification of variation control systems", journal = j-SIGPLAN, volume = "52", number = "12", pages = "49--62", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136054", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Version control systems are an integral part of today's software and systems development processes. They facilitate the management of revisions (sequential versions) and variants (concurrent versions) of a system under development and enable collaboration between developers. Revisions are commonly maintained either per file or for the whole system. Variants are supported via branching or forking mechanisms that conceptually clone the whole system under development. It is known that such cloning practices come with disadvantages. In fact, while short-lived branches for isolated development of new functionality (a.k.a. feature branches) are well supported, dealing with long-term and fine-grained system variants currently requires employing additional mechanisms, such as preprocessors, build systems or custom configuration tools. Interestingly, the literature describes a number of variation control systems, which provide a richer set of capabilities for handling fine-grained system variants compared to the version control systems widely used today. In this paper we present a classification and comparison of selected variation control systems to get an understanding of their capabilities and the advantages they can offer. We discuss problems of variation control systems, which may explain their comparably low popularity. We also propose research activities we regard as important to change this situation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Lapena:2017:AIN, author = "Ra{\'u}l Lape{\~n}a and Jaime Font and {\'O}scar Pastor and Carlos Cetina", title = "Analyzing the impact of natural language processing over feature location in models", journal = j-SIGPLAN, volume = "52", number = "12", pages = "63--76", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136052", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Feature Location (FL) is a common task in the Software Engineering field, specially in maintenance and evolution of software products. The results of FL depend in a great manner in the style in which Feature Descriptions and software artifacts are written. Therefore, Natural Language Processing (NLP) techniques are used to process them. 
Through this paper, we analyze the influence of the most common NLP techniques over FL in Conceptual Models through Latent Semantic Indexing, and the influence of human participation when embedding domain knowledge in the process. We evaluated the techniques in a real-world industrial case study in the rolling stocks domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Fenske:2017:HPA, author = "Wolfram Fenske and Sandro Schulze and Gunter Saake", title = "How preprocessor annotations (do not) affect maintainability: a case study on change-proneness", journal = j-SIGPLAN, volume = "52", number = "12", pages = "77--90", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136059", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Preprocessor annotations (e.g., \#ifdef in C) enable the development of similar, but distinct software variants from a common code base. One particularly popular preprocessor is the C preprocessor, cpp. But the cpp is also widely criticized for impeding software maintenance by making code hard to understand and change. Yet, evidence to support this criticism is scarce. In this paper, we investigate the relation between cpp usage and maintenance effort, which we approximate with the frequency and extent of source code changes. To this end, we mined the version control repositories of eight open-source systems written in C. For each system, we measured if and how individual functions use cpp annotations and how they were changed. We found that functions containing cpp annotations are generally changed more frequently and more profoundly than other functions. However, when accounting for function size, the differences disappear or are greatly diminished. In summary, with respect to the frequency and extent of changes, our findings do not support the criticism of the cpp regarding maintainability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Carlson:2017:TQC, author = "Travis Carlson and Eric {Van Wyk}", title = "Type qualifiers as composable language extensions", journal = j-SIGPLAN, volume = "52", number = "12", pages = "91--103", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136055", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper reformulates type qualifiers as language extensions that can be automatically and reliably composed. Type qualifiers annotate type expressions to introduce new subtyping relations and are powerful enough to detect many kinds of errors. Type qualifiers, as illustrated in our ableC extensible language framework for C, can introduce rich forms of concrete syntax, can generate dynamic checks on data when static checks are infeasible or not appropriate, and inject code that affects the program's behavior, for example for conversions of data or logging. 
ableC language extensions to C are implemented as attribute grammar fragments and provide an expressive mechanism for type qualifier implementations to check for additional errors, e.g. dereferences to pointers not qualified by a ``nonnull'' qualifier, and report custom error messages. Our approach distinguishes language extension users from developers and provides modular analyses to developers to ensure that when users select a set of extensions to use, they will automatically compose to form a working compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Rosa:2017:ARC, author = "Andrea Ros{\`a} and Eduardo Rosales and Walter Binder", title = "Accurate reification of complete supertype information for dynamic analysis on the {JVM}", journal = j-SIGPLAN, volume = "52", number = "12", pages = "104--116", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136061", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Reflective supertype information (RSI) is useful for many instrumentation-based dynamic analyses on the Java Virtual Machine (JVM). On the one hand, while such information can be obtained when performing the instrumentation within the same JVM process executing the instrumented program, in-process instrumentation severely limits the code coverage of the analysis. On the other hand, performing the instrumentation in a separate process can achieve full code coverage, but complete RSI is generally not available, often requiring expensive runtime checks in the instrumented program. Providing accurate and complete RSI in the instrumentation process is challenging because of dynamic class loading and classloader namespaces. In this paper, we present a novel technique to accurately reify complete RSI in a separate instrumentation process. We implement our technique in the dynamic analysis framework DiSL and evaluate it on a task profiler, achieving speedups of up to 45\% for an analysis with full code coverage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Pearce:2017:RSC, author = "David J. Pearce", title = "Rewriting for sound and complete union, intersection and negation types", journal = j-SIGPLAN, volume = "52", number = "12", pages = "117--130", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136042", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Implementing the type system of a programming language is a critical task that is often done in an ad-hoc fashion. Whilst this makes it hard to ensure the system is sound, it also makes it difficult to extend as the language evolves. We are interested in describing type systems using declarative rewrite rules from which an implementation can be automatically generated. 
Whilst not all type systems are easily expressed in this manner, those involving unions, intersections and negations are well-suited for this. In this paper, we consider a relatively complex type system involving unions, intersections and negations developed previously. This system was not developed with rewriting in mind, though clear parallels are immediately apparent from the original presentation. For example, the system presented required types be first converted into a variation on Disjunctive Normal Form. We identify that the original system can, for the most part, be reworked to enable a natural expression using declarative rewrite rules. We present an implementation of our rewrite rules in the Whiley Rewrite Language (WyRL), and report performance results compared with a hand-coded solution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Parreaux:2017:QSR, author = "Lionel Parreaux and Amir Shaikhha and Christoph E. Koch", title = "Quoted staged rewriting: a practical approach to library-defined optimizations", journal = j-SIGPLAN, volume = "52", number = "12", pages = "131--145", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136043", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Staging has proved a successful technique for programmatically removing code abstractions, thereby allowing for faster program execution while retaining a high-level interface for the programmer. Unfortunately, techniques based on staging suffer from a number of problems --- ranging from practicalities to fundamental limitations --- which have prevented their widespread adoption. We introduce Quoted Staged Rewriting (QSR), an approach that uses type-safe, pattern matching-enabled quasiquotes to define optimizations. The approach is ``staged'' in two ways: first, rewrite rules can execute arbitrary code during pattern matching and code reconstruction, leveraging the power and flexibility of staging; second, library designers can orchestrate the application of successive rewriting phases (stages). The advantages of using quasiquote-based rewriting are that library designers never have to deal directly with the intermediate representation (IR), and that it allows for non-intrusive optimizations --- in contrast with staging, it is not necessary to adapt the entire library and user programs to accommodate optimizations. We show how Squid, a Scala macro-based framework, enables QSR and renders library-defined optimizations more practical than ever before: library designers write domain-specific optimizers that users invoke transparently on delimited portions of their code base. 
As a motivating example we describe an implementation of stream fusion (a well-known deforestation technique) that is both simpler and more powerful than the state of the art, and can readily be used by Scala programmers with no knowledge of metaprogramming.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Caldwell:2017:RCC, author = "Joseph Caldwell and Shigeru Chiba", title = "Reducing calling convention overhead in object-oriented programming on embedded {ARM Thumb-2} platforms", journal = j-SIGPLAN, volume = "52", number = "12", pages = "146--156", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136057", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper examines the causes and extent of code size overhead caused by the ARM calling convention in Thumb-2 binaries. We show that binaries generated from C++ source files generally have higher amounts of calling convention overhead, and present a binary file optimizer to eliminate some of that overhead. Calling convention overhead can negatively impact power consumption, flash memory costs, and chip size in embedded or otherwise resource-constrained domains. This is particularly true on platforms using ``compressed'' instruction sets, such as the 16-bit ARM Thumb and Thumb-2 instruction sets, used in virtually all smartphones and in many other smaller-scale embedded devices. In this paper, we examine the extent of calling convention overhead in practical software, and compare the results of C and C++ programs, and find that C++ programs generally have a higher percentage of calling-convention overhead. Finally, we demonstrate a tool capable of eliminating some of this overhead, particularly in the case of C++ programs, by modifying the calling conventions on a per-procedure basis.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Perard-Gayot:2017:RSE, author = "Ars{\`e}ne P{\'e}rard-Gayot and Martin Weier and Richard Membarth and Philipp Slusallek and Roland Lei{\ss}a and Sebastian Hack", title = "{RaTrace}: simple and efficient abstractions for {BVH} ray traversal algorithms", journal = j-SIGPLAN, volume = "52", number = "12", pages = "157--168", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136044", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In order to achieve the highest possible performance, the ray traversal and intersection routines at the core of every high-performance ray tracer are usually hand-coded, heavily optimized, and implemented separately for each hardware platform-even though they share most of their algorithmic core. The results are implementations that heavily mix algorithmic aspects with hardware and implementation details, making the code non-portable and difficult to change and maintain. 
In this paper, we present a new approach that offers the ability to define in a functional language a set of conceptual, high-level language abstractions that are optimized away by a special compiler in order to maximize performance. Using this abstraction mechanism we separate a generic ray traversal and intersection algorithm from its low-level aspects that are specific to the target hardware. We demonstrate that our code is not only significantly more flexible, simpler to write, and more concise but also that the compiled results perform as well as state-of-the-art implementations on any of the tested CPU and GPU platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Susungi:2017:TCG, author = "Adilla Susungi and Norman A. Rink and Jer{\'o}nimo Castrill{\'o}n and Immo Huismann and Albert Cohen and Claude Tadonki and J{\"o}rg Stiller and Jochen Fr{\"o}hlich", title = "Towards compositional and generative tensor optimizations", journal = j-SIGPLAN, volume = "52", number = "12", pages = "169--175", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136050", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many numerical algorithms are naturally expressed as operations on tensors (i.e. multi-dimensional arrays). Hence, tensor expressions occur in a wide range of application domains, e.g. quantum chemistry and physics; big data analysis and machine learning; and computational fluid dynamics. Each domain, typically, has developed its own strategies for efficiently generating optimized code, supported by tools such as domain-specific languages, compilers, and libraries. However, strategies and tools are rarely portable between domains, and generic solutions typically act as ''black boxes'' that offer little control over code generation and optimization. As a consequence, there are application domains without adequate support for easily generating optimized code, e.g. computational fluid dynamics. In this paper we propose a generic and easily extensible intermediate language for expressing tensor computations and code transformations in a modular and generative fashion. Beyond being an intermediate language, our solution also offers meta-programming capabilities for experts in code optimization. While applications from the domain of computational fluid dynamics serve to illustrate our proposed solution, we believe that our general approach can help unify research in tensor optimizations and make solutions more portable between domains.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Al-Kofahi:2017:FLL, author = "Jafar M. 
Al-Kofahi and Suresh Kothari and Christian K{\"a}stner", title = "Four languages and lots of macros: analyzing autotools build systems", journal = j-SIGPLAN, volume = "52", number = "12", pages = "176--186", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136051", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Build systems are crucial for software system development, however there is a lack of tool support to help with their high maintenance overhead. GNU Autotools are widely used in the open source community, but users face various challenges from its hard to comprehend nature and staging of multiple code generation steps, often leading to low quality and error-prone build code. In this paper, we present a platform, AutoHaven, to provide a foundation for developers to create analysis tools to help them understand, maintain, and migrate their GNU Autotools build systems. Internally it uses approximate parsing and symbolic analysis of the build logic. We illustrate the use of the platform with two tools: ACSense helps developers to better understand their build systems and ACSniff detects build smells to improve build code quality. Our evaluation shows that AutoHaven can support most GNU Autotools build systems and can detect build smells in the wild.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Fernandes:2017:AUM, author = "Leonardo Fernandes and M{\'a}rcio Ribeiro and Luiz Carvalho and Rohit Gheyi and Melina Mongiovi and Andr{\'e} Santos and Ana Cavalcanti and Fabiano Ferrari and Jos{\'e} Carlos Maldonado", title = "Avoiding useless mutants", journal = j-SIGPLAN, volume = "52", number = "12", pages = "187--198", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136053", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mutation testing is a program-transformation technique that injects artificial bugs to check whether the existing test suite can detect them. However, the costs of using mutation testing are usually high, hindering its use in industry. Useless mutants (equivalent and duplicated) contribute to increase costs. Previous research has focused mainly on detecting useless mutants only after they are generated and compiled. In this paper, we introduce a strategy to help developers with deriving rules to avoid the generation of useless mutants. To use our strategy, we pass as input a set of programs. For each program, we also need a passing test suite and a set of mutants. As output, our strategy yields a set of useless mutants candidates. After manually confirming that the mutants classified by our strategy as ``useless'' are indeed useless, we derive rules that can avoid their generation and thus decrease costs. To the best of our knowledge, we introduce 37 new rules that can avoid useless mutants right before their generation. We then implement a subset of these rules in the MUJAVA mutation testing tool. 
Since our rules have been derived based on artificial and small Java programs, we take our MUJAVA version embedded with our rules and execute it in industrial-scale projects. Our rules reduced the number of mutants by almost 13\% on average. Our results are promising because (i) we avoid useless mutants generation; (ii) our strategy can help with identifying more rules in case we set it to use more complex Java programs; and (iii) our MUJAVA version has only a subset of the rules we derived.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Nakamaru:2017:SFA, author = "Tomoki Nakamaru and Kazuhiro Ichikawa and Tetsuro Yamazaki and Shigeru Chiba", title = "{Silverchain}: a fluent {API} generator", journal = j-SIGPLAN, volume = "52", number = "12", pages = "199--211", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136041", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a tool named Silverchain, which generates class definitions for a fluent API from the grammar of the API. A fluent API is an API that is used by method chaining and its grammar is a BNF-like set of rules that defines method chains accepted in type checking. Fluent APIs generated by Silverchain provide two styles of APIs: One is for building a chain by concatenating all method calls in series. The other is for building a chain from partial chains by passing child chains to method calls in the parent chain as their arguments. To generate such a fluent API, Silverchain first translates the given grammar into a set of deterministic pushdown automata without $\epsilon$-transitions, then encodes these automata into class definitions. Each constructed automaton corresponds to a nonterminal in the given grammar and recognizes symbol sequences produced from its corresponding nonterminal.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Zaytsev:2017:PGE, author = "Vadim Zaytsev", title = "Parser generation by example for legacy pattern languages", journal = j-SIGPLAN, volume = "52", number = "12", pages = "212--218", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136058", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Most modern software languages enjoy relatively free and relaxed concrete syntax, with significant flexibility of formatting of the program/model/sheet text. Yet, in the dark legacy corners of software engineering there are still languages with a strict fixed column-based structure --- the compromises of times long gone, attempting to combine some human readability with some ease of machine processing. In this paper, we consider an industrial case study for retirement of a legacy domain-specific language, completed under extreme circumstances: absolute lack of documentation, varying line structure, hierarchical blocks within one file, scalability demands for millions of lines of code, performance demands for manipulating tens of thousands of multi-megabyte files, etc.
However, the regularity of the language allowed to infer its structure from the available examples, automatically, and produce highly efficient parsers for it.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Mainland:2017:HCS, author = "Geoffrey Mainland and Jeremy Johnson", title = "A {Haskell} compiler for signal transforms", journal = j-SIGPLAN, volume = "52", number = "12", pages = "219--232", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136056", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Building a reusable, auto-tuning code generator from scratch is a challenging problem, requiring many careful design choices. We describe HSpiral, a Haskell compiler for signal transforms that builds on the foundational work of Spiral. Our design leverages many Haskell language features to ensure that our framework is reusable, flexible, and efficient. As well as describing the design of our system, we show how to extend it to support new classes of transforms, including the number-theoretic transform and a variant of the split-radix algorithm that results in reduced operation counts. We also show how to incorporate rewrite rules into our system to reproduce results from previous literature on code generation for the fast Fourier transform. Although the Spiral project demonstrated significant advances in automatic code generation, it has not been widely used by other researchers. HSpiral is freely available under an MIT-style license, and we are actively working to turn it into a tool to further both our own research goals and to serve as a foundation for other research groups' work in developing new implementations of signal transform algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Martini:2017:AGV, author = "Ricardo Giuliani Martini and Pedro Rangel Henriques", title = "Automatic generation of virtual learning spaces driven by {CaVa DSL}: an experience report", journal = j-SIGPLAN, volume = "52", number = "12", pages = "233--245", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136046", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Several applications are based on Domain-Specific Languages (DSL). They provide the right terminology to a peculiar problem/subject, because they use a particular domain vocabulary that defines abstract concepts, different from general-purpose languages. Aiming an easy generation of virtual Learning Spaces (LS) for the use of the responsible of institutional archives or museums, we have idealized and developed an external domain-specific language, called CaVa DSL, to describe, in an abstract level, virtual exhibition rooms in the museum curator's viewpoint, giving the curator the possibility to specify the virtual LS upon a domain ontology vocabulary. 
We also contribute with a set of processors that deal with CaVa DSL and generates virtual Learning Spaces, turning available the navigation over important and real information contained in archival documents to the public through virtual museums. To demonstrate the obtained results, we present a running example along the paper showing the virtual LS generation process.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Grebe:2017:RSD, author = "Mark Grebe and David Young and Andy Gill", title = "Rewriting a shallow {DSL} using a {GHC} compiler extension", journal = j-SIGPLAN, volume = "52", number = "12", pages = "246--258", month = dec, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3170492.3136048", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:14 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Embedded Domain Specific Languages are a powerful tool for developing customized languages to fit specific problem domains. Shallow EDSLs allow a programmer to program using many of the features of a host language and its syntax, but sacrifice performance. Deep EDSLs provide better performance and flexibility, through the ability to manipulate the abstract syntax tree of the DSL program, but sacrifice syntactical similarity to the host language. Using Haskino, an EDSL designed for small embedded systems based on the Arduino line of microcontrollers, and a compiler plugin for the Haskell GHC compiler, we show a method for combining the best aspects of shallow and deep EDSLs. The programmer is able to write in the shallow EDSL, and have it automatically transformed into the deep EDSL. This allows the EDSL user to benefit from powerful aspects of the host language, Haskell, while meeting the demanding resource constraints of the small embedded processing environment.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "GPCE '17 conference proceedings.", } @Article{Wen:2018:IBM, author = "Haosen Wen and Joseph Izraelevitz and Wentao Cai and H. Alan Beadle and Michael L. Scott", title = "Interval-based memory reclamation", journal = j-SIGPLAN, volume = "53", number = "1", pages = "1--13", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178488", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper we present interval-based reclamation (IBR), a new approach to safe reclamation of disconnected memory blocks in nonblocking concurrent data structures. Safe reclamation is a difficult problem: a thread, before freeing a block, must ensure that no other threads are accessing that block; the required synchronization tends to be expensive. In contrast with epoch-based reclamation, in which threads reserve all blocks created after a certain time, or pointer-based reclamation (e.g., hazard pointers), in which threads reserve individual blocks, IBR allows a thread to reserve all blocks known to have existed in a bounded interval of time. 
By comparing a thread's reserved interval with the lifetime of a detached but not yet reclaimed block, the system can determine if the block is safe to free. Like hazard pointers, IBR avoids the possibility that a single stalled thread may reserve an unbounded number of blocks; unlike hazard pointers, it avoids a memory fence on most pointer-following operations. It also avoids the need to explicitly ``unreserve'' a no-longer-needed pointer. We describe three specific IBR schemes (one with several variants) that trade off performance, applicability, and space requirements. IBR requires no special hardware or OS support. In experiments with data structure microbenchmarks, it also compares favorably (in both time and space) to other state-of-the-art approaches, making it an attractive alternative for libraries of concurrent data structures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Arbel-Raviv:2018:HEB, author = "Maya Arbel-Raviv and Trevor Brown", title = "Harnessing epoch-based reclamation for efficient range queries", journal = j-SIGPLAN, volume = "53", number = "1", pages = "14--27", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178489", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent sets with range query operations are highly desirable in applications such as in-memory databases. However, few set implementations offer range queries. Known techniques for augmenting data structures with range queries (or operations that can be used to build range queries) have numerous problems that limit their usefulness. For example, they impose high overhead or rely heavily on garbage collection. In this work, we show how to augment data structures with highly efficient range queries, without relying on garbage collection. We identify a property of epoch-based memory reclamation algorithms that makes them ideal for implementing range queries, and produce three algorithms, which use locks, transactional memory and lock-free techniques, respectively. Our algorithms are applicable to more data structures than previous work, and are shown to be highly efficient on a large scale Intel system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Friedman:2018:PLF, author = "Michal Friedman and Maurice Herlihy and Virendra Marathe and Erez Petrank", title = "A persistent lock-free queue for non-volatile memory", journal = j-SIGPLAN, volume = "53", number = "1", pages = "28--40", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178490", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-volatile memory is expected to coexist with (or even displace) volatile DRAM for main memory in upcoming architectures. This has led to increasing interest in the problem of designing and specifying durable data structures that can recover from system crashes. 
Data structures may be designed to satisfy stricter or weaker durability guarantees to provide a balance between the strength of the provided guarantees and performance overhead. This paper proposes three novel implementations of a concurrent lock-free queue. These implementations illustrate algorithmic challenges in building persistent lock-free data structures with different levels of durability guarantees. In presenting these challenges, the proposed algorithmic designs, and the different durability guarantees, we hope to shed light on ways to build a wide variety of durable data structures. We implemented the various designs and compared their performance overhead to a simple queue design for standard (volatile) memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Wang:2018:SDG, author = "Linnan Wang and Jinmian Ye and Yiyang Zhao and Wei Wu and Ang Li and Shuaiwen Leon Song and Zenglin Xu and Tim Kraska", title = "Superneurons: dynamic {GPU} memory management for training deep neural networks", journal = j-SIGPLAN, volume = "53", number = "1", pages = "41--53", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178491", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Going deeper and wider in neural architectures improves their accuracy, while the limited GPU DRAM places an undesired restriction on the network design domain. Deep Learning (DL) practitioners either need to change to less desired network architectures, or nontrivially dissect a network across multiGPUs. These distract DL practitioners from concentrating on their original machine learning tasks. We present SuperNeurons: a dynamic GPU memory scheduling runtime to enable the network training far beyond the GPU DRAM capacity. SuperNeurons features 3 memory optimizations, Liveness Analysis, Unified Tensor Pool, and Cost-Aware Recomputation; together they effectively reduce the network-wide peak memory usage down to the maximal memory usage among layers. We also address the performance issues in these memory-saving techniques. Given the limited GPU DRAM, SuperNeurons not only provisions the necessary memory for the training, but also dynamically allocates the memory for convolution workspaces to achieve the high performance. Evaluations against Caffe, Torch, MXNet and TensorFlow have demonstrated that SuperNeurons trains at least 3.2432 deeper network than current ones with the leading performance. Particularly, SuperNeurons can train ResNet2500 that has 10$^4$ basic network layers on a 12GB K40c.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Belviranli:2018:JDA, author = "Mehmet E. Belviranli and Seyong Lee and Jeffrey S. Vetter and Laxmi N. 
Bhuyan", title = "{Juggler}: a dependence-aware task-based execution framework for {GPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "54--67", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178492", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scientific applications with single instruction, multiple data (SIMD) computations show considerable performance improvements when run on today's graphics processing units (GPUs). However, the existence of data dependences across thread blocks may significantly impact the speedup by requiring global synchronization across multiprocessors (SMs) inside the GPU. To efficiently run applications with interblock data dependences, we need fine-granular task-based execution models that will treat SMs inside a GPU as stand-alone parallel processing units. Such a scheme will enable faster execution by utilizing all internal computation elements inside the GPU and eliminating unnecessary waits during device-wide global barriers. In this paper, we propose Juggler, a task-based execution scheme for GPU workloads with data dependences. The Juggler framework takes applications embedding OpenMP 4.5 tasks as input and executes them on the GPU via an efficient in-device runtime, hence eliminating the need for kernel-wide global synchronization. Juggler requires no or little modification to the source code, and once launched, the runtime entirely runs on the GPU without relying on the host through the entire execution. We have evaluated Juggler on an NVIDIA Tesla P100 GPU and obtained up to 31\% performance improvement against global barrier based implementation, with minimal runtime overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Kotsifakou:2018:HHP, author = "Maria Kotsifakou and Prakalp Srivastava and Matthew D. Sinclair and Rakesh Komuravelli and Vikram Adve and Sarita Adve", title = "{HPVM}: heterogeneous parallel virtual machine", journal = j-SIGPLAN, volume = "53", number = "1", pages = "68--80", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178493", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "We propose a parallel program representation for heterogeneous systems, designed to enable performance portability across a wide range of popular parallel hardware, including GPUs, vector instruction sets, multicore CPUs and potentially FPGAs. Our representation, which we call HPVM, is a hierarchical dataflow graph with shared memory and vector instructions. HPVM supports three important capabilities for programming heterogeneous systems: a compiler intermediate representation (IR), a virtual instruction set (ISA), and a basis for runtime scheduling; previous systems focus on only one of these capabilities. As a compiler IR, HPVM aims to enable effective code generation and optimization for heterogeneous systems. 
As a virtual ISA, it can be used to ship executable programs, in order to achieve both functional portability and performance portability across such systems. At runtime, HPVM enables flexible scheduling policies, both through the graph structure and the ability to compile individual nodes in a program to any of the target devices on a system. We have implemented a prototype HPVM system, defining the HPVM IR as an extension of the LLVM compiler IR, compiler optimizations that operate directly on HPVM graphs, and code generators that translate the virtual ISA to NVIDIA GPUs, Intel's AVX vector units, and to multicore X86-64 processors. Experimental results show that HPVM optimizations achieve significant performance improvements, HPVM translators achieve performance competitive with manually developed OpenCL code for both GPUs and vector hardware, and that runtime scheduling policies can make use of both program and runtime information to exploit the flexible compilation capabilities. Overall, we conclude that the HPVM representation is a promising basis for achieving performance portability and for implementing parallelizing compilers for heterogeneous parallel systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Guatto:2018:HMM, author = "Adrien Guatto and Sam Westrick and Ram Raghunathan and Umut Acar and Matthew Fluet", title = "Hierarchical memory management for mutable state", journal = j-SIGPLAN, volume = "53", number = "1", pages = "81--93", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178494", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is well known that modern functional programming languages are naturally amenable to parallel programming. Achieving efficient parallelism using functional languages, however, remains difficult. Perhaps the most important reason for this is their lack of support for efficient in-place updates, i.e., mutation, which is important for the implementation of both parallel algorithms and the run-time system services (e.g., schedulers and synchronization primitives) used to execute them. In this paper, we propose techniques for efficient mutation in parallel functional languages. To this end, we couple the memory manager with the thread scheduler to make reading and updating data allocated by nested threads efficient. We describe the key algorithms behind our technique, implement them in the MLton Standard ML compiler, and present an empirical evaluation. 
Our experiments show that the approach performs well, significantly improving efficiency over existing functional language implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Zhao:2018:BGB, author = "Yue Zhao and Jiajia Li and Chunhua Liao and Xipeng Shen", title = "Bridging the gap between deep learning and sparse matrix format selection", journal = j-SIGPLAN, volume = "53", number = "1", pages = "94--108", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178495", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This work presents a systematic exploration on the promise and special challenges of deep learning for sparse matrix format selection---a problem of determining the best storage format for a matrix to maximize the performance of Sparse Matrix Vector Multiplication (SpMV). It describes how to effectively bridge the gap between deep learning and the special needs of the pillar HPC problem through a set of techniques on matrix representations, deep learning structure, and cross-architecture model migrations. The new solution cuts format selection errors by two thirds, and improves SpMV performance by 1.73X on average over the state of the art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Jia:2018:ODW, author = "Zhen Jia and Aleksandar Zlateski and Fredo Durand and Kai Li", title = "Optimizing {$N$}-dimensional, {Winograd}-based convolution for manycore {CPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "109--123", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178496", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent work on Winograd-based convolution allows for a great reduction of computational complexity, but existing implementations are limited to 2D data and a single kernel size of 3 by 3. They can achieve only slightly better, and often worse performance than better optimized, direct convolution implementations. We propose and implement an algorithm for N-dimensional Winograd-based convolution that allows arbitrary kernel sizes and is optimized for manycore CPUs. Our algorithm achieves high hardware utilization through a series of optimizations. Our experiments show that on modern ConvNets, our optimized implementation, is on average more than 3 x, and sometimes 8 x faster than other state-of-the-art CPU implementations on an Intel Xeon Phi manycore processors. 
Moreover, our implementation on the Xeon Phi achieves competitive performance for 2D ConvNets and superior performance for 3D ConvNets, compared with the best GPU implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Tang:2018:VLF, author = "Xiongchao Tang and Jidong Zhai and Xuehai Qian and Bingsheng He and Wei Xue and Wenguang Chen", title = "{vSensor}: leveraging fixed-workload snippets of programs for performance variance detection", journal = j-SIGPLAN, volume = "53", number = "1", pages = "124--136", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178497", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance variance becomes increasingly challenging on current large-scale HPC systems. Even using a fixed number of computing nodes, the execution time of several runs can vary significantly. Many parallel programs executing on supercomputers suffer from such variance. Performance variance not only causes unpredictable performance requirement violations, but also makes it unintuitive to understand the program behavior. Despite prior efforts, efficient on-line detection of performance variance remains an open problem. In this paper, we propose vSensor, a novel approach for light-weight and on-line performance variance detection. The key insight is that, instead of solely relying on an external detector, the source code of a program itself could reveal the runtime performance characteristics. Specifically, many parallel programs contain code snippets that are executed repeatedly with an invariant quantity of work. Based on this observation, we use compiler techniques to automatically identify these fixed-workload snippets and use them as performance variance sensors (v-sensors) that enable effective detection. We evaluate vSensor with a variety of parallel programs on the Tianhe-2 system. Results show that vSensor can effectively detect performance variance on HPC systems. The performance overhead is smaller than 4\% with up to 16,384 processes. In particular, with vSensor, we found a bad node with slow memory that slowed a program's performance by 21\%. As a showcase, we also detected a severe network performance problem that caused a 3.37X slowdown for an HPC kernel program on the Tianhe-2 system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Prokopec:2018:CTC, author = "Aleksandar Prokopec", title = "Cache-tries: concurrent lock-free hash tries with constant-time operations", journal = j-SIGPLAN, volume = "53", number = "1", pages = "137--151", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178498", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent non-blocking hash tries have good cache locality, and horizontally scalable operations. However, operations on most existing concurrent hash tries run in O(log n) time.
In this paper, we show that the concurrent hash trie operations can run in expected constant time. We present a novel lock-free concurrent hash trie design that exerts less pressure on the memory allocator. This hash trie is augmented with a quiescently consistent cache, which permits the basic operations to run in expected O (1) time. We show a statistical analysis for the constant-time bound, which, to the best of our knowledge, is the first such proof for hash tries. We also prove the safety, lock-freedom and linearizability properties. On typical workloads, our implementation demonstrates up to 5X performance improvements with respect to the previous hash trie variants.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Chabbi:2018:FFF, author = "Milind Chabbi and Shasha Wen and Xu Liu", title = "Featherlight on-the-fly false-sharing detection", journal = j-SIGPLAN, volume = "53", number = "1", pages = "152--167", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178499", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Shared-memory parallel programs routinely suffer from false sharing---a performance degradation caused by different threads accessing different variables that reside on the same CPU cacheline and at least one variable is modified. State-of-the-art tools detect false sharing via a heavyweight process of logging memory accesses and feeding the ensuing access traces to an offline cache simulator. We have developed Feather, a lightweight, on-the-fly false-sharing detection tool. Feather achieves low overhead by exploiting two hardware features ubiquitous in commodity CPUs: the performance monitoring units (PMU) and debug registers. Additionally, Feather is a first-of-its-kind tool to detect false sharing in multi-process applications that use shared memory. Feather allowed us to scale false-sharing detection to myriad codes. Feather detected several false-sharing cases in important multi-core and multi-process codes including previous PPoPP artifacts. Eliminating false sharing resulted in dramatic (up to 16x) speedups.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Rawat:2018:ROS, author = "Prashant Singh Rawat and Fabrice Rastello and Aravind Sukumaran-Rajam and Louis-No{\"e}l Pouchet and Atanas Rountev and P. Sadayappan", title = "Register optimizations for stencils on {GPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "168--182", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178500", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The recent advent of compute-intensive GPU architecture has allowed application developers to explore high-order 3D stencils for better computational accuracy. A common optimization strategy for such stencils is to expose sufficient data reuse by means such as loop unrolling, with the expectation of register-level reuse. However, the resulting code is often highly constrained by register pressure. 
While current state-of-the-art register allocators are satisfactory for most applications, they are unable to effectively manage register pressure for such complex high-order stencils, resulting in sub-optimal code with a large number of register spills. In this paper, we develop a statement reordering framework that models stencil computations as a DAG of trees with shared leaves, and adapts an optimal scheduling algorithm for minimizing register usage for expression trees. The effectiveness of the approach is demonstrated through experimental results on a range of stencils extracted from application codes.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Zheng:2018:FPS, author = "Da Zheng and Disa Mhembere and Joshua T. Vogelstein and Carey E. Priebe and Randal Burns", title = "{FlashR}: parallelize and scale {R} for machine learning using {SSDs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "183--194", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178501", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "R is one of the most popular programming languages for statistics and machine learning, but it is slow and unable to scale to large datasets. The general approach for having an efficient algorithm in R is to implement it in C or FORTRAN and provide an R wrapper. FlashR accelerates and scales existing R code by parallelizing a large number of matrix functions in the R base package and scaling them beyond memory capacity with solid-state drives (SSDs). FlashR performs memory hierarchy aware execution to speed up parallelized R code by (i) evaluating matrix operations lazily, (ii) performing all operations in a DAG in a single execution and with only one pass over data to increase the ratio of computation to I/O, (iii) performing two levels of matrix partitioning and reordering computation on matrix partitions to reduce data movement in the memory hierarchy. We evaluate FlashR on various machine learning and statistics algorithms on inputs of up to four billion data points. Despite the huge performance gap between SSDs and RAM, FlashR on SSDs closely tracks the performance of FlashR in memory for many algorithms. The R implementations in FlashR outperform H$_2$O and Spark MLlib by a factor of 3--20.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Menon:2018:DDC, author = "Harshitha Menon and Kathryn Mohror", title = "{DisCVar}: discovering critical variables using algorithmic differentiation for transient faults", journal = j-SIGPLAN, volume = "53", number = "1", pages = "195--206", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Aggressive technology scaling trends have made the hardware of high performance computing (HPC) systems more susceptible to faults.
Some of these faults can lead to silent data corruption (SDC), and represent a serious problem because they alter the HPC simulation results. In this paper, we present a full-coverage, systematic methodology called DisCVar to identify critical variables in HPC applications for protection against SDC. DisCVar uses automatic differentiation (AD) to determine the sensitivity of the simulation output to errors in program variables. We empirically validate our approach in identifying vulnerable variables by comparing the results against a full-coverage code-level fault injection campaign. We find that DisCVar correctly identifies the variables that are critical to ensuring application SDC resilience, with a high degree of accuracy compared to the results of the fault injection campaign. Additionally, DisCVar requires only two executions of the target program to generate results, whereas in our experiments we needed to perform millions of executions to get the same information from a fault injection campaign.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Drachsler-Cohen:2018:PCT, author = "Dana Drachsler-Cohen and Martin Vechev and Eran Yahav", title = "Practical concurrent traversals in search trees", journal = j-SIGPLAN, volume = "53", number = "1", pages = "207--218", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178503", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Operations of concurrent objects often employ optimistic concurrency-control schemes that consist of a traversal followed by a validation step. The validation checks if concurrent mutations interfered with the traversal to determine if the operation should proceed or restart. A fundamental challenge is to discover a necessary and sufficient validation check that has to be performed to guarantee correctness. In this paper, we show a necessary and sufficient condition for validating traversals in search trees. The condition relies on a new concept of succinct path snapshots, which are derived from and embedded in the structure of the tree. We leverage the condition to design a general lock-free membership test suitable for any search tree. We then show how to integrate the validation condition in update operations of (non-rebalancing) binary search trees, internal and external, and AVL trees.
We experimentally show that our new algorithms outperform existing ones.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Gianinazzi:2018:CAP, author = "Lukas Gianinazzi and Pavel Kalvoda and Alessandro {De Palma} and Maciej Besta and Torsten Hoefler", title = "Communication-avoiding parallel minimum cuts and connected components", journal = j-SIGPLAN, volume = "53", number = "1", pages = "219--232", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178504", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present novel scalable parallel algorithms for finding global minimum cuts and connected components, which are important and fundamental problems in graph processing. To take advantage of future massively parallel architectures, our algorithms are communication-avoiding: they reduce the costs of communication across the network and the cache hierarchy. The fundamental technique underlying our work is the randomized sparsification of a graph: removing a fraction of graph edges, deriving a solution for such a sparsified graph, and using the result to obtain a solution for the original input. We design and implement sparsification with O (1) synchronization steps. Our global minimum cut algorithm decreases communication costs and computation compared to the state-of-the-art, while our connected components algorithm incurs few cache misses and synchronization steps. We validate our approach by evaluating MPI implementations of the algorithms on a petascale supercomputer. We also provide an approximate variant of the minimum cut algorithm and show that it approximates the exact solutions well while using a fraction of cores in a fraction of time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Khyzha:2018:SPT, author = "Artem Khyzha and Hagit Attiya and Alexey Gotsman and Noam Rinetzky", title = "Safe privatization in transactional memory", journal = j-SIGPLAN, volume = "53", number = "1", pages = "233--245", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178505", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactional memory (TM) facilitates the development of concurrent applications by letting the programmer designate certain code blocks as atomic. Programmers using a TM often would like to access the same data both inside and outside transactions, e.g., to improve performance or to support legacy code. In this case, programmers would ideally like the TM to guarantee strong atomicity, where transactions can be viewed as executing atomically also with respect to non-transactional accesses. Since guaranteeing strong atomicity for arbitrary programs is prohibitively expensive, researchers have suggested guaranteeing it only for certain data-race free (DRF) programs, particularly those that follow the privatization idiom: from some point on, threads agree that a given object can be accessed non-transactionally. 
Supporting privatization safely in a TM is nontrivial, because this often requires correctly inserting transactional fences, which wait until all active transactions complete. Unfortunately, there is currently no consensus on a single definition of transactional DRF, in particular, because no existing notion of DRF takes into account transactional fences. In this paper we propose such a notion and prove that, if a TM satisfies a certain condition generalizing opacity and a program using it is DRF assuming strong atomicity, then the program indeed has strongly atomic semantics. We show that our DRF notion allows the programmer to use privatization idioms. We also propose a method for proving our generalization of opacity and apply it to the TL2 TM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Grossman:2018:MPB, author = "Samuel Grossman and Heiner Litz and Christos Kozyrakis", title = "Making pull-based graph processing performant", journal = j-SIGPLAN, volume = "53", number = "1", pages = "246--260", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178506", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graph processing engines following either the push-based or pull-based pattern conceptually consist of a two-level nested loop structure. Parallelizing and vectorizing these loops is critical for high overall performance and memory bandwidth utilization. Outer loop parallelization is simple for both engine types but suffers from high load imbalance. This work focuses on inner loop parallelization for pull engines, which when performed naively leads to a significant increase in conflicting memory writes that must be synchronized. Our first contribution is a scheduler-aware interface for parallel loops that allows us to optimize for the common case in which each thread executes several consecutive iterations. This eliminates most write traffic and avoids all synchronization, leading to speedups of up to 50X. Our second contribution is the Vector-Sparse format, which addresses the obstacles to vectorization that stem from the commonly-used Compressed-Sparse data structure. Our new format eliminates unaligned memory accesses and bounds checks within vector operations, two common problems when processing low-degree vertices. Vectorization with Vector-Sparse leads to speedups of up to 2.5X. Our contributions are embodied in Grazelle, a hybrid graph processing framework. 
On a server equipped with four Intel Xeon E7-4850 v3 processors, Grazelle outperforms Ligra, Polymer, GraphMat, and X-Stream by up to 15.2X, 4.6X, 4.7X, and 66.8X, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Jangda:2018:EFT, author = "Abhinav Jangda and Uday Bondhugula", title = "An effective fusion and tile size model for optimizing image processing pipelines", journal = j-SIGPLAN, volume = "53", number = "1", pages = "261--275", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178507", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Effective models for fusion of loop nests continue to remain a challenge in both general-purpose and domain-specific language (DSL) compilers. The difficulty often arises from the combinatorial explosion of grouping choices and their interaction with parallelism and locality. This paper presents a new fusion algorithm for high-performance domain-specific compilers for image processing pipelines. The fusion algorithm is driven by dynamic programming, explores spaces of fusion possibilities not covered by previous approaches, and uses a cost function that captures optimization criteria more concretely and precisely than prior approaches. The fusion model is particularly tailored to the transformation and optimization sequence applied by PolyMage and Halide, two recent DSLs for image processing pipelines. Our model-driven technique, when implemented in PolyMage, provides significant improvements (up to 4.32X) over PolyMage's approach (which uses auto-tuning to aid its model), and over Halide's automatic approach (by up to 2.46X) on two state-of-the-art shared-memory multicore architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Wang:2018:LLD, author = "Lei Wang and Liangji Zhuang and Junhang Chen and Huimin Cui and Fang Lv and Ying Liu and Xiaobing Feng", title = "{Lazygraph}: lazy data coherency for replicas in distributed graph-parallel computation", journal = j-SIGPLAN, volume = "53", number = "1", pages = "276--289", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178508", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Replicas of a vertex play an important role in existing distributed graph processing systems, which allow a single vertex to be processed in parallel by multiple machines and remote neighbors to be accessed locally without any remote access. However, replicas of vertices introduce a data coherency problem. Existing distributed graph systems treat replicas of a vertex v as an atomic and indivisible vertex, and use an eager data coherency approach to guarantee replica atomicity. In the eager data coherency approach, any changes to vertex data must be immediately communicated to all replicas of v, thus leading to frequent global synchronizations and communications.
In this paper, we propose a lazy data coherency approach, called LazyAsync, which treats replicas of a vertex as independent vertices and maintains data coherency by computations rather than by communications, as in the existing eager approach. Our approach automatically selects some data coherency points from the graph algorithm, and requires all replicas to share the same global view only at such points, which means the replicas may maintain different local views between any two adjacent data coherency points. Based on PowerGraph, we develop a distributed graph processing system, LazyGraph, to implement the LazyAsync approach and exploit graph-aware optimizations. On a 48-node EC2-like cluster, LazyGraph outperforms PowerGraph on four widely used graph algorithms across a variety of real-world graphs, with a speedup ranging from 1.25x to 10.69x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Sun:2018:PPA, author = "Yihan Sun and Daniel Ferizovic and Guy E. Blelloch", title = "{PAM}: parallel augmented maps", journal = j-SIGPLAN, volume = "53", number = "1", pages = "290--304", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Ordered (key-value) maps are an important and widely-used data type for large-scale data processing frameworks. Beyond simple search, insertion and deletion, more advanced operations such as range extraction, filtering, and bulk updates form a critical part of these frameworks. We describe an interface for ordered maps that is augmented to support fast range queries and sums, and introduce a parallel and concurrent library called PAM (Parallel Augmented Maps) that implements the interface. The interface includes a wide variety of functions on augmented maps ranging from basic insertion and deletion to more interesting functions such as union, intersection, filtering, extracting ranges, splitting, and range-sums. We describe algorithms for these functions that are efficient both in theory and practice. As examples of the use of the interface and the performance of PAM, we apply the library to four applications: simple range sums, interval trees, 2D range trees, and ranked word index searching. The interface greatly simplifies the implementation of these data structures over direct implementations.
Sequentially the code achieves performance that matches or exceeds existing libraries designed specially for a single application, and in parallel our implementation gets speedups ranging from 40 to 90 on 72 cores with 2-way hyperthreading.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Fu:2018:ESM, author = "Zhouwang Fu and Tao Song and Zhengwei Qi and Haibing Guan", title = "Efficient shuffle management with {SCache} for {DAG} computing frameworks", journal = j-SIGPLAN, volume = "53", number = "1", pages = "305--316", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178510", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In large-scale data-parallel analytics, shuffle, or the cross-network read and aggregation of partitioned data between tasks with data dependencies, usually brings in large overhead. To reduce shuffle overhead, we present SCache, an open source plug-in system that particularly focuses on shuffle optimization. By extracting and analyzing shuffle dependencies prior to the actual task execution, SCache can adopt heuristic pre-scheduling combining with shuffle size prediction to pre-fetch shuffle data and balance load on each node. Meanwhile, SCache takes full advantage of the system memory to accelerate the shuffle process. We have implemented SCache and customized Spark to use it as the external shuffle service and co-scheduler. The performance of SCache is evaluated with both simulations and testbed experiments on a 50-node Amazon EC2 cluster. Those evaluations have demonstrated that, by incorporating SCache, the shuffle overhead of Spark can be reduced by nearly 89\%, and the overall completion time of TPC-DS queries improves 40\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Li:2018:HPG, author = "Xueqi Li and Guangming Tan and Bingchen Wang and Ninghui Sun", title = "High-performance genomic analysis framework with in-memory computing", journal = j-SIGPLAN, volume = "53", number = "1", pages = "317--328", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178511", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we propose an in-memory computing framework (called GPF) that provides a set of genomic formats, APIs and a fast genomic engine for large-scale genomic data processing. Our GPF comprises two main components: (1) scalable genomic data formats and API. (2) an advanced execution engine that supports efficient compression of genomic data and eliminates redundancies in the execution engine of our GPF. We further present both system and algorithm-specific implementations for users to build genomic analysis pipeline without any acquaintance of Spark parallel programming. To test the performance of GPF, we built a WGS pipeline on top of our GPF as a test case. 
Our experimental data indicate that GPF completes Whole-Genome Sequencing (WGS) analysis of the 146.9G-base Human Platinum Genome in a running time of 24 minutes, with over 50\% parallel efficiency when used on 2048 CPU cores. Together, our GPF framework provides a fast and general engine for large-scale genomic data processing that supports in-memory computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Liu:2018:GUC, author = "Yang Liu and Jianguo Wang and Steven Swanson", title = "{Griffin}: uniting {CPU} and {GPU} in information retrieval systems for intra-query parallelism", journal = j-SIGPLAN, volume = "53", number = "1", pages = "327--337", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interactive information retrieval services, such as enterprise search and document search, must provide relevant results with consistent, low response times in the face of rapidly growing data sets and query loads. These growing demands have led researchers to consider a wide range of optimizations to reduce response latency, including query processing parallelization and acceleration with co-processors such as GPUs. However, previous work runs queries either on GPU or CPU, ignoring the fact that the best processor for a given query depends on the query's characteristics, which may change as the processing proceeds. We present Griffin, an IR system that dynamically combines GPU- and CPU-based algorithms to process individual queries according to their characteristics. Griffin uses state-of-the-art CPU-based query processing techniques and incorporates a novel approach to GPU-based query evaluation. Our GPU-based approach, as far as we know, achieves the best available GPU search performance by leveraging a new compression scheme and exploiting an advanced merge-based intersection algorithm. We evaluate Griffin with real-world queries and datasets, and show that it improves query performance by 10x compared to a highly optimized CPU-only implementation, and 1.5x compared to our GPU approach running alone. We also find that Griffin helps reduce the 95th-, 99th-, and 99.9th-percentile query response time by 10.4x, 16.1x, and 26.8x, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Wang:2018:SFS, author = "Xinliang Wang and Weifeng Liu and Wei Xue and Li Wu", title = "{swSpTRSV}: a fast sparse triangular solve with sparse level tile layout on {Sunway} architectures", journal = j-SIGPLAN, volume = "53", number = "1", pages = "338--353", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178513", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Sparse triangular solve (SpTRSV) is one of the most important kernels in many real-world applications. Currently, much research on parallel SpTRSV focuses on level-set construction for reducing the number of inter-level synchronizations.
However, the out-of-control data reuse and high cost for global memory or shared cache access in inter-level synchronization have been largely neglected in existing work. In this paper, we propose a novel data layout called Sparse Level Tile to make all data reuse under control, and design a Producer-Consumer pairing method to make any inter-level synchronization only happen in very fast register communication. We implement our data layout and algorithms on an SW26010 many-core processor, which is the main building-block of the current world fastest supercomputer Sunway Taihulight. The experimental results of testing all 2057 square matrices from the Florida Matrix Collection show that our method achieves an average speedup of 6.9 and the best speedup of 38.5 over parallel level-set method. Our method also outperforms the latest methods on a KNC many-core processor in 1856 matrices and the latest methods on a K80 GPU in 1672 matrices, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Wilcox:2018:VVH, author = "James R. Wilcox and Cormac Flanagan and Stephen N. Freund", title = "{VerifiedFT}: a verified, high-performance precise dynamic race detector", journal = j-SIGPLAN, volume = "53", number = "1", pages = "354--367", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178514", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic data race detectors are valuable tools for testing and validating concurrent software, but to achieve good performance they are typically implemented using sophisticated concurrent algorithms. Thus, they are ironically prone to the exact same kind of concurrency bugs they are designed to detect. To address these problems, we have developed VerifiedFT, a clean slate redesign of the FastTrack race detector [19]. The VerifiedFT analysis provides the same precision guarantee as FastTrack, but is simpler to implement correctly and efficiently, enabling us to mechanically verify an implementation of its core algorithm using CIVL [27]. Moreover, VerifiedFT provides these correctness guarantees without sacrificing any performance over current state-of-the-art (but complex and unverified) FastTrack implementations for Java.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Xu:2018:EPD, author = "Yifan Xu and I-Ting Angelina Lee and Kunal Agrawal", title = "Efficient parallel determinacy race detection for two-dimensional dags", journal = j-SIGPLAN, volume = "53", number = "1", pages = "368--380", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178515", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A program is said to have a determinacy race if logically parallel parts of a program access the same memory location and one of the accesses is a write. 
These races are generally bugs in the program since they lead to non-deterministic program behavior: different schedules of the program can lead to different results. Most prior work on detecting these races focuses on a subclass of programs with fork-join parallelism. This paper presents a race-detection algorithm, 2D-Order, for detecting races in a more general class of programs, namely programs whose dependence structure can be represented as planar dags embedded in 2D grids. Such dependence structures arise from programs that use pipelined parallelism or dynamic programming recurrences. Given a computation with $ T_1 $ work and $ T_\infty $ span, 2D-Order executes it while also detecting races in $ O(T_1 / P + T_\infty) $ time on $P$ processors, which is asymptotically optimal. We also implemented PRacer, a race-detection algorithm based on 2D-Order for Cilk-P, which is a language for expressing pipeline parallelism. Empirical results demonstrate that PRacer incurs reasonable overhead and exhibits scalability similar to the baseline (executions without race detection) when running on multiple cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Acar:2018:PCM, author = "Umut A. Acar and Vitaly Aksenov and Arthur Chargu{\'e}raud and Mike Rainey", title = "Performance challenges in modular parallel programs", journal = j-SIGPLAN, volume = "53", number = "1", pages = "381--382", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178516", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Over the past decade, many programming languages and systems for parallel computing have been developed, including Cilk, Fork/Join Java, Habanero Java, Parallel Haskell, Parallel ML, and X10. Although these systems raise the level of abstraction at which parallel code is written, achieving good performance continues to require the programmer to perform extensive optimizations and tuning, often by taking various architectural details into account. One such key optimization is granularity control, which requires the programmer to determine when and how parallel tasks should be sequentialized. In this paper, we briefly describe some of the challenges associated with automatic granularity control when trying to achieve portable performance for parallel programs with arbitrary nesting of parallel constructs. We consider a result from the functional-programming community, whose starting point is to consider an ``oracle'' that can predict the work of parallel codes, and thereby control granularity.
We discuss the challenges in implementing such an oracle and proving that it has the desired theoretical properties under the nested-parallel programming model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Arif:2018:RBP, author = "Mahwish Arif and Hans Vandierendonck", title = "Reducing the burden of parallel loop schedulers for many-core processors", journal = j-SIGPLAN, volume = "53", number = "1", pages = "383--384", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This work proposes a low-overhead half-barrier pattern to schedule fine-grain parallel loops and considers its integration in the Intel OpenMP and Cilkplus schedulers. Experimental evaluation demonstrates that the scheduling overhead of our techniques is 43\% lower than Intel OpenMP and 12.1x lower than Cilk. We observe 22\% speedup on 48 threads, with a peak of 2.8x speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Cohen:2018:RTA, author = "Nachshon Cohen and Erez Petrank and James R. Larus", title = "Reducing transaction aborts by looking to the future", journal = j-SIGPLAN, volume = "53", number = "1", pages = "385--386", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178518", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Transactions are widely used in database engines and they are becoming increasingly useful as a general synchronization technique for multicore machines [1]. Transactional systems allow a programmer to encapsulate multiple operations inside a transaction. All these operations appear to be executed atomically or not at all.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Correia:2018:STR, author = "Andreia Correia and Pedro Ramalhete", title = "Strong trylocks for reader-writer locks", journal = j-SIGPLAN, volume = "53", number = "1", pages = "387--388", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178519", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A reader-writer lock provides basic methods for shared and exclusive lock acquisition. A thread calling one of these methods may have to wait indefinitely to enter its critical section, with no guarantee of completion. We present two new reader-writer strong trylock algorithms, where a call to a trylock method always completes in a finite number of steps, and is guaranteed to succeed unless there is a linearizable history for which another thread has the lock. The first algorithm, named StrongTryRW, uses a single word of memory to reach consensus, thus yielding reduced scalability for readers.
To address read scalability, we designed StrongTryRWRI which matches in throughput the current state of the art reader-writer lock algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Dong:2018:SSM, author = "Yao Dong and Ana Milanova and Julian Dolby", title = "{SecureMR}: secure mapreduce using homomorphic encryption and program partitioning", journal = j-SIGPLAN, volume = "53", number = "1", pages = "389--390", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178520", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In cloud computing customers upload data and computation to cloud providers. As they cede their data to the cloud provider, they may cede data confidentiality. We develop SecureMR, a system that analyzes and transforms MapReduce programs to operate over encrypted data. SecureMR makes use of partially homomorphic encryption and a trusted client. We evaluate SecureMR on a set of MapReduce benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Firoz:2018:SDV, author = "Jesun Sahariar Firoz and Marcin Zalewski and Andrew Lumsdaine", title = "A scalable distance-1 vertex coloring algorithm for power-law graphs", journal = j-SIGPLAN, volume = "53", number = "1", pages = "391--392", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178521", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a distributed, unordered, label-correcting distance-1 vertex coloring algorithm, called Distributed Control (DC) coloring algorithm. DC eliminates the need for vertex-centric barriers and global synchronization for color refinement, relying only on atomic operations and local termination detection to update vertex color. We implement our DC coloring algorithm and the well-known Jones-Plassmann algorithm in the AM++ AMT runtime and compare their performance. We show that, with runtime support, the elimination of waiting time of vertex-centric barriers and investing this time for local ordering results in better execution time for power-law graphs with dense local subgraphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Hayashi:2018:SMP, author = "Koby Hayashi and Grey Ballard and Yujie Jiang and Michael J. 
Tobia", title = "Shared-memory parallelization of {MTTKRP} for dense tensors", journal = j-SIGPLAN, volume = "53", number = "1", pages = "393--394", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The matricized-tensor times Khatri--Rao product (MTTKRP) is the computational bottleneck for algorithms computing CP decompositions of tensors. In this work, we develop shared-memory parallel algorithms for MTTKRP involving dense tensors. The algorithms cast nearly all of the computation as matrix operations in order to use optimized BLAS subroutines, and they avoid reordering tensor entries in memory. We use our parallel implementation to compute a CP decomposition of a neuroimaging data set and achieve a speedup of up to 7.4X over existing parallel software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Jiang:2018:RPS, author = "Peng Jiang and Gagan Agrawal", title = "Revealing parallel scans and reductions in sequential loops through function reconstruction", journal = j-SIGPLAN, volume = "53", number = "1", pages = "395--396", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178523", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many sequential loops are actually scans or reductions and can be parallelized across iterations despite the loop-carried dependences. In this work, we consider the parallelization of such scan/reduction loops, and propose a practical runtime approach called sampling-and-reconstruction to extract the hidden scan/reduction patterns in these loops.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Hong:2018:PMG, author = "Changwan Hong and Aravind Sukumaran-Rajam and Jinsung Kim and Prashant Singh Rawat and Sriram Krishnamoorthy and Louis-No{\"e}l Pouchet and Fabrice Rastello and P. Sadayappan", title = "Performance modeling for {GPUs} using abstract kernel emulation", journal = j-SIGPLAN, volume = "53", number = "1", pages = "397--398", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178524", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Performance modeling of GPU kernels is a significant challenge. In this paper, we develop a novel approach to performance modeling for GPUs through abstract kernel emulation along with latency/gap modeling of resources. 
Experimental results on all benchmarks from the Rodinia suite demonstrate good accuracy in predicting execution time on multiple GPU platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Jordan:2018:TCD, author = "Herbert Jordan and Bernhard Scholz and Pavle Subotic", title = "Two concurrent data structures for efficient datalog query processing", journal = j-SIGPLAN, volume = "53", number = "1", pages = "399--400", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178525", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years, Datalog has gained popularity for the implementation of advanced data analysis. Applications benefit from Datalog's high-level, declarative syntax, and availability of efficient algorithms for computing solutions. The efficiency of Datalog engines has reached a point where engines such as Souffl{\'e} have reported performance results comparable to low-level hand-crafted alternatives [3].", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Kerbl:2018:SQW, author = "Bernhard Kerbl and J{\"o}rg M{\"u}ller and Michael Kenzel and Dieter Schmalstieg and Markus Steinberger", title = "A scalable queue for work distribution on {GPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "401--402", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178526", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Harnessing the power of massively parallel devices like the graphics processing unit (GPU) is difficult for algorithms that show dynamic or inhomogeneous workloads. To achieve high performance, such advanced algorithms require scalable, concurrent queues to collect and distribute work. We present a new concurrent work queue, the Broker Queue, a highly efficient, linearizable queue for fine-granular work distribution on the GPU. We evaluate its usability and benefits in contrast to existing queuing algorithms. Our queue is up to one order of magnitude faster than non-blocking queues, and outperforms simpler queue designs that are unfit for fine-granular work distribution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Licht:2018:DSF, author = "Johannes de Fine Licht and Michaela Blott and Torsten Hoefler", title = "Designing scalable {FPGA} architectures using high-level synthesis", journal = j-SIGPLAN, volume = "53", number = "1", pages = "403--404", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178527", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Massive spatial parallelism at low energy gives FPGAs the potential to be core components in large scale high performance computing (HPC) systems. 
In this paper we present four major design steps that harness high-level synthesis (HLS) to implement scalable spatial FPGA algorithms. To aid productivity, we introduce the open source library hlslib to complement HLS. We evaluate kernels designed with our approach on an FPGA accelerator board, demonstrating high performance and board utilization with enhanced programmer productivity. By following our guidelines, programmers can use HLS to develop efficient parallel algorithms for FPGA, scaling their implementations with increased resources on future hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Liu:2018:LLC, author = "Bo Liu and Wenbin Jiang and Hai Jin and Xuanhua Shi and Yang Ma", title = "{Layrub}: layer-centric {GPU} memory reuse and data migration in extreme-scale deep learning systems", journal = j-SIGPLAN, volume = "53", number = "1", pages = "405--406", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178528", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Growing accuracy and robustness of Deep Neural Networks (DNN) models are accompanied by growing model capacity (going deeper or wider). However, high memory requirements of those models make it difficult to execute the training process in one GPU. To address it, we first identify the memory usage characteristics for deep and wide convolutional networks, and demonstrate the opportunities of memory reuse on both intra-layer and inter-layer levels. We then present Layrub, a runtime data placement strategy that orchestrates the execution of training process. It achieves layer-centric reuse to reduce memory consumption for extreme-scale deep learning that cannot be run on one single GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Liu:2018:RBI, author = "Junhong Liu and Xin He and Weifeng Liu and Guangming Tan", title = "Register-based implementation of the sparse general matrix--matrix multiplication on {GPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "407--408", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178529", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "General sparse matrix--matrix multiplication (SpGEMM) is an essential building block in a number of applications. 
In our work, we fully utilize GPU registers and shared memory to implement an efficient and load balanced SpGEMM in comparison with the existing implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Mururu:2018:QRE, author = "Girish Mururu and Ada Gavrilovska and Santosh Pande", title = "Quantifying and reducing execution variance in {STM} via model driven commit optimization", journal = j-SIGPLAN, volume = "53", number = "1", pages = "409--410", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178530", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Simplified parallel programming coupled with an ability to express speculative computation is realized with Software Transactional Memory (STM). Although STMs are gaining popularity because of significant improvements in parallel performance, they exhibit enormous variation in transaction execution with non-repeatable performance behavior which is unacceptable in many application domains, especially in which frame rates and responsiveness should be predictable. Thus, reducing execution variance in STM is an important performance goal that has been mostly overlooked. In this work, we minimize the variance in execution time of threads in STM by reducing non-determinism exhibited due to speculation by first quantifying non-determinism and generating an automaton that models the behavior of STM. We used the automaton to guide the STM to a less non-deterministic execution that reduced the variance in frame rate by a maximum of 65\% on a version of real-world Quake3 game.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Park:2018:TGM, author = "Jungho Park and Hyungmin Cho and Wookeun Jung and Jaejin Lee", title = "Transparent {GPU} memory management for {DNNs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "411--412", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern DNN frameworks exploit GPU acceleration by default to achieve high performance. The limitation of GPU memory capacity becomes a serious problem because DNNs are becoming deeper and larger. This paper proposes a purely software-based transparent solution, called tvDNN, to the GPU memory capacity problem. It is based on GPU memory swapping and memory object sectioning techniques. It also provides an efficient memory-object swapping schedule based on ILP (optimal) and heuristics (suboptimal). 
The experimental results show that tvDNN enables Caffe to build VGG-16 with a large batch size, such as 256 or 512, using a few GB of GPU memory without significant performance degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "Deep Neural Network (DNN)", remark = "PPoPP '18 proceedings.", } @Article{Poter:2018:SIA, author = "Manuel P{\"o}ter and Jesper Larsson Tr{\"a}ff", title = "Stamp-it, amortized constant-time memory reclamation in comparison to five other schemes", journal = j-SIGPLAN, volume = "53", number = "1", pages = "413--414", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178532", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The memory reclamation problem is to determine, for any given allocated memory node, when there are no more references to the node, allowing it to be safely returned to the memory management system. In a concurrent context, the memory reclamation problem is highly non-trivial, since there may be more than one thread referencing an allocated node unbeknownst to the other threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Reif:2018:PSA, author = "Stefan Reif and Wolfgang Schr{\"o}der-Preikschat", title = "A predictable synchronisation algorithm", journal = j-SIGPLAN, volume = "53", number = "1", pages = "415--416", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178533", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Interaction with physical objects often imposes latency requirements on multi-core embedded systems. One consequence is the need for synchronisation algorithms that provide predictable latency, in addition to high throughput. We present a synchronisation algorithm that needs at most 7 atomic memory operations per asynchronous critical section. Its performance is at least competitive with locks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Riebler:2018:ACA, author = "Heinrich Riebler and Gavin Vaz and Tobias Kenter and Christian Plessl", title = "Automated code acceleration targeting heterogeneous {OpenCL} devices", journal = j-SIGPLAN, volume = "53", number = "1", pages = "417--418", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178534", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Accelerators can offer exceptional performance advantages. However, programmers need to spend considerable effort on acceleration, without knowing how sustainable the employed programming models, languages and tools are.
To tackle this challenge, we propose and demonstrate a new runtime system called HTrOP that is able to automatically generate and execute OpenCL code from sequential CPU code. HTrOP transforms suitable data-parallel loops into independent OpenCL-typical work-items and handles concrete calls to these devices through a mix of library components and application-specific OpenCL host code. Computational hotspots are identified and can be offloaded to different resources (CPU, GPGPU and Xeon Phi). We demonstrate the potential of HTrOP on a broad set of applications and are able to improve the performance by 4.3X on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Barrera:2018:GPA, author = "Isaac S{\'a}nchez Barrera and Marc Casas and Miquel Moret{\'o} and Eduard Ayguad{\'e} and Jes{\'u}s Labarta and Mateo Valero", title = "Graph partitioning applied to {DAG} scheduling to reduce {NUMA} effects", journal = j-SIGPLAN, volume = "53", number = "1", pages = "419--420", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178535", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The complexity of shared memory systems is becoming more relevant as the number of memory domains increases, with different access latencies and bandwidth rates depending on the proximity between the cores and the devices containing the data. In this context, techniques to manage and mitigate non-uniform memory access (NUMA) effects consist in migrating threads, memory pages or both and are typically applied by the system software. We propose techniques at the runtime system level to reduce NUMA effects on parallel applications. We leverage runtime system metadata in terms of a task dependency graph. Our approach, based on graph partitioning methods, is able to provide parallel performance improvements of 1.12X on average with respect to the state-of-the-art.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Volkov:2018:MSG, author = "Vasily Volkov", title = "A microbenchmark to study {GPU} performance models", journal = j-SIGPLAN, volume = "53", number = "1", pages = "421--422", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178536", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Basic microarchitectural features of NVIDIA GPUs have been stable for a decade, and many analytic solutions were proposed to model their performance. We present a way to review, systematize, and evaluate these approaches by using a microbenchmark. In this manner, we produce a brief algebraic summary of key elements of selected performance models, identify patterns in their design, and highlight their previously unknown limitations.
Also, we identify a potentially superior method for estimating performance based on classical work.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Zhao:2018:SCG, author = "Tuowen Zhao and Mary Hall and Protonu Basu and Samuel Williams and Hans Johansen", title = "{SIMD} code generation for stencils on brick decompositions", journal = j-SIGPLAN, volume = "53", number = "1", pages = "423--424", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a stencil library and associated compiler code generation framework designed to maximize performance on higher-order stencil computations through the use of two main technologies: a fine-grained brick data layout designed to exploit the inherent multidimensional spatial locality endemic to stencil computations, and a vector scatter associative reordering transformation that reduces vector loads and alignment operations and exposes opportunities for the backend compiler to reduce computation. For a range of stencil computations, we compare the generated code expressed in the brick library to the standard tiled code. We attain up to a 7.2X speedup on the most complex stencils when running on an Intel Knights Landing (Xeon Phi) processor.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PPoPP '18 proceedings.", } @Article{Fujiki:2018:MDP, author = "Daichi Fujiki and Scott Mahlke and Reetuparna Das", title = "In-Memory Data Parallel Processor", journal = j-SIGPLAN, volume = "53", number = "2", pages = "1--14", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173171", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recent developments in Non-Volatile Memories (NVMs) have opened up a new horizon for in-memory computing. Despite the significant performance gain offered by computational NVMs, previous works have relied on manual mapping of specialized kernels to the memory arrays, making it infeasible to execute more general workloads. We combat this problem by proposing a programmable in-memory processor architecture and data-parallel programming framework. The efficiency of the proposed in-memory processor comes from two sources: massive parallelism and reduction in data movement. A compact instruction set provides generalized computation capabilities for the memory array. The proposed programming framework seeks to leverage the underlying parallelism in the hardware by merging the concepts of data-flow and vector processing. To facilitate in-memory programming, we develop a compilation framework that takes a TensorFlow input and generates code for our in-memory processor. 
Our results demonstrate 7.5x speedup over a multi-core CPU server for a set of applications from Parsec and 763x speedup over a server-class GPU for a set of Rodinia benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Fix:2018:HMT, author = "Jordan Fix and Nayana P. Nagendra and Sotiris Apostolakis and Hansen Zhang and Sophie Qiu and David I. August", title = "Hardware Multithreaded Transactions", journal = j-SIGPLAN, volume = "53", number = "2", pages = "15--29", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173172", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Speculation with transactional memory systems helps programmers and compilers produce profitable thread-level parallel programs. Prior work shows that supporting transactions that can span multiple threads, rather than requiring transactions be contained within a single thread, enables new types of speculative parallelization techniques for both programmers and parallelizing compilers. Unfortunately, software support for multi-threaded transactions (MTXs) comes with significant additional inter-thread communication overhead for speculation validation. This overhead can make otherwise good parallelization unprofitable for programs with sizeable read and write sets. Some programs using these prior software MTXs overcame this problem through significant efforts by expert programmers to minimize these sets and optimize communication, capabilities which compiler technology has been unable to equivalently achieve. Instead, this paper makes speculative parallelization less laborious and more feasible through low-overhead speculation validation, presenting the first complete design, implementation, and evaluation of hardware MTXs. Even with maximal speculation validation of every load and store inside transactions of tens to hundreds of millions of instructions, profitable parallelization of complex programs can be achieved. Across 8 benchmarks, this system achieves a geomean speedup of 99\% over sequential execution on a multicore machine with 4 cores.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Kumar:2018:BTF, author = "Rakesh Kumar and Boris Grot and Vijay Nagarajan", title = "Blasting through the Front-End Bottleneck with {Shotgun}", journal = j-SIGPLAN, volume = "53", number = "2", pages = "30--42", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173178", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The front-end bottleneck is a well-established problem in server workloads owing to their deep software stacks and large instruction working sets. Despite years of research into effective L1-I and BTB prefetching, state-of-the-art techniques force a trade-off between performance and metadata storage costs. 
This work introduces Shotgun, a BTB-directed front-end prefetcher powered by a new BTB organization that maintains a logical map of an application's instruction footprint, which enables high-efficacy prefetching at low storage cost. To map active code regions, Shotgun precisely tracks an application's global control flow (e.g., function and trap routine entry points) and summarizes local control flow within each code region. Because the local control flow enjoys high spatial locality, with most functions comprised of a handful of instruction cache blocks, it lends itself to a compact region-based encoding. Meanwhile, the global control flow is naturally captured by the application's unconditional branch working set (calls, returns, traps). Based on these insights, Shotgun devotes the bulk of its BTB capacity to branches responsible for the global control flow and a spatial encoding of their target regions. By effectively capturing a map of the application's instruction footprint in the BTB, Shotgun enables highly effective BTB-directed prefetching. Using a storage budget equivalent to a conventional BTB, Shotgun outperforms the state-of-the-art BTB-directed front-end prefetcher by up to 14\% on a set of varied commercial workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Besta:2018:SNL, author = "Maciej Besta and Syed Minhaj Hassan and Sudhakar Yalamanchili and Rachata Ausavarungnirun and Onur Mutlu and Torsten Hoefler", title = "Slim {NoC}: a Low-Diameter On-Chip Network Topology for High Energy Efficiency and Scalability", journal = j-SIGPLAN, volume = "53", number = "2", pages = "43--55", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177158", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging chips with hundreds and thousands of cores require networks with unprecedented energy/area efficiency and scalability. To address this, we propose Slim NoC (SN): a new on-chip network design that delivers significant improvements in efficiency and scalability compared to the state-of-the-art. The key idea is to use two concepts from graph and number theory, degree-diameter graphs combined with non-prime finite fields, to enable the smallest number of ports for a given core count. SN is inspired by state-of-the-art off-chip topologies; it identifies and distills their advantages for NoC settings while solving several key issues that lead to significant overheads on-chip. SN provides NoC-specific layouts, which further enhance area/energy efficiency. We show how to augment SN with state-of-the-art router microarchitecture schemes such as Elastic Links, to make the network even more scalable and efficient. Our extensive experimental evaluations show that SN outperforms both traditional low-radix topologies (e.g., meshes and tori) and modern high-radix networks (e.g., various Flattened Butterflies) in area, latency, throughput, and static/dynamic power consumption for both synthetic and real workloads. 
SN provides a promising direction in scalable and energy-efficient NoC topologies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Nguyen:2018:SCM, author = "Khanh Nguyen and Lu Fang and Christian Navasca and Guoqing Xu and Brian Demsky and Shan Lu", title = "{Skyway}: Connecting Managed Heaps in Distributed Big Data Systems", journal = j-SIGPLAN, volume = "53", number = "2", pages = "56--69", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173200", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Managed languages such as Java and Scala are prevalently used in development of large-scale distributed systems. Under the managed runtime, when performing data transfer across machines, a task frequently conducted in a Big Data system, the system needs to serialize a sea of objects into a byte sequence before sending them over the network. The remote node receiving the bytes then deserializes them back into objects. This process is both performance-inefficient and labor-intensive: (1) object serialization/deserialization makes heavy use of reflection, an expensive runtime operation and/or (2) serialization/deserialization functions need to be hand-written and are error-prone. This paper presents Skyway, a JVM-based technique that can directly connect managed heaps of different (local or remote) JVM processes. Under Skyway, objects in the source heap can be directly written into a remote heap without changing their formats. Skyway provides performance benefits to any JVM-based system by completely eliminating the need (1) of invoking serialization/deserialization functions, thus saving CPU time, and (2) of requiring developers to hand-write serialization functions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Wu:2018:EBJ, author = "Mingyu Wu and Ziming Zhao and Haoyu Li and Heting Li and Haibo Chen and Binyu Zang and Haibing Guan", title = "{Espresso}: Brewing {Java} For More Non-Volatility with Non-volatile Memory", journal = j-SIGPLAN, volume = "53", number = "2", pages = "70--83", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173201", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Fast, byte-addressable non-volatile memory (NVM) embraces both near-DRAM latency and disk-like persistence, which has generated considerable interests to revolutionize system software stack and programming models. However, it is less understood how NVM can be combined with managed runtime like Java virtual machine (JVM) to ease persistence management. This paper proposes Espresso, a holistic extension to Java and its runtime, to enable Java programmers to exploit NVM for persistence management with high performance. Espresso first provides a general persistent heap design called Persistent Java Heap (PJH) to manage persistent data as normal Java objects. 
The heap is then strengthened with a recoverable mechanism to provide crash consistency for heap metadata. Espresso further provides a new abstraction called Persistent Java Object (PJO) to provide an easy-to-use but safe persistence programming model for programmers to persist application data. Evaluation confirms that Espresso significantly outperforms state-of-art NVM support for Java (i.e., JPA and PCJ) while being compatible to data structures in existing Java programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Wang:2018:ECI, author = "Wenwen Wang and Stephen McCamant and Antonia Zhai and Pen-Chung Yew", title = "Enhancing Cross-{ISA} {DBT} Through Automatically Learned Translation Rules", journal = j-SIGPLAN, volume = "53", number = "2", pages = "84--97", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a novel approach for dynamic binary translation (DBT) to automatically learn translation rules from guest and host binaries compiled from the same source code. The learned translation rules are then verified via binary symbolic execution and used in an existing DBT system, QEMU, to generate more efficient host binary code. Experimental results on SPEC CINT2006 show that the average time of learning a translation rule is less than two seconds. With the rules learned from a collection of benchmark programs excluding the targeted program itself, an average 1.25X performance speedup over QEMU can be achieved for SPEC CINT2006. Moreover, the translation overhead introduced by this rule-based approach is very small even for short-running workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Rajadurai:2018:GSL, author = "Sumanaruban Rajadurai and Jeffrey Bosboom and Weng-Fai Wong and Saman Amarasinghe", title = "{Gloss}: Seamless Live Reconfiguration and Reoptimization of Stream Programs", journal = j-SIGPLAN, volume = "53", number = "2", pages = "98--112", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173170", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An important class of applications computes on long-running or infinite streams of data, often with known fixed data rates. The latter is referred to as synchronous data flow ~(SDF) streams. These stream applications need to run on clusters or the cloud due to the high performance requirement. Further, they require live reconfiguration and reoptimization for various reasons such as hardware maintenance, elastic computation, or to respond to fluctuations in resources or application workload. However, reconfiguration and reoptimization without downtime while accurately preserving program state in a distributed environment is difficult. In this paper, we introduce Gloss, a suite of compiler and runtime techniques for live reconfiguration of distributed stream programs. 
Gloss, for the first time, avoids periods of zero throughput during the reconfiguration of both stateless and stateful SDF based stream programs. Furthermore, unlike other systems, Gloss globally reoptimizes and completely recompiles the program during reconfiguration. This permits it to reoptimize the application for entirely new configurations that it may not have encountered before. All these Gloss operations happen in-situ, requiring no extra hardware resources. We show how Gloss allows stream programs to reconfigure and reoptimize with no downtime and minimal overhead, and demonstrate the wider applicability of it via a variety of experiments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Yoon:2018:FTB, author = "Hongil Yoon and Jason Lowe-Power and Gurindar S. Sohi", title = "Filtering Translation Bandwidth with Virtual Caching", journal = j-SIGPLAN, volume = "53", number = "2", pages = "113--127", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173195", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous computing with GPUs integrated on the same chip as CPUs is ubiquitous, and to increase programmability many of these systems support virtual address accesses from GPU hardware. However, this entails address translation on every memory access. We observe that future GPUs and workloads show very high bandwidth demands (up to 4 accesses per cycle in some cases) for shared address translation hardware due to frequent private TLB misses. This greatly impacts performance (32\% average performance degradation relative to an ideal MMU). To mitigate this overhead, we propose a software-agnostic, practical, GPU virtual cache hierarchy. We use the virtual cache hierarchy as an effective address translation bandwidth filter. We observe many requests that miss in private TLBs find corresponding valid data in the GPU cache hierarchy. With a GPU virtual cache hierarchy, these TLB misses can be filtered (i.e., virtual cache hits), significantly reducing bandwidth demands for the shared address translation hardware. In addition, accelerator-specific attributes (e.g., less likelihood of synonyms) of GPUs reduce the design complexity of virtual caches, making a whole virtual cache hierarchy (including a shared L2 cache) practical for GPUs. Our evaluation shows that the entire GPU virtual cache hierarchy effectively filters the high address translation bandwidth, achieving almost the same performance as an ideal MMU. 
We also evaluate L1-only virtual cache designs and show that using a whole virtual cache hierarchy obtains additional performance benefits (1.31$ \times $ speedup on average).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Maleki:2018:AHP, author = "Sepideh Maleki and Martin Burtscher", title = "Automatic Hierarchical Parallelization of Linear Recurrences", journal = j-SIGPLAN, volume = "53", number = "2", pages = "128--138", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173168", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Linear recurrences encompass many fundamental computations including prefix sums and digital filters. Later result values depend on earlier result values in recurrences, making it a challenge to compute them in parallel. We present a new work- and space-efficient algorithm to compute linear recurrences that is amenable to automatic parallelization and suitable for hierarchical massively-parallel architectures such as GPUs. We implemented our approach in a domain-specific code generator that emits optimized CUDA code. Our evaluation shows that, for standard prefix sums and single-stage IIR filters, the generated code reaches the throughput of memory copy for large inputs, which cannot be surpassed. On higher-order prefix sums, it performs nearly as well as the fastest handwritten code from the literature. On tuple-based prefix sums and digital filters, our automatically parallelized code outperforms the fastest prior implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Ginsbach:2018:AML, author = "Philip Ginsbach and Toomas Remmelg and Michel Steuwer and Bruno Bodin and Christophe Dubach and Michael F. P. O'Boyle", title = "Automatic Matching of Legacy Code to Heterogeneous {APIs}: an Idiomatic Approach", journal = j-SIGPLAN, volume = "53", number = "2", pages = "139--153", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173182", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous accelerators often disappoint. They provide the prospect of great performance, but only deliver it when using vendor specific optimized libraries or domain specific languages. This requires considerable legacy code modifications, hindering the adoption of heterogeneous computing. This paper develops a novel approach to automatically detect opportunities for accelerator exploitation. We focus on calculations that are well supported by established APIs: sparse and dense linear algebra, stencil codes and generalized reductions and histograms. We call them idioms and use a custom constraint-based Idiom Description Language (IDL) to discover them within user code. Detected idioms are then mapped to BLAS libraries, cuSPARSE and clSPARSE and two DSLs: Halide and Lift. We implemented the approach in LLVM and evaluated it on the NAS and Parboil sequential C/C++ benchmarks, where we detect 60 idiom instances. 
In those cases where idioms are a significant part of the sequential execution time, we generate code that achieves 1.26x to over 20x speedup on integrated and external GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Wang:2018:UAA, author = "Shu Wang and Chi Li and Henry Hoffmann and Shan Lu and William Sentosa and Achmad Imam Kistijantoro", title = "Understanding and Auto-Adjusting Performance-Sensitive Configurations", journal = j-SIGPLAN, volume = "53", number = "2", pages = "154--168", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173206", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern software systems are often equipped with hundreds to thousands of configurations, many of which greatly affect performance. Unfortunately, properly setting these configurations is challenging for developers due to the complex and dynamic nature of system workload and environment. In this paper, we first conduct an empirical study to understand performance-sensitive configurations and the challenges of setting them in the real-world. Guided by our study, we design a systematic and general control-theoretic framework, SmartConf, to automatically set and dynamically adjust performance-sensitive configurations to meet required operating constraints while optimizing other performance metrics. Evaluation shows that SmartConf is effective in solving real-world configuration problems, often providing better performance than even the best static configuration developers can choose under existing configuration systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Rahmani:2018:SFS, author = "Amir M. Rahmani and Bryan Donyanavard and Tiago M{\"u}ck and Kasra Moazzemi and Axel Jantsch and Onur Mutlu and Nikil Dutt", title = "{SPECTR}: Formal Supervisory Control and Coordination for Many-core Systems Resource Management", journal = j-SIGPLAN, volume = "53", number = "2", pages = "169--183", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173199", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Resource management strategies for many-core systems need to enable sharing of resources such as power, processing cores, and memory bandwidth while coordinating the priority and significance of system- and application-level objectives at runtime in a scalable and robust manner. State-of-the-art approaches use heuristics or machine learning for resource management, but unfortunately lack formalism in providing robustness against unexpected corner cases. While recent efforts deploy classical control-theoretic approaches with some guarantees and formalism, they lack scalability and autonomy to meet changing runtime goals. We present SPECTR, a new resource management approach for many-core systems that leverages formal supervisory control theory (SCT) to combine the strengths of classical control theory with state-of-the-art heuristic approaches to efficiently meet changing runtime goals. 
SPECTR is a scalable and robust control architecture and a systematic design flow for hierarchical control of many-core systems. SPECTR leverages SCT techniques such as gain scheduling to allow autonomy for individual controllers. It facilitates automatic synthesis of the high-level supervisory controller and its property verification. We implement SPECTR on an Exynos platform containing ARM's big.LITTLE-based heterogeneous multi-processor (HMP) and demonstrate that SPECTR's use of SCT is key to managing multiple interacting resources (e.g., chip power and processing cores) in the presence of competing objectives (e.g., satisfying QoS vs. power capping). The principles of SPECTR are easily applicable to any resource type and objective as long as the management problem can be modeled using dynamical systems theory (e.g., difference equations), discrete-event dynamic systems, or fuzzy dynamics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Mishra:2018:CLC, author = "Nikita Mishra and Connor Imes and John D. Lafferty and Henry Hoffmann", title = "{CALOREE}: Learning Control for Predictable Latency and Low Energy", journal = j-SIGPLAN, volume = "53", number = "2", pages = "184--198", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173184", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many modern computing systems must provide reliable latency with minimal energy. Two central challenges arise when allocating system resources to meet these conflicting goals: (1) complexity: modern hardware exposes diverse resources with complicated interactions and (2) dynamics: latency must be maintained despite unpredictable changes in operating environment or input. Machine learning accurately models the latency of complex, interacting resources, but does not address system dynamics; control theory adjusts to dynamic changes, but struggles with complex resource interaction. We therefore propose CALOREE, a resource manager that learns key control parameters to meet latency requirements with minimal energy in complex, dynamic environments. CALOREE breaks resource allocation into two sub-tasks: learning how interacting resources affect speedup, and controlling speedup to meet latency requirements with minimal energy. CALOREE defines a general control system whose parameters are customized by a learning framework while maintaining control-theoretic formal guarantees that the latency goal will be met. We test CALOREE's ability to deliver reliable latency on heterogeneous ARM big.LITTLE architectures in both single and multi-application scenarios. Compared to the best prior learning and control solutions, CALOREE reduces deadline misses by 60\% and energy consumption by 13\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Turakhia:2018:DGC, author = "Yatish Turakhia and Gill Bejerano and William J.
Dally", title = "{Darwin}: a Genomics Co-processor Provides up to $ 15 \, 000 \times $ Acceleration on Long Read Assembly", journal = j-SIGPLAN, volume = "53", number = "2", pages = "199--213", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173193", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Genomics is transforming medicine and our understanding of life in fundamental ways. Genomics data, however, is far outpacing Moore's Law. Third-generation sequencing technologies produce 100X longer reads than second generation technologies and reveal a much broader mutation spectrum of disease and evolution. However, these technologies incur prohibitively high computational costs. Over 1,300 CPU hours are required for reference-guided assembly of the human genome, and over 15,600 CPU hours are required for de novo assembly. This paper describes ``Darwin'' --- a co-processor for genomic sequence alignment that, without sacrificing sensitivity, provides up to $ 15 \, 000 \times $ speedup over the state-of-the-art software for reference-guided assembly of third-generation reads. Darwin achieves this speedup through hardware/algorithm co-design, trading more easily accelerated alignment for less memory-intensive filtering, and by optimizing the memory system for filtering. Darwin combines a hardware-accelerated version of D-SOFT, a novel filtering algorithm, alignment at high speed, and with a hardware-accelerated version of GACT, a novel alignment algorithm. GACT generates near-optimal alignments of arbitrarily long genomic sequences using constant memory for the compute-intensive step. Darwin is adaptable, with tunable speed and sensitivity to match emerging sequencing technologies and to meet the requirements of genomic applications beyond read assembly.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Zha:2018:LSM, author = "Yue Zha and Jing Li", title = "{Liquid Silicon-Monona}: a Reconfigurable Memory-Oriented Computing Fabric with Scalable Multi-Context Support", journal = j-SIGPLAN, volume = "53", number = "2", pages = "214--228", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173167", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "With the recent trend of promoting Field-Programmable Gate Arrays (FPGAs) to first-class citizens in accelerating compute-intensive applications in networking, cloud services and artificial intelligence, FPGAs face two major challenges in sustaining competitive advantages in performance and energy efficiency for diverse cloud workloads: (1) limited configuration capability for supporting light-weight computations/on-chip data storage to accelerate emerging search-/data-intensive applications. (2) lack of architectural support to hide reconfiguration overhead for assisting virtualization in a cloud computing environment. 
In this paper, we propose a reconfigurable memory-oriented computing fabric, namely Liquid Silicon-Monona (L-Si), enabled by emerging nonvolatile memory technology, i.e., RRAM, to address these two challenges. Specifically, L-Si addresses the first challenge by virtue of a new architecture comprising a 2D array of physically identical but functionally-configurable building blocks. It, for the first time, extends the configuration capabilities of existing FPGAs from computation to the whole spectrum ranging from computation to data storage. It allows users to better customize hardware by flexibly partitioning hardware resources between computation and memory, greatly benefiting emerging search- and data-intensive applications. To address the second challenge, L-Si provides scalable multi-context architectural support to minimize reconfiguration overhead for assisting virtualization. In addition, we provide compiler support to facilitate the programming of applications written in high-level programming languages (e.g. OpenCL) and frameworks (e.g. TensorFlow, MapReduce) while fully exploiting the unique architectural capability of L-Si. Our evaluation results show L-Si achieves 99.6\% area reduction, 1.43$ \times $ throughput improvement and 94.0\% power reduction on search-intensive benchmarks, as compared with the FPGA baseline. For neural network benchmarks, on average, L-Si achieves 52.3$ \times $ speedup, 113.9$ \times $ energy reduction and 81\% area reduction over the FPGA baseline. In addition, the multi-context architecture of L-Si reduces the context switching time to ~10ns, compared with an off-the-shelf FPGA (~100ms), greatly facilitating virtualization.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Achour:2018:TDC, author = "Sara Achour and Martin Rinard", title = "Time Dilation and Contraction for Programmable Analog Devices with {Jaunt}", journal = j-SIGPLAN, volume = "53", number = "2", pages = "229--242", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173179", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programmable analog devices are a powerful new computing substrate that are especially appropriate for performing computationally intensive simulations of neuromorphic and cytomorphic models. Current state of the art techniques for configuring analog devices to simulate dynamical systems do not consider the current and voltage operating ranges of analog device components or the sampling limitations of the digital interface of the device. We present Jaunt, a new solver that scales the values that configure the analog device to ensure the resulting analog computation executes within the operating constraints of the device, preserves the recoverable dynamics of the original simulation, and executes slowly enough to observe these dynamics at the sampled digital outputs.
Our results show that, on a set of benchmark biological simulations, (1) unscaled configurations produce incorrect simulations because they violate the operating ranges of the device and (2) Jaunt delivers scaled configurations that respect the operating ranges to produce correct simulations with observable dynamics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Dai:2018:EDT, author = "Yuting Dai and Tao Li and Benyong Liu and Mingcong Song and Huixiang Chen", title = "Exploiting Dynamic Thermal Energy Harvesting for Reusing in {Smartphone} with Mobile Applications", journal = j-SIGPLAN, volume = "53", number = "2", pages = "243--256", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173188", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Recently, mobile applications have gradually become performance- and resource- intensive, which results in a massive battery power drain and high surface temperature, and further degrades the user experience. Thus, high power consumption and surface over-heating have been considered as a severe challenge to smartphone design. In this paper, we propose DTEHR, a mobile Dynamic Thermal Energy Harvesting Reusing framework to tackle this challenge. The approach is sustainable in that it generates energy using dynamic Thermoelectric Generators (TEGs). The generated energy not only powers Thermoelectric Coolers (TECs) for cooling down hot-spots, but also recharges micro-supercapacitors (MSCs) for extended smartphone usage. To analyze thermal characteristics and evaluate DTEHR across real-world applications, we build MPPTAT (Multi-comPonent Power and Thermal Analysis Tool), a power and thermal analyzing tool for Android. The result shows that DTEHR reduces the temperature differences between hot areas and cold areas up to 15.4${}^\circ $C (internal) and 7${}^\circ $C (surface). With TEC-based hot-spots cooling, DTEHR reduces the temperature of the surface and internal hot-spots by an average of 8${}^\circ $ and 12.8mW respectively. With dynamic TEGs, DTEHR generates 2.7-15mW power, more than hundreds of times of power that TECs need to cool down hot-spots. Thus, extra-generated power can be stored into MSCs to prolong battery life.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Hu:2018:SDE, author = "Yongjian Hu and Iulian Neamtiu", title = "Static Detection of Event-based Races in {Android} Apps", journal = j-SIGPLAN, volume = "53", number = "2", pages = "257--270", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173173", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Event-based races are the main source of concurrency errors in Android apps. Prior approaches for scalable detection of event-based races have been dynamic. Due to their dynamic nature, these approaches suffer from coverage and false negative issues. We introduce a precise and scalable static approach and tool, named SIERRA, for detecting Android event-based races. 
SIERRA is centered around a new concept of ``concurrency action'' (that reifies threads, events/messages, system and user actions) and statically-derived order (happens-before relation) between actions. Establishing action order is complicated in Android, and event-based systems in general, because of externally-orchestrated control flow, use of callbacks, asynchronous tasks, and ad-hoc synchronization. We introduce several novel approaches that enable us to infer order relations statically: auto-generated code models which impose order among lifecycle and GUI events; a novel context abstraction for event-driven programs named action-sensitivity and finally, on-demand path sensitivity via backward symbolic execution to further rule out false positives. We have evaluated SIERRA on 194 Android apps. Of these, we chose 20 apps for manual analysis and comparison with a state-of-the-art dynamic race detector. Experimental results show that SIERRA is effective and efficient, typically taking 960 seconds to analyze an app and revealing 43 potential races. Compared with the dynamic race detector, SIERRA discovered an average 29.5 true races with 3.5 false positives, where the dynamic detector only discovered 4 races (hence missing 25.5 races per app) --- this demonstrates the advantage of a precise static approach. We believe that our approach opens the way for precise analysis and static event race detection in other event-driven systems beyond Android.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Guo:2018:PCA, author = "Peizhen Guo and Wenjun Hu", title = "{Potluck}: Cross-Application Approximate Deduplication for Computation-Intensive Mobile Applications", journal = j-SIGPLAN, volume = "53", number = "2", pages = "271--284", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173185", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging mobile applications, such as cognitive assistance and augmented reality (AR) based gaming, are increasingly computation-intensive and latency-sensitive, while running on resource-constrained devices. The standard approaches to addressing these involve either offloading to a cloud(let) or local system optimizations to speed up the computation, often trading off computation quality for low latency. Instead, we observe that these applications often operate on similar input data from the camera feed and share common processing components, both within the same (type of) applications and across different ones. Therefore, deduplicating processing across applications could deliver the best of both worlds. In this paper, we present Potluck, to achieve approximate deduplication. At the core of the system is a cache service that stores and shares processing results between applications and a set of algorithms to process the input data to maximize deduplication opportunities. This is implemented as a background service on Android. Extensive evaluation shows that Potluck can reduce the processing latency for our AR and vision workloads by a factor of 2.5 to 10.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Chong:2018:QCG, author = "Frederic T. 
Chong", title = "Quantum Computing is Getting Real: Architecture, {PL}, and {OS} Roles in Closing the Gap between Quantum Algorithms and Machines", journal = j-SIGPLAN, volume = "53", number = "2", pages = "285--285", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177152", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Quantum computing is at an inflection point, where 50-qubit (quantum bit) machines have been built, 100-qubit machines are just around the corner, and even 1000-qubit machines are perhaps only a few years away. These machines have the potential to fundamentally change our concept of what is computable and demonstrate practical applications in areas such as quantum chemistry, optimization, and quantum simulation. Yet a significant resource gap remains between practical quantum algorithms and real machines. There is an urgent shortage of the necessary computer scientists to work on software and architectures to close this gap. I will outline several grand research challenges in closing this gap, including programming language design, software and hardware verification, defining and perforating abstraction boundaries, cross-layer optimization, managing parallelism and communication, mapping and scheduling computations, reducing control complexity, machine-specific optimizations, learning error patterns, and many more. I will also describe the resources and infrastructure available for starting research in quantum computing and for tackling these challenges.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{DeLozier:2018:SSO, author = "Christian DeLozier and Ariel Eizenberg and Brandon Lucia and Joseph Devietti", title = "{SOFRITAS}: Serializable Ordering-Free Regions for Increasing Thread Atomicity Scalably", journal = j-SIGPLAN, volume = "53", number = "2", pages = "286--300", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173192", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Correctly synchronizing multithreaded programs is challenging and errors can lead to program failures such as atomicity violations. Existing strong memory consistency models rule out some possible failures, but are limited by depending on programmer-defined locking code. We present the new Ordering-Free Region (OFR) serializability consistency model that ensures atomicity for OFRs, which are spans of dynamic instructions between consecutive ordering constructs (e.g., barriers), without breaking atomicity at lock operations. Our platform, Serializable Ordering-Free Regions for Increasing Thread Atomicity Scalably (SOFRITAS), ensures a C/C++ program's execution is equivalent to a serialization of OFRs by default. We build two systems that realize the SOFRITAS idea: a concurrency bug finding tool for testing called SOFRITEST, and a production runtime system called SOPRO. SOFRITEST uses OFRs to find concurrency bugs, including a multi-critical-section atomicity violation in memcached that weaker consistency models will miss. 
If OFR's are too coarse-grained, SOFRITEST suggests refinement annotations automatically. Our software-only SOPRO implementation has high performance, scales well with increased parallelism, and prevents failures despite bugs in locking code. SOFRITAS has an average overhead of just 1.59x on a single-threaded execution and 1.51x on sixteen threads, despite pthreads' much weaker memory model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Markuze:2018:DOF, author = "Alex Markuze and Igor Smolyar and Adam Morrison and Dan Tsafrir", title = "{DAMN}: Overhead-Free {IOMMU} Protection for Networking", journal = j-SIGPLAN, volume = "53", number = "2", pages = "301--315", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173175", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "DMA operations can access memory buffers only if they are ``mapped'' in the IOMMU, so operating systems protect themselves against malicious/errant network DMAs by mapping and unmapping each packet immediately before/after it is DMAed. This approach was recently found to be riskier and less performant than keeping packets non-DMAable and instead copying their content to/from permanently-mapped buffers. Still, the extra copy hampers performance of multi-gigabit networking. We observe that achieving protection at the DMA (un)map boundary is needlessly constraining, as devices must be prevented from changing the data only after the kernel reads it. So there is no real need to switch ownership of buffers between kernel and device at the DMA (un)mapping layer, as opposed to the approach taken by all existing IOMMU protection schemes. We thus eliminate the extra copy by (1)~implementing a new allocator called DMA-Aware Malloc for Networking (DAMN), which (de)allocates packet buffers from a memory pool permanently mapped in the IOMMU; (2)~modifying the network stack to use this allocator; and (3)~copying packet data only when the kernel needs it, which usually morphs the aforementioned extra copy into the kernel's standard copy operation performed at the user-kernel boundary. DAMN thus provides full IOMMU protection with performance comparable to that of an unprotected system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Boroumand:2018:GWC, author = "Amirali Boroumand and Saugata Ghose and Youngsok Kim and Rachata Ausavarungnirun and Eric Shiu and Rahul Thakur and Daehyun Kim and Aki Kuusela and Allan Knies and Parthasarathy Ranganathan and Onur Mutlu", title = "{Google} Workloads for Consumer Devices: Mitigating Data Movement Bottlenecks", journal = j-SIGPLAN, volume = "53", number = "2", pages = "316--331", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173177", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We are experiencing an explosive growth in the number of consumer devices, including smartphones, tablets, web-based computers such as Chromebooks, and wearable devices. 
For this class of devices, energy efficiency is a first-class concern due to the limited battery capacity and thermal power budget. We find that data movement is a major contributor to the total system energy and execution time in consumer devices. The energy and performance costs of moving data between the memory system and the compute units are significantly higher than the costs of computation. As a result, addressing data movement is crucial for consumer devices. In this work, we comprehensively analyze the energy and performance impact of data movement for several widely-used Google consumer workloads: (1) the Chrome web browser; (2) TensorFlow Mobile, Google's machine learning framework; (3) video playback, and (4) video capture, both of which are used in many video services such as YouTube and Google Hangouts. We find that processing-in-memory (PIM) can significantly reduce data movement for all of these workloads, by performing part of the computation close to memory. Each workload contains simple primitives and functions that contribute to a significant amount of the overall data movement. We investigate whether these primitives and functions are feasible to implement using PIM, given the limited area and power constraints of consumer devices. Our analysis shows that offloading these primitives to PIM logic, consisting of either simple cores or specialized accelerators, eliminates a large amount of data movement, and significantly reduces total system energy (by an average of 55.4\% across the workloads) and execution time (by an average of 54.2\%).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Wen:2018:WSI, author = "Shasha Wen and Xu Liu and John Byrne and Milind Chabbi", title = "Watching for Software Inefficiencies with {Witch}", journal = j-SIGPLAN, volume = "53", number = "2", pages = "332--347", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177159", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Inefficiencies abound in complex, layered software. A variety of inefficiencies show up as wasteful memory operations. Many existing tools instrument every load and store instruction to monitor memory, which significantly slows execution and consumes enormously extra memory. Our lightweight framework, Witch, samples consecutive accesses to the same memory location by exploiting two ubiquitous hardware features: the performance monitoring units (PMU) and debug registers. Witch performs no instrumentation. Hence, witchcraft---tools built atop Witch---can detect a variety of software inefficiencies while introducing negligible slowdown and insignificant memory consumption and yet maintaining accuracy comparable to exhaustive instrumentation tools. Witch allowed us to scale our analysis to a large number of code bases. Guided by witchcraft, we detected several performance problems in important code bases; eliminating these inefficiencies resulted in significant speedups.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Devecsery:2018:OHA, author = "David Devecsery and Peter M. 
Chen and Jason Flinn and Satish Narayanasamy", title = "Optimistic Hybrid Analysis: Accelerating Dynamic Analysis through Predicated Static Analysis", journal = j-SIGPLAN, volume = "53", number = "2", pages = "348--362", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177153", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic analysis tools, such as those that detect data-races, verify memory safety, and identify information flow, have become a vital part of testing and debugging complex software systems. While these tools are powerful, their slow speed often limits how effectively they can be deployed in practice. Hybrid analysis speeds up these tools by using static analysis to decrease the work performed during dynamic analysis. In this paper we argue that current hybrid analysis is needlessly hampered by an incorrect assumption that preserving the soundness of dynamic analysis requires an underlying sound static analysis. We observe that, even with unsound static analysis, it is possible to achieve sound dynamic analysis for the executions which fall within the set of states statically considered. This leads us to a new approach, called optimistic hybrid analysis. We first profile a small set of executions and generate a set of likely invariants that hold true during most, but not necessarily all, executions. Next, we apply a much more precise, but unsound, static analysis that assumes these invariants hold true. Finally, we run the resulting dynamic analysis speculatively while verifying whether the assumed invariants hold true during that particular execution; if not, the program is reexecuted with a traditional hybrid analysis. Optimistic hybrid analysis is as precise and sound as traditional dynamic analysis, but is typically much faster because (1) unsound static analysis can speed up dynamic analysis much more than sound static analysis can and (2) verifications rarely fail. We apply optimistic hybrid analysis to race detection and program slicing and achieve 1.8x over a state-of-the-art race detector (FastTrack) optimized with traditional hybrid analysis and 8.3x over a hybrid backward slicer (Giri).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Katz:2018:SRC, author = "Omer Katz and Noam Rinetzky and Eran Yahav", title = "Statistical Reconstruction of Class Hierarchies in Binaries", journal = j-SIGPLAN, volume = "53", number = "2", pages = "363--376", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173202", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address a fundamental problem in reverse engineering of object-oriented code: the reconstruction of a program's class hierarchy from its stripped binary. Existing approaches rely heavily on structural information that is not always available, e.g., calls to parent constructors. As a result, these approaches often leave gaps in the hierarchies they construct, or fail to construct them altogether. 
Our main insight is that behavioral information can be used to infer subclass/superclass relations, supplementing any missing structural information. Thus, we propose the first statistical approach for static reconstruction of class hierarchies based on behavioral similarity. We capture the behavior of each type using a statistical language model (SLM), define a metric for pairwise similarity between types based on the Kullback--Leibler divergence between their SLMs, and lift it to determine the most likely class hierarchy. We implemented our approach in a tool called ROCK and used it to automatically reconstruct the class hierarchies of several real-world stripped C++ binaries. Our results demonstrate that ROCK obtained significantly more accurate class hierarchies than those obtained using structural analysis alone.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Rigger:2018:STA, author = "Manuel Rigger and Roland Schatz and Ren{\'e} Mayrhofer and Matthias Grimmer and Hanspeter M{\"o}ssenb{\"o}ck", title = "{Sulong}, and Thanks for All the Bugs: Finding Errors in {C} Programs by Abstracting from the Native Execution Model", journal = j-SIGPLAN, volume = "53", number = "2", pages = "377--391", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173174", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In C, memory errors, such as buffer overflows, are among the most dangerous software errors; as we show, they are still on the rise. Current dynamic bug-finding tools that try to detect such errors are based on the low-level execution model of the underlying machine. They insert additional checks in an ad-hoc fashion, which makes them prone to omitting checks for corner cases. To address this, we devised a novel approach to finding bugs during the execution of a program. At the core of this approach is an interpreter written in a high-level language that performs automatic checks (such as bounds, NULL, and type checks). By mapping data structures in C to those of the high-level language, accesses are automatically checked and bugs discovered. We have implemented this approach and show that our tool (called Safe Sulong) can find bugs that state-of-the-art tools overlook, such as out-of-bounds accesses to the main function arguments.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{David:2018:FPS, author = "Yaniv David and Nimrod Partush and Eran Yahav", title = "{FirmUp}: Precise Static Detection of Common Vulnerabilities in Firmware", journal = j-SIGPLAN, volume = "53", number = "2", pages = "392--404", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177157", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a static, precise, and scalable technique for finding CVEs (Common Vulnerabilities and Exposures) in stripped firmware images. Our technique is able to efficiently find vulnerabilities in real-world firmware with high accuracy. 
Given a vulnerable procedure in an executable binary and a firmware image containing multiple stripped binaries, our goal is to detect possible occurrences of the vulnerable procedure in the firmware image. Due to the variety of architectures and unique tool chains used by vendors, as well as the highly customized nature of firmware, identifying procedures in stripped firmware is extremely challenging. Vulnerability detection requires not only pairwise similarity between procedures but also information about the relationships between procedures in the surrounding executable. This observation serves as the foundation for a novel technique that establishes a partial correspondence between procedures in the two binaries. We implemented our technique in a tool called FirmUp and performed an extensive evaluation over 40 million procedures, over 4 different prevalent architectures, crawled from public vendor firmware images. We discovered 373 vulnerabilities affecting publicly available firmware, 147 of them in the latest available firmware version for the device. A thorough comparison of FirmUp to previous methods shows that it accurately and effectively finds vulnerabilities in firmware, while outperforming the detection rate of the state of the art by 45\% on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Alglave:2018:FSC, author = "Jade Alglave and Luc Maranget and Paul E. McKenney and Andrea Parri and Alan Stern", title = "Frightening Small Children and Disconcerting Grown-ups: Concurrency in the {Linux} Kernel", journal = j-SIGPLAN, volume = "53", number = "2", pages = "405--418", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177156", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Concurrency in the Linux kernel can be a contentious topic. The Linux kernel mailing list features numerous discussions related to consistency models, including those of the more than 30 CPU architectures supported by the kernel and that of the kernel itself. How are Linux programs supposed to behave? Do they behave correctly on exotic hardware? A formal model can help address such questions. Better yet, an executable model allows programmers to experiment with the model to develop their intuition. Thus we offer a model written in the cat language, making it not only formal, but also executable by the herd simulator. We tested our model against hardware and refined it in consultation with maintainers. 
Finally, we formalised the fundamental law of the Read-Copy-Update synchronisation mechanism, and proved that one of its implementations satisfies this law.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Liu:2018:FAD, author = "Haopeng Liu and Xu Wang and Guangpu Li and Shan Lu and Feng Ye and Chen Tian", title = "{FCatch}: Automatically Detecting Time-of-fault Bugs in Cloud Systems", journal = j-SIGPLAN, volume = "53", number = "2", pages = "419--431", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177161", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "It is crucial for distributed systems to achieve high availability. Unfortunately, this is challenging given the common component failures (i.e., faults). Developers often cannot anticipate all the timing conditions and system states under which a fault might occur, and introduce time-of-fault (TOF) bugs that only manifest when a node crashes or a message drops at a special moment. Although challenging, detecting TOF bugs is fundamental to developing highly available distributed systems. Unlike previous work that relies on fault injection to expose TOF bugs, this paper carefully models TOF bugs as a new type of concurrency bugs, and develops FCatch to automatically predict TOF bugs by observing correct execution. Evaluation on representative cloud systems shows that FCatch is effective, accurately finding severe TOF bugs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Deiana:2018:UPN, author = "Enrico A. Deiana and Vincent St-Amour and Peter A. Dinda and Nikos Hardavellas and Simone Campanoni", title = "Unconventional Parallelization of Nondeterministic Applications", journal = j-SIGPLAN, volume = "53", number = "2", pages = "432--447", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173181", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The demand for thread-level-parallelism (TLP) on commodity processors is endless as it is essential for gaining performance and saving energy. However, TLP in today's programs is limited by dependences that must be satisfied at run time. We have found that for nondeterministic programs, some of these actual dependences can be satisfied with alternative data that can be generated in parallel, thus boosting the program's TLP. Satisfying these dependences with alternative data nonetheless produces final outputs that match those of the original nondeterministic program. To demonstrate the practicality of our technique, we describe the design, implementation, and evaluation of our compilers, autotuner, profiler, and runtime, which are enabled by our proposed C++ programming language extensions. 
The resulting system boosts the performance of six well-known nondeterministic and multi-threaded benchmarks by 158.2\% (geometric mean) on a 28-core Intel-based platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Ji:2018:BGB, author = "Yu Ji and Youhui Zhang and Wenguang Chen and Yuan Xie", title = "Bridge the Gap between Neural Networks and Neuromorphic Hardware with a Neural Network Compiler", journal = j-SIGPLAN, volume = "53", number = "2", pages = "448--460", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173205", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Different from developing neural networks (NNs) for general-purpose processors, the development for NN chips usually faces some hardware-specific restrictions, such as limited precision of network signals and parameters, constrained computation scale, and limited types of non-linear functions. This paper proposes a general methodology to address the challenges. We decouple the NN applications from the target hardware by introducing a compiler that can transform an existing trained, unrestricted NN into an equivalent network that meets the given hardware's constraints. We propose multiple techniques to make the transformation adaptable to different kinds of NN chips, and reliable under strict hardware constraints. We have built such a software tool that supports both spiking neural networks (SNNs) and traditional artificial neural networks (ANNs). We have demonstrated its effectiveness with a fabricated neuromorphic chip and a processing-in-memory (PIM) design. Tests show that the inference error caused by this solution is insignificant and the transformation time is much shorter than the retraining time. We have also performed parameter-sensitivity evaluations to explore the tradeoffs between network error and resource utilization for different transformation strategies, which could provide insights for co-design optimization of neuromorphic hardware and software.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Kwon:2018:MEF, author = "Hyoukjun Kwon and Ananda Samajdar and Tushar Krishna", title = "{MAERI}: Enabling Flexible Dataflow Mapping over {DNN} Accelerators via Reconfigurable Interconnects", journal = j-SIGPLAN, volume = "53", number = "2", pages = "461--475", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deep neural networks (DNN) have demonstrated highly promising results across computer vision and speech recognition, and are becoming foundational for ubiquitous AI. The computational complexity of these algorithms and a need for high energy-efficiency has led to a surge in research on hardware accelerators.
To reduce the latency and energy costs of accessing DRAM, most DNN accelerators are spatial in nature, with hundreds of processing elements (PE) operating in parallel and communicating with each other directly. DNNs are evolving at a rapid rate, and it is common to have convolution, recurrent, pooling, and fully-connected layers with varying input and filter sizes in the most recent topologies. They may be dense or sparse. They can also be partitioned in myriad ways (within and across layers) to exploit data reuse (weights and intermediate outputs). All of the above can lead to different dataflow patterns within the accelerator substrate. Unfortunately, most DNN accelerators support only fixed dataflow patterns internally as they perform a careful co-design of the PEs and the network-on-chip (NoC). In fact, the majority of them are only optimized for traffic within a convolutional layer. This makes it challenging to map arbitrary dataflows on the fabric efficiently, and can lead to underutilization of the available compute resources. DNN accelerators need to be programmable to enable mass deployment. For them to be programmable, they need to be configurable internally to support the various dataflow patterns that could be mapped over them. To address this need, we present MAERI, which is a DNN accelerator built with a set of modular and configurable building blocks that can easily support myriad DNN partitions and mappings by appropriately configuring tiny switches. MAERI provides 8--459\% better utilization across multiple dataflow mappings over baselines with rigid NoC fabrics.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Cai:2018:VHA, author = "Ruizhe Cai and Ao Ren and Ning Liu and Caiwen Ding and Luhao Wang and Xuehai Qian and Massoud Pedram and Yanzhi Wang", title = "{VIBNN}: Hardware Acceleration of {Bayesian} Neural Networks", journal = j-SIGPLAN, volume = "53", number = "2", pages = "476--488", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173212", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Bayesian Neural Networks (BNNs) have been proposed to address the problem of model uncertainty in training and inference. By introducing weights associated with conditioned probability distributions, BNNs are capable of resolving the overfitting issue commonly seen in conventional neural networks and allow for small-data training, through the variational inference process. Frequent usage of Gaussian random variables in this process requires a properly optimized Gaussian Random Number Generator (GRNG). The high hardware cost of conventional GRNG makes the hardware implementation of BNNs challenging. In this paper, we propose VIBNN, an FPGA-based hardware accelerator design for variational inference on BNNs. We explore the design space for the massive amount of Gaussian variable sampling tasks in BNNs.
Specifically, we introduce two high performance Gaussian (pseudo) random number generators: (1) the RAM-based Linear Feedback Gaussian Random Number Generator (RLF-GRNG), which is inspired by the properties of binomial distribution and linear feedback logics; and (2) the Bayesian Neural Network-oriented Wallace Gaussian Random Number Generator. To achieve high scalability and efficient memory access, we propose a deep pipelined accelerator architecture with fast execution and good hardware utilization. Experimental results demonstrate that the proposed VIBNN implementations on an FPGA can achieve a throughput of 321,543.4 Images/s and energy efficiency of up to 52,694.8 Images/J while maintaining similar accuracy to its software counterpart.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Sadrosadati:2018:LEH, author = "Mohammad Sadrosadati and Amirhossein Mirhosseini and Seyed Borna Ehsani and Hamid Sarbazi-Azad and Mario Drumond and Babak Falsafi and Rachata Ausavarungnirun and Onur Mutlu", title = "{LTRF}: Enabling High-Capacity Register Files for {GPUs} via Hardware\slash Software Cooperative Register Prefetching", journal = j-SIGPLAN, volume = "53", number = "2", pages = "489--502", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173211", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics Processing Units (GPUs) employ large register files to accommodate all active threads and accelerate context switching. Unfortunately, register files are a scalability bottleneck for future GPUs due to long access latency, high power consumption, and large silicon area provisioning. Prior work proposes a hierarchical register file to reduce the register file power consumption by caching registers in a smaller register file cache. Unfortunately, this approach does not improve register access latency due to the low hit rate in the register file cache. In this paper, we propose the Latency-Tolerant Register File (LTRF) architecture to achieve low latency in a two-level hierarchical structure while keeping power consumption low. We observe that compile-time interval analysis enables us to divide GPU program execution into intervals with an accurate estimate of a warp's aggregate register working-set within each interval. The key idea of LTRF is to prefetch the estimated register working-set from the main register file to the register file cache under software control, at the beginning of each interval, and overlap the prefetch latency with the execution of other warps. Our experimental results show that LTRF enables high-capacity yet long-latency main GPU register files, paving the way for various optimizations. As an example optimization, we implement the main register file with emerging high-density high-latency memory technologies, enabling 8X larger capacity and improving overall GPU performance by 31\% while reducing register file power consumption by 46\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Ausavarungnirun:2018:MRG, author = "Rachata Ausavarungnirun and Vance Miller and Joshua Landgraf and Saugata Ghose and Jayneel Gandhi and Adwait Jog and Christopher J.
Rossbach and Onur Mutlu", title = "{MASK}: Redesigning the {GPU} Memory Hierarchy to Support Multi-Application Concurrency", journal = j-SIGPLAN, volume = "53", number = "2", pages = "503--518", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173169", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics Processing Units (GPUs) exploit large amounts of thread-level parallelism to provide high instruction throughput and to efficiently hide long-latency stalls. The resulting high throughput, along with continued programmability improvements, has made GPUs an essential computational resource in many domains. Applications from different domains can have vastly different compute and memory demands on the GPU. In a large-scale computing environment, to efficiently accommodate such wide-ranging demands without leaving GPU resources underutilized, multiple applications can share a single GPU, akin to how multiple applications execute concurrently on a CPU. Multi-application concurrency requires several support mechanisms in both hardware and software. One such key mechanism is virtual memory, which manages and protects the address space of each application. However, modern GPUs lack the extensive support for multi-application concurrency available in CPUs, and as a result suffer from high performance overheads when shared by multiple applications, as we demonstrate. We perform a detailed analysis of which multi-application concurrency support limitations hurt GPU performance the most. We find that the poor performance is largely a result of the virtual memory mechanisms employed in modern GPUs. In particular, poor address translation performance is a key obstacle to efficient GPU sharing. State-of-the-art address translation mechanisms, which were designed for single-application execution, experience significant inter-application interference when multiple applications spatially share the GPU. This contention leads to frequent misses in the shared translation lookaside buffer (TLB), where a single miss can induce long-latency stalls for hundreds of threads. As a result, the GPU often cannot schedule enough threads to successfully hide the stalls, which diminishes system throughput and becomes a first-order performance concern. Based on our analysis, we propose MASK, a new GPU framework that provides low-overhead virtual memory support for the concurrent execution of multiple applications. MASK consists of three novel address-translation-aware cache and memory management mechanisms that work together to largely reduce the overhead of address translation: (1) a token-based technique to reduce TLB contention, (2) a bypassing mechanism to improve the effectiveness of cached address translations, and (3) an application-aware memory scheduling scheme to reduce the interference between address translation and data requests. Our evaluations show that MASK restores much of the throughput lost to TLB contention. Relative to a state-of-the-art GPU TLB, MASK improves system throughput by 57.8\%, improves IPC throughput by 43.4\%, and reduces application-level unfairness by 22.4\%.
MASK's system throughput is within 23.2\% of an ideal GPU system with no address translation overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Yao:2018:SSG, author = "Zhihao Yao and Zongheng Ma and Yingtong Liu and Ardalan Amiri Sani and Aparna Chandramowlishwaran", title = "{Sugar}: Secure {GPU} Acceleration in {Web} Browsers", journal = j-SIGPLAN, volume = "53", number = "2", pages = "519--534", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173186", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Modern personal computers have embraced increasingly powerful Graphics Processing Units (GPUs). Recently, GPU-based graphics acceleration in web apps (i.e., applications running inside a web browser) has become popular. WebGL is the main effort to provide OpenGL-like graphics for web apps and it is currently used in 53\% of the top-100 websites. Unfortunately, WebGL has posed serious security concerns as several attack vectors have been demonstrated through WebGL. Web browsers' solutions to these attacks have been reactive: discovered vulnerabilities have been patched and new runtime security checks have been added. Unfortunately, this approach leaves the system vulnerable to zero-day vulnerability exploits, especially given the large size of the Trusted Computing Base of the graphics plane. We present Sugar, a novel operating system solution that enhances the security of GPU acceleration for web apps by design. The key idea behind Sugar is using a dedicated virtual graphics plane for a web app by leveraging modern GPU virtualization solutions. A virtual graphics plane consists of a dedicated virtual GPU (or vGPU) as well as all the software graphics stack (including the device driver). Sugar enhances the system security since a virtual graphics plane is fully isolated from the rest of the system. Despite GPU virtualization overhead, we show that Sugar achieves high performance. Moreover, unlike current systems, Sugar is able to use two underlying physical GPUs, when available, to co-render the User Interface (UI): one GPU is used to provide virtual graphics planes for web apps and the other to provide the primary graphics plane for the rest of the system. 
Such a design not only provides strong security guarantees but also enhanced performance isolation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Hsu:2018:SRP, author = "Chang-Hong Hsu and Qingyuan Deng and Jason Mars and Lingjia Tang", title = "{SmoothOperator}: Reducing Power Fragmentation and Improving Power Utilization in Large-scale Datacenters", journal = j-SIGPLAN, volume = "53", number = "2", pages = "535--548", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173190", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "With the ever-growing popularity of cloud computing and web services, Internet companies are in need of increased computing capacity to serve the demand. However, power has become a major limiting factor prohibiting the growth in industry: it is often the case that no more servers can be added to datacenters without surpassing the capacity of the existing power infrastructure. In this work, we first investigate the power utilization in Facebook datacenters. We observe that the combination of provisioning for peak power usage, highly fluctuating traffic, and multi-level power delivery infrastructure leads to a significant power budget fragmentation problem and inefficiently low power utilization. To address this issue, our insight is that heterogeneity of power consumption patterns among different services provides opportunities to re-shape the power profile of each power node by re-distributing services. By grouping services with asynchronous peak times under the same power node, we can reduce the peak power of each node, thus creating more power headroom to allow more servers to be hosted and achieve higher throughput. Based on this insight, we develop a workload-aware service placement framework to systematically spread the service instances with synchronous power patterns evenly under the power supply tree, greatly reducing the peak power draw at power nodes. We then leverage dynamic power profile reshaping to maximally utilize the headroom unlocked by our placement framework. Our experiments based on real production workloads and power traces show that we are able to host up to 13\% more machines in production, without changing the underlying power infrastructure.
Utilizing the unleashed power headroom with dynamic reshaping, we achieve up to an estimated total of 15\% and 11\% throughput improvement for latency-critical service and batch service respectively at the same time, with up to 44\% of energy slack reduction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Lee:2018:WPE, author = "Jaewon Lee and Changkyu Kim and Kun Lin and Liqun Cheng and Rama Govindaraju and Jangwoo Kim", title = "{WSMeter}: a Performance Evaluation Methodology for {Google}'s Production Warehouse-Scale Computers", journal = j-SIGPLAN, volume = "53", number = "2", pages = "549--563", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173196", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Evaluating the comprehensive performance of a warehouse-scale computer (WSC) has been a long-standing challenge. Traditional load-testing benchmarks become ineffective because they cannot accurately reproduce the behavior of thousands of distinct jobs co-located on a WSC. We therefore evaluate WSCs using actual job behaviors in live production environments. From our experience of developing multiple generations of WSCs, we identify two major challenges of this approach: (1) the lack of a holistic metric that incorporates thousands of jobs and summarizes the performance, and (2) the high costs and risks of conducting an evaluation in a live environment. To address these challenges, we propose WSMeter, a cost-effective methodology to accurately evaluate a WSC's performance using a live production environment. We first define a new metric which accurately represents a WSC's overall performance, taking a wide variety of unevenly distributed jobs into account. We then propose a model to statistically embrace the performance variance inherent in WSCs, to conduct an evaluation with minimal costs and risks. We present three real-world use cases to prove the effectiveness of WSMeter. In the first two cases, WSMeter accurately discerns 7\% and 1\% performance improvements from WSC upgrades using only 0.9\% and 6.6\% of the machines in the WSCs, respectively. We emphasize that naive statistical comparisons incur much higher evaluation costs ($ < 4 $ times) and sometimes even fail to distinguish subtle differences. 
The third case shows that a cloud customer hosting two services on our WSC quantifies the performance benefits of software optimization (+9.3\%) with minimal overheads (2.3\% of the service capacity).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Yu:2018:DAH, author = "Zhibin Yu and Zhendong Bei and Xuehai Qian", title = "Datasize-Aware High Dimensional Configurations Auto-Tuning of In-Memory Cluster Computing", journal = j-SIGPLAN, volume = "53", number = "2", pages = "564--577", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173187", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In-Memory cluster Computing (IMC) frameworks (e.g., Spark) have become increasingly important because they typically achieve more than 10$ \times $ speedups over the traditional On-Disk cluster Computing (ODC) frameworks for iterative and interactive applications. Like ODC, IMC frameworks typically run the same given programs repeatedly on a given cluster with similar input dataset size each time. It is challenging to build a performance model for an IMC program because: (1) the performance of IMC programs is more sensitive to the size of the input dataset, which is known to be difficult to incorporate into a performance model due to its complex effects on performance; (2) the number of performance-critical configuration parameters in IMC is much larger than in ODC (more than 40 vs. around 10), and the high dimensionality requires more sophisticated models to achieve high accuracy. To address this challenge, we propose DAC, a datasize-aware auto-tuning approach to efficiently identify the high dimensional configuration for a given IMC program to achieve optimal performance on a given cluster. DAC is a significant advance over the state-of-the-art because it can take the size of the input dataset and 41 configuration parameters as the parameters of the performance model for a given IMC program --- unprecedented in previous work. It is made possible by two key techniques: (1) Hierarchical Modeling (HM), which combines a number of individual sub-models in a hierarchical manner; (2) a Genetic Algorithm (GA), which is employed to search for the optimal configuration. To evaluate DAC, we use six typical Spark programs, each with five different input dataset sizes. The evaluation results show that DAC improves the performance of these six programs, compared to default configurations, by a factor of 30.4x on average and up to 89x. We also report that the geometric mean speedups of DAC over configurations by default, expert, and RFHOC are 15.4x, 2.3x, and 1.5x, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Ainsworth:2018:ETP, author = "Sam Ainsworth and Timothy M.
Jones", title = "An Event-Triggered Programmable Prefetcher for Irregular Workloads", journal = j-SIGPLAN, volume = "53", number = "2", pages = "578--592", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173189", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many modern workloads compute on large amounts of data, often with irregular memory accesses. Current architectures perform poorly for these workloads, as existing prefetching techniques cannot capture the memory access patterns; these applications end up heavily memory-bound as a result. Although a number of techniques exist to explicitly configure a prefetcher with traversal patterns, gaining significant speedups, they do not generalise beyond their target data structures. Instead, we propose an event-triggered programmable prefetcher combining the flexibility of a general-purpose computational unit with an event-based programming model, along with compiler techniques to automatically generate events from the original source code with annotations. This allows more complex fetching decisions to be made, without needing to stall when intermediate results are required. Using our programmable prefetching system, combined with small prefetch kernels extracted from applications, we achieve an average 3.0x speedup in simulation for a variety of graph, database and HPC workloads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Zhang:2018:MLO, author = "Dan Zhang and Xiaoyu Ma and Michael Thomson and Derek Chiou", title = "{Minnow}: Lightweight Offload Engines for Worklist Management and Worklist-Directed Prefetching", journal = j-SIGPLAN, volume = "53", number = "2", pages = "593--607", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173197", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The importance of irregular applications such as graph analytics is rapidly growing with the rise of Big Data. However, parallel graph workloads tend to perform poorly on general-purpose chip multiprocessors (CMPs) due to poor cache locality, low compute intensity, frequent synchronization, uneven task sizes, and dynamic task generation. At high thread counts, execution time is dominated by worklist synchronization overhead and cache misses. Researchers have proposed hardware worklist accelerators to address scheduling costs, but these proposals often harden a specific scheduling policy and do not address high cache miss rates. We address this with Minnow, a technique that augments each core in a CMP with a lightweight Minnow accelerator. Minnow engines offload worklist scheduling from worker threads to improve scalability. The engines also perform worklist-directed prefetching, a technique that exploits knowledge of upcoming tasks to issue nearly perfectly accurate and timely prefetch operations. 
On a simulated 64-core CMP running a parallel graph benchmark suite, Minnow improves scalability and reduces L2 cache misses from 29 to 1.2 MPKI on average, resulting in 6.01x average speedup over an optimized software baseline for only 1\% area overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Zhang:2018:WNA, author = "Mingxing Zhang and Yongwei Wu and Youwei Zhuo and Xuehai Qian and Chengying Huan and Kang Chen", title = "{Wonderland}: a Novel Abstraction-Based Out-Of-Core Graph Processing System", journal = j-SIGPLAN, volume = "53", number = "2", pages = "608--621", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173208", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many important graph applications are iterative algorithms that repeatedly process the input graph until convergence. For such algorithms, graph abstraction is an important technique: although much smaller than the original graph, it can bootstrap an initial result that can significantly accelerate the final convergence speed, leading to a better overall performance. However, existing graph abstraction techniques typically assume either fully in-memory or distributed environment, which leads to many obstacles preventing the application to an out-of-core graph processing system. In this paper, we propose Wonderland, a novel out-of-core graph processing system based on abstraction. Wonderland has three unique features: (1) A simple method applicable to out-of-core systems allowing users to extract effective abstractions from the original graph with acceptable cost and a specific memory limit; (2) Abstraction-enabled information propagation, where an abstraction can be used as a bridge over the disjoint on-disk graph partitions; (3) Abstraction guided priority scheduling, where an abstraction can infer the better priority-based order in processing on-disk graph partitions. Wonderland is a significant advance over the state-of-the-art because it not only makes graph abstraction feasible to out-of-core systems, but also broadens the applications of the concept in important ways. Evaluation results of Wonderland reveal that Wonderland achieves a drastic speedup over the other state-of-the-art systems, up to two orders of magnitude for certain cases.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Sabet:2018:TTI, author = "Amir Hossein Nodehi Sabet and Junqiao Qiu and Zhijia Zhao", title = "{Tigr}: Transforming Irregular Graphs for {GPU}-Friendly Graph Processing", journal = j-SIGPLAN, volume = "53", number = "2", pages = "622--636", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173180", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graph analytics delivers deep knowledge by processing large volumes of highly connected data. In real-world graphs, the degree distribution tends to follow the power law --- a small portion of nodes own a large number of neighbors. 
The high irregularity of degree distribution acts as a major barrier to their efficient processing on GPU architectures, which are primarily designed for accelerating computations on regular data with SIMD executions. Existing solutions to the inefficiency of GPU-based graph analytics either modify the graph programming abstraction or rely on changes to the low-level thread execution models. The former requires more programming efforts for designing and maintaining graph analytics; while the latter couples with the underlying architectures, making it difficult to adapt as architectures quickly evolve. Unlike prior efforts, this work proposes to address the above fundamental problem at its origin --- the irregular graph data itself. It raises a critical question in irregular graph processing: Is it possible to transform irregular graphs into more regular ones such that the graphs can be processed more efficiently on GPU-like architectures, yet still producing the same results? Inspired by the question, this work introduces Tigr --- a graph transformation framework that can effectively reduce the irregularity of real-world graphs with correctness guarantees for a wide range of graph analytics. To make the transformations practical, Tigr features a lightweight virtual transformation scheme, which can substantially reduce the costs of graph transformations, while preserving the benefits of reduced irregularity. Evaluation on Tigr-based GPU graph processing shows significant and consistent speedup over the state-of-the-art GPU graph processing frameworks for a spectrum of irregular graphs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Haria:2018:DMH, author = "Swapnil Haria and Mark D. Hill and Michael M. Swift", title = "Devirtualizing Memory in Heterogeneous Systems", journal = j-SIGPLAN, volume = "53", number = "2", pages = "637--650", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Accelerators are increasingly recognized as one of the major drivers of future computational growth. For accelerators, shared virtual memory (VM) promises to simplify programming and provide safe data sharing with CPUs. Unfortunately, the overheads of virtual memory, which are high for general-purpose processors, are even higher for accelerators. Providing accelerators with direct access to physical memory (PM) in contrast, provides high performance but is both unsafe and more difficult to program. We propose Devirtualized Memory (DVM) to combine the protection of VM with direct access to PM. By allocating memory such that physical and virtual addresses are almost always identical (VA==PA), DVM mostly replaces page-level address translation with faster region-level Devirtualized Access Validation (DAV). Optionally on read accesses, DAV can be overlapped with data fetch to hide VM overheads. DVM requires modest OS and IOMMU changes, and is transparent to the application. Implemented in Linux 4.10, DVM reduces VM overheads in a graph-processing accelerator to just 1.6\% on average. 
DVM also improves performance by 2.1X over an optimized conventional VM implementation, while consuming 3.9X less dynamic energy for memory management. We further discuss DVM's potential to extend beyond accelerators to CPUs, where it reduces VM overheads to 5\% on average, down from 29\% for conventional VM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Kumar:2018:LLT, author = "Mohan Kumar Kumar and Steffen Maass and Sanidhya Kashyap and J{\'a}n Vesel{\'y} and Zi Yan and Taesoo Kim and Abhishek Bhattacharjee and Tushar Krishna", title = "{LATR}: Lazy Translation Coherence", journal = j-SIGPLAN, volume = "53", number = "2", pages = "651--664", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173198", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose LATR (lazy TLB coherence), a software-based TLB shootdown mechanism that can alleviate the overhead of the synchronous TLB shootdown mechanism in existing operating systems. By handling TLB coherence in a lazy fashion, LATR can avoid expensive IPIs which are required for delivering a shootdown signal to remote cores, and the performance overhead of associated interrupt handlers. Therefore, virtual memory operations, such as free and page migration operations, can benefit significantly from LATR's mechanism. For example, LATR improves the latency of munmap() by 70.8\% on a 2-socket machine, a widely used configuration in modern data centers. Real-world, performance-critical applications such as web servers can also benefit from LATR: without any application-level changes, LATR improves Apache by 59.9\% compared to Linux, and by 37.9\% compared to ABIS, a highly optimized, state-of-the-art TLB coherence technique.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Taassori:2018:VRP, author = "Meysam Taassori and Ali Shafiee and Rajeev Balasubramonian", title = "{VAULT}: Reducing Paging Overheads in {SGX} with Efficient Integrity Verification Structures", journal = j-SIGPLAN, volume = "53", number = "2", pages = "665--678", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177155", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Intel's SGX offers state-of-the-art security features, including confidentiality, integrity, and authentication (CIA) when accessing sensitive pages in memory. Sensitive pages are placed in an Enclave Page Cache (EPC) within the physical memory before they can be accessed by the processor. To control the overheads imposed by CIA guarantees, the EPC operates with a limited capacity (currently 128 MB). Because of this limited EPC size, sensitive pages must be frequently swapped between EPC and non-EPC regions in memory. A page swap is expensive (about 40K cycles) because it requires an OS system call, page copying, updates to integrity trees and metadata, etc.
Our analysis shows that the paging overhead can slow the system on average by 5$ \times $, and other studies have reported even higher slowdowns for memory-intensive workloads. The paging overhead can be reduced by growing the size of the EPC to match the size of physical memory, while allowing the EPC to also accommodate non-sensitive pages. However, at least two important problems must be addressed to enable this growth in EPC: (i) the depth of the integrity tree and its cacheability must be improved to keep memory bandwidth overheads in check, and (ii) the space overheads of integrity verification (tree and MACs) must be reduced. We achieve both goals by introducing a variable arity unified tree (VAULT) organization that is more compact and has lower depth. We further reduce the space overheads with techniques that combine MAC sharing and compression. With simulations, we show that the combination of our techniques can address most inefficiencies in SGX memory access and improve overall performance by 3.7$ \times $, relative to an SGX baseline, while incurring a memory capacity overhead of only 4.7\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Panwar:2018:MHP, author = "Ashish Panwar and Aravinda Prasad and K. Gopinath", title = "Making Huge Pages Actually Useful", journal = j-SIGPLAN, volume = "53", number = "2", pages = "679--692", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173203", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The virtual-to-physical address translation overhead, a major performance bottleneck for modern workloads, can be effectively alleviated with huge pages. However, since huge pages must be mapped contiguously, OSs have not been able to use them well because of the memory fragmentation problem despite hardware support for huge pages being available for nearly two decades. This paper presents a comprehensive study of the interaction of fragmentation with huge pages in the Linux kernel. We observe that when huge pages are used, problems such as high CPU utilization and latency spikes occur because of unnecessary work (e.g., useless page migration) performed by memory management related subsystems due to the poor handling of unmovable (i.e., kernel) pages. This behavior is even more harmful in virtualized systems where unnecessary work may be performed in both guest and host OSs. We present Illuminator, an efficient memory manager that provides various subsystems, such as the page allocator, the ability to track all unmovable pages. It allows subsystems to make informed decisions and eliminate unnecessary work, which in turn leads to cost-effective huge page allocations. Illuminator reduces the cost of compaction (up to 99\%), improves application performance (up to 2.3x) and reduces the maximum latency of the MySQL database server (by 30x).
Importantly, this work shows the effectiveness of a simple solution for long-standing huge page related problems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Evtyushkin:2018:BNS, author = "Dmitry Evtyushkin and Ryan Riley and Nael Abu-Ghazaleh and Dmitry Ponomarev", title = "{BranchScope}: a New Side-Channel Attack on Directional Branch Predictor", journal = j-SIGPLAN, volume = "53", number = "2", pages = "693--707", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173204", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present BranchScope --- a new side-channel attack where the attacker infers the direction of an arbitrary conditional branch instruction in a victim program by manipulating the shared directional branch predictor. The directional component of the branch predictor stores the prediction on a given branch (taken or not-taken) and is a different component from the branch target buffer (BTB) attacked by previous work. BranchScope is the first fine-grained attack on the directional branch predictor, expanding our understanding of the side channel vulnerability of the branch prediction unit. Our attack targets complex hybrid branch predictors with unknown organization. We demonstrate how an attacker can force these predictors to switch to a simple 1-level mode to simplify the direction recovery. We carry out BranchScope on several recent Intel CPUs and also demonstrate the attack against an SGX enclave.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Dickens:2018:SCI, author = "Bernard {Dickens III} and Haryadi S. Gunawi and Ariel J. Feldman and Henry Hoffmann", title = "{StrongBox}: Confidentiality, Integrity, and Performance using Stream Ciphers for Full Drive Encryption", journal = j-SIGPLAN, volume = "53", number = "2", pages = "708--721", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173183", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Full-drive encryption (FDE) is especially important for mobile devices because they contain large quantities of sensitive data yet are easily lost or stolen. Unfortunately, the standard approach to FDE (the AES block cipher in XTS mode) is 3--5$ \times $ slower than unencrypted storage. Authenticated encryption based on stream ciphers is already used as a faster alternative to AES in other contexts, such as HTTPS, but the conventional wisdom is that stream ciphers are unsuitable for FDE. Used naively in drive encryption, stream ciphers are vulnerable to attacks, and mitigating these attacks with on-drive metadata is generally believed to ruin performance. In this paper, we argue that recent developments in mobile hardware invalidate this assumption, making it possible to use fast stream ciphers for FDE.
Modern mobile devices employ solid-state storage with Flash Translation Layers (FTL), which operate similarly to Log-structured File Systems (LFS). They also include trusted hardware such as Trusted Execution Environments (TEEs) and secure storage areas. Leveraging these two trends, we propose StrongBox, a stream cipher-based FDE layer that is a drop-in replacement for dm-crypt, the standard Linux FDE module based on AES-XTS. StrongBox introduces a system design and on-drive data structures that exploit LFS's lack of overwrites to avoid costly rekeying and a counter stored in trusted hardware to protect against attacks. We implement StrongBox on an ARM big.LITTLE mobile processor and test its performance under multiple popular production LFSes. We find that StrongBox improves read performance by as much as 2.36$ \times $ (1.72$ \times $ on average) while offering stronger integrity guarantees.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Hunger:2018:DDC, author = "Casen Hunger and Lluis Vilanova and Charalampos Papamanthou and Yoav Etsion and Mohit Tiwari", title = "{DATS} --- Data Containers for {Web} Applications", journal = j-SIGPLAN, volume = "53", number = "2", pages = "722--736", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Data containers enable users to control access to their data while untrusted applications compute on it. However, they require replicating an application inside each container --- compromising functionality, programmability, and performance. We propose DATS --- a system to run web applications that retains application usability and efficiency through a mix of hardware capability enhanced containers and the introduction of two new primitives modeled after the popular model-view-controller (MVC) pattern. (1) DATS introduces a templating language to create views that compose data across data containers. (2) DATS uses authenticated storage and confinement to enable an untrusted storage service, such as memcached and deduplication, to operate on plain-text data across containers. These two primitives act as robust declassifiers that allow DATS to enforce non-interference across containers, taking large applications out of the trusted computing base (TCB). 
We showcase eight different web applications including Gitlab and a Slack-like chat, significantly improve the worst-case overheads due to application replication, and demonstrate usable performance for common-case usage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Mallon:2018:DPP, author = "Stephen Mallon and Vincent Gramoli and Guillaume Jourjon", title = "{DLibOS}: Performance and Protection with a Network-on-Chip", journal = j-SIGPLAN, volume = "53", number = "2", pages = "737--750", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173209", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A long body of research work has led to the conjecture that highly efficient IO processing at user-level would necessarily violate protection. In this paper, we debunk this myth by introducing DLibOS a new paradigm that consists of distributing a library OS on specialized cores to achieve performance and protection at the user-level. Its main novelty consists of leveraging network-on-chip to allow hardware message passing, rather than context switches, for communication between different address spaces. To demonstrate the feasibility of our approach, we implement a driver and a network stack at user-level on a Tilera many-core machine. We define a novel asynchronous socket interface and partition the memory such that the reception, the transmission and the application modify isolated regions. Our high performance results of 4.2 and 3.1 million requests per second obtained on a webserver and the Memcached applications, respectively, confirms the relevance of our design decisions. Finally, we compare DLibOS against a non-protected user-level network stack and show that protection comes at a negligible cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Lin:2018:AIA, author = "Shih-Chieh Lin and Yunqi Zhang and Chang-Hong Hsu and Matt Skach and Md E. Haque and Lingjia Tang and Jason Mars", title = "The Architectural Implications of Autonomous Driving: Constraints and Acceleration", journal = j-SIGPLAN, volume = "53", number = "2", pages = "751--766", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173191", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Autonomous driving systems have attracted a significant amount of interest recently, and many industry leaders, such as Google, Uber, Tesla, and Mobileye, have invested a large amount of capital and engineering power on developing such systems. Building autonomous driving systems is particularly challenging due to stringent performance requirements in terms of both making the safe operational decisions and finishing processing at real-time. Despite the recent advancements in technology, such systems are still largely under experimentation and architecting end-to-end autonomous driving systems remains an open research question. 
To investigate this question, we first present and formalize the design constraints for building an autonomous driving system in terms of performance, predictability, storage, thermal and power. We then build an end-to-end autonomous driving system using state-of-the-art award-winning algorithms to understand the design trade-offs for building such systems. In our real-system characterization, we identify three computational bottlenecks, which conventional multicore CPUs are incapable of processing under the identified design constraints. To meet these constraints, we accelerate these algorithms using three accelerator platforms including GPUs, FPGAs, and ASICs, which can reduce the tail latency of the system by 169x, 10x, and 93x respectively. With accelerator-based designs, we are able to build an end-to-end autonomous driving system that meets all the design constraints, and explore the trade-offs among performance, power and the higher accuracy enabled by higher resolution cameras.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Colin:2018:RES, author = "Alexei Colin and Emily Ruppel and Brandon Lucia", title = "A Reconfigurable Energy Storage Architecture for Energy-harvesting Devices", journal = j-SIGPLAN, volume = "53", number = "2", pages = "767--781", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Battery-free, energy-harvesting devices operate using energy collected exclusively from their environment. Energy-harvesting devices allow maintenance-free deployment in extreme environments, but requires a power system to provide the right amount of energy when an application needs it. Existing systems must provision energy capacity statically based on an application's peak demand which compromises efficiency and responsiveness when not at peak demand. This work presents Capybara: a co-designed hardware/software power system with dynamically reconfigurable energy storage capacity that meets varied application energy demand. The Capybara software interface allows programmers to specify the energy mode of an application task. Capybara's runtime system reconfigures Capybara's hardware energy capacity to match application demand. Capybara also allows a programmer to write reactive application tasks that pre-allocate a burst of energy that it can spend in response to an asynchronous (e.g., external) event. We instantiated Capybara's hardware design in two EH devices and implemented three reactive sensing applications using its software interface. 
Capybara improves event detection accuracy by 2x-4x over statically-provisioned energy capacity, maintains response latency within 1.5x of a continuously-powered baseline, and enables reactive applications that are intractable with existing power systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Ma:2018:NNE, author = "Kaisheng Ma and Xueqing Li and Mahmut Taylan Kandemir and Jack Sampson and Vijaykrishnan Narayanan and Jinyang Li and Tongda Wu and Zhibo Wang and Yongpan Liu and Yuan Xie", title = "{NEOFog}: Nonvolatility-Exploiting Optimizations for Fog Computing", journal = j-SIGPLAN, volume = "53", number = "2", pages = "782--796", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3177154", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nonvolatile processors have emerged as one of the promising solutions for energy harvesting scenarios, among which Wireless Sensor Networks (WSN) provide some of the most important applications. In a typical distributed sensing system, due to differences in location, energy harvester angles, power sources, etc., different nodes may have different amounts of energy ready for use. While prior approaches have examined these challenges, they have not done so in the context of the features offered by nonvolatile computing approaches, which disrupt certain foundational assumptions. We propose a new set of nonvolatility-exploiting optimizations and embody them in the NEOFog system architecture. We discuss shifts in the tradeoffs in data and program distribution for nonvolatile processing-based WSNs, showing how non-volatile processing and non-volatile RF support alter the benefits of computation and communication-centric approaches. We also propose a new algorithm specific to nonvolatile sensing systems for load balancing both computation and communication demands. Collectively, the NV-aware optimizations in NEOFog increase the ability to perform in-fog processing by 4.2X and can increase this to 8X if virtualized nodes are 3X multiplexed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Lottarini:2018:VBV, author = "Andrea Lottarini and Alex Ramirez and Joel Coburn and Martha A. Kim and Parthasarathy Ranganathan and Daniel Stodolsky and Mark Wachsler", title = "{\tt vbench}: Benchmarking Video Transcoding in the Cloud", journal = j-SIGPLAN, volume = "53", number = "2", pages = "797--809", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3173207", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents vbench, a publicly available benchmark for cloud video services. We are the first study, to the best of our knowledge, to characterize the emerging video-as-a-service workload. Unlike prior video processing benchmarks, vbench's videos are algorithmically selected to represent a large commercial corpus of millions of videos.
Reflecting the complex infrastructure that processes and hosts these videos, vbench includes carefully constructed metrics and baselines. The combination of validated corpus, baselines, and metrics reveals nuanced tradeoffs between speed, quality, and compression. We demonstrate the importance of video selection with a microarchitectural study of cache, branch, and SIMD behavior. vbench reveals trends from the commercial corpus that are not visible in other video corpuses. Our experiments with GPUs under vbench's scoring scenarios reveal that context is critical: GPUs are well suited for live-streaming, while video-on-demand shifts costs from compute to storage and network. Counterintuitively, they are not viable for popular videos, for which highly compressed, high quality copies are required. We instead find that popular videos are well served by the current trajectory of software encoders.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Blackburn:2018:SDS, author = "Steve Blackburn", title = "Session details: Session 7B: Memory 2", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252965", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Carter:2018:SDS, author = "John Carter", title = "Session details: Session 6B: Datacenters", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252963", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Criswell:2018:SDS, author = "John Criswell", title = "Session details: Session 8A: Security and Protection", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Gandhi:2018:SDS, author = "Jayneel Gandhi", title = "Session details: Session 6A: {GPU} 2", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252962", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", }
@Article{Hoffmann:2018:SDS, author = "Hank Hoffmann", title = "Session details: Session 5A: Concurrency and Parallelism", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252960", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Kim:2018:SDS, author = "Martha Kim", title = "Session details: Session 7A: Irregular Apps and Graphs", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252964", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Larus:2018:SDS, author = "James Larus", title = "Session details: Session 2B: Performance Management", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252955", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Lee:2018:SDS, author = "Dongyoon Lee", title = "Session details: Session 3B: Mobile Applications", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252957", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Liu:2018:SDS, author = "Lei Liu", title = "Session details: Session 1B: Managed Runtimes and Dynamic Translation", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252953", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Lu:2018:SDS, author = "Shan Lu", title = "Session details: Session 4B: Program Analysis", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252959", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", 
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Narayanasamy:2018:SDS, author = "Satish Narayanasamy", title = "Session details: Session 3A: Programmable Devices and Co-processors", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252956", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Rossbach:2018:SDS, author = "Christopher J. Rossbach", title = "Session details: Session 2A: {GPUs} 1", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252954", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Sampson:2018:SDS, author = "Adrian Sampson", title = "Session details: Session 5B: Neural Networks", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252961", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Solihin:2018:SDS, author = "Yan Solihin", title = "Session details: Session 8B: Potpourri", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252967", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Torrellas:2018:SDS, author = "Josep Torrellas", title = "Session details: Session 1A: New Architectures", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252952", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Tsafrir:2018:SDS, author = "Dan Tsafrir", title = "Session details: Session 4A: Memory 1", journal = j-SIGPLAN, volume = "53", number = "2", pages = "??--??", month = feb,
year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296957.3252958", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ASPLOS '18 proceedings.", } @Article{Wang:2018:HSA, author = "Kunshan Wang and Stephen M. Blackburn and Antony L. Hosking and Michael Norrish", title = "Hop, Skip, \& Jump: Practical On-Stack Replacement for a Cross-Platform Language-Neutral {VM}", journal = j-SIGPLAN, volume = "53", number = "3", pages = "1--16", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "On-stack replacement (OSR) is a performance-critical technology for many languages, especially dynamic languages. Conventional wisdom, apparent in JavaScript engines such as V8 and SpiderMonkey, is that OSR must be implemented in a low-level (i.e., in assembly) and language-specific way. This paper presents an OSR abstraction based on Swapstack, materialized as the API for a low-level virtual machine, and shows how the abstraction of resumption protocols facilitates an elegant implementation of this API on real hardware. Using an experimental JavaScript implementation, we demonstrate that this API enables the language implementation to perform OSR without the need to deal with machine-level details. We also show that the API itself is implementable on concrete hardware. This work helps crystallize OSR abstractions and, by providing a reusable implementation, brings OSR within reach for more language implementers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Wang:2018:IDG, author = "Wenwen Wang and Jiacheng Wu and Xiaoli Gong and Tao Li and Pen-Chung Yew", title = "Improving Dynamically-Generated Code Performance on Dynamic Binary Translators", journal = j-SIGPLAN, volume = "53", number = "3", pages = "17--30", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186413", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The recent transition in the software industry toward dynamically generated code poses a new challenge to existing dynamic binary translation (DBT) systems. A significant re-translation overhead could be introduced due to the maintenance of the consistency between the dynamically-generated guest code and the corresponding translated host code. To address this issue, this paper presents a novel approach to optimize DBT systems for guest applications with dynamically-generated code. The proposed approach can maximize the reuse of previously translated host code to mitigate the re-translation overhead. A prototype based on such an approach has been implemented on an existing DBT system HQEMU. 
Experimental results on a set of JavaScript applications show that it can achieve a 1.24X performance speedup on average compared to the original HQEMU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Ma:2018:GEG, author = "Jiacheng Ma and Xiao Zheng and Yaozu Dong and Wentai Li and Zhengwei Qi and Bingsheng He and Haibing Guan", title = "{gMig}: Efficient {GPU} Live Migration Optimized by Software Dirty Page for Full Virtualization", journal = j-SIGPLAN, volume = "53", number = "3", pages = "31--44", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186414", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "This paper introduces gMig, an open-source and practical GPU live migration solution for full virtualization. By taking advantage of the dirty pattern of GPU workloads, gMig presents the One-Shot Pre-Copy combined with the hashing based Software Dirty Page technique to achieve efficient GPU live migration. Particularly, we propose three approaches for gMig: (1) Dynamic Graphics Address Remapping, which parses and manipulates GPU commands to adjust the address mapping to adapt to a different environment after migration, (2) Software Dirty Page, which utilizes a hashing based approach to detect page modification, overcomes the commodity GPU's hardware limitation, and speeds up the migration by only sending the dirtied pages, (3) One-Shot Pre-Copy, which greatly reduces the rounds of pre-copy of graphics memory. Our evaluation shows that gMig achieves GPU live migration with an average downtime of 302 ms on Windows and 119 ms on Linux. With the help of Software Dirty Page, the number of GPU pages transferred during the downtime is effectively reduced by 80.0\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Ruprecht:2018:VLM, author = "Adam Ruprecht and Danny Jones and Dmitry Shiraev and Greg Harmon and Maya Spivak and Michael Krebs and Miche Baker-Harvey and Tyler Sanderson", title = "{VM} Live Migration At Scale", journal = j-SIGPLAN, volume = "53", number = "3", pages = "45--56", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Uninterrupted uptime is a critical aspect of Virtual Machines (VMs) offered by cloud hosting providers. Google's VMs run on top of rapidly changing infrastructure: we regularly update hardware and host software, and we must quickly respond to failing hardware. Frequent change is critical to both development velocity---deploying new versions of services and infrastructure---and the ability to respond rapidly to defects, including critical security fixes. Typically these updates would be disruptive, resulting in VM termination or restart. 
In this paper we present how we use VM live migration at scale to eliminate this disruption with minimal impact to the guest, performing over 1,000,000 migrations monthly in our production fleet, with 50ms median blackout, 300ms 99th percentile blackout.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Xu:2018:DES, author = "Yu Xu and Jianguo Yao and Yaozu Dong and Kun Tian and Xiao Zheng and Haibing Guan", title = "{Demon}: an Efficient Solution for on-Device {MMU} Virtualization in Mediated Pass-Through", journal = j-SIGPLAN, volume = "53", number = "3", pages = "57--70", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186416", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Memory Management Units (MMUs) for on-device address translation are widely used in modern devices. However, conventional solutions for on-device MMU virtualization, such as shadow page table implemented in mediated pass-through, still suffer from high complexity and low performance. We present Demon, an efficient solution for on-DEvice MMU virtualizatiON in mediated pass-through. The key insight is that Demon takes advantage of IOMMU to construct a two-dimensional address translation and dynamically switches the 2nd-dimensional page table to a proper candidate when the device owner switches. In order to support fine-grained parallelism for the device with multiple engines, we put forward a hardware proposal that separates the address space of each engine and enables simultaneous device address remapping for multiple virtual machines (VMs). We implement Demon with a prototype named gDemon which virtualizes Intel GPU MMU. Nonetheless, Demon is not limited to this particular case. Evaluations show that gDemon provides up to 19.73x better performance in the media transcoding workloads and achieves performance improvement of up to 17.09\% and 13.73\% in the 2D benchmarks and 3D benchmarks, respectively, compared with gVirt. The current release of gDemon scales up to 6 VMs with moderate performance in our experiments. In addition, gDemon simplifies the implementation of GPU MMU virtualization with 37\% code reduction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Yan:2018:FPS, author = "Qiuchen Yan and Stephen McCamant", title = "{Fast PokeEMU}: Scaling Generated Instruction Tests Using Aggregation and State Chaining", journal = j-SIGPLAN, volume = "53", number = "3", pages = "71--83", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186417", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Software that emulates a CPU has many applications, but is difficult to implement correctly and requires extensive testing. Since a large number of test cases are required for full coverage, it is important that the tests execute efficiently.
We explore techniques for combining many instruction tests into one program to amortize overheads such as booting an emulator. To ensure the results of each test are reflected in a final result, we use the outputs of one instruction test as an input to the next, and adopt the ``Feistel network'' construction from cryptography so that each step is invertible. We evaluate this approach by applying it to PokeEMU, a tool that generates emulator tests using symbolic execution. The combined tests run much faster, but still reveal most of the same behavior differences as when run individually.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Rigger:2018:AXI, author = "Manuel Rigger and Stefan Marr and Stephen Kell and David Leopoldseder and Hanspeter M{\"o}ssenb{\"o}ck", title = "An Analysis of x86-64 Inline Assembly in {C} Programs", journal = j-SIGPLAN, volume = "53", number = "3", pages = "84--99", month = mar, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296975.3186418", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C codebases frequently embed nonportable and unstandardized elements such as inline assembly code. Such elements are not well understood, which poses a problem to tool developers who aspire to support C code. This paper investigates the use of x86-64 inline assembly in 1264 C projects from GitHub and combines qualitative and quantitative analyses to answer questions that tool authors may have. We found that 28.1\% of the most popular projects contain inline assembly code, although the majority contain only a few fragments with just one or two instructions. The most popular instructions constitute a small subset concerned largely with multicore semantics, performance optimization, and hardware control. Our findings are intended to help developers of C-focused tools, those testing compilers, and language designers seeking to reduce the reliance on inline assembly. They may also aid the design of tools focused on inline assembly itself.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "VEE '18 proceedings.", } @Article{Panchekha:2018:VWP, author = "Pavel Panchekha and Adam T. Geller and Michael D. Ernst and Zachary Tatlock and Shoaib Kamil", title = "Verifying that web pages have accessible layout", journal = j-SIGPLAN, volume = "53", number = "4", pages = "1--14", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192407", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Usability and accessibility guidelines aim to make graphical user interfaces accessible to all users, by, say, requiring that text is sufficiently large, interactive controls are visible, and heading size corresponds to importance. These guidelines must hold on the infinitely many possible renderings of a web page generated by differing screen sizes, fonts, and other user preferences. 
Today, these guidelines are tested by manual inspection of a few renderings, because (1) the guidelines are not expressed in a formal language, (2) the semantics of browser rendering are not well understood, and (3) no tools exist to check all possible renderings of a web page. VizAssert solves these problems. First, it introduces visual logic to precisely specify accessibility properties. Second, it formalizes a large fragment of the browser rendering algorithm using novel finitization reductions. Third, it provides a sound, automated tool for verifying assertions in visual logic. We encoded 14 assertions drawn from best-practice accessibility and mobile-usability guidelines in visual logic. VizAssert checked them on 62 professionally designed web pages. It found 64 distinct errors in the web pages, while reporting only 13 false positive warnings.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Vilk:2018:BAD, author = "John Vilk and Emery D. Berger", title = "{BLeak}: automatically debugging memory leaks in web applications", journal = j-SIGPLAN, volume = "53", number = "4", pages = "15--29", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192376", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the presence of garbage collection in managed languages like JavaScript, memory leaks remain a serious problem. In the context of web applications, these leaks are especially pervasive and difficult to debug. Web application memory leaks can take many forms, including failing to dispose of unneeded event listeners, repeatedly injecting iframes and CSS files, and failing to call cleanup routines in third-party libraries. Leaks degrade responsiveness by increasing GC frequency and overhead, and can even lead to browser tab crashes by exhausting available memory. Because previous leak detection approaches designed for conventional C, C++ or Java applications are ineffective in the browser environment, tracking down leaks currently requires intensive manual effort by web developers. This paper introduces BLeak (Browser Leak debugger), the first system for automatically debugging memory leaks in web applications. BLeak's algorithms leverage the observation that in modern web applications, users often repeatedly return to the same (approximate) visual state (e.g., the inbox view in Gmail). Sustained growth between round trips is a strong indicator of a memory leak. To use BLeak, a developer writes a short script (17-73 LOC on our benchmarks) to drive a web application in round trips to the same visual state. BLeak then automatically generates a list of leaks found along with their root causes, ranked by return on investment. Guided by BLeak, we identify and fix over 50 memory leaks in popular libraries and apps including Airbnb, AngularJS, Google Analytics, Google Maps SDK, and jQuery. BLeak's median precision is 100\%; fixing the leaks it identifies reduces heap growth by an average of 94\%, saving from 0.5 MB to 8 MB per round trip.
We believe BLeak's approach to be broadly applicable beyond web applications, including to GUI applications on desktop and mobile platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Baxter:2018:PAS, author = "Samuel Baxter and Rachit Nigam and Joe Gibbs Politz and Shriram Krishnamurthi and Arjun Guha", title = "Putting in all the stops: execution control for {JavaScript}", journal = j-SIGPLAN, volume = "53", number = "4", pages = "30--45", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192370", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scores of compilers produce JavaScript, enabling programmers to use many languages on the Web, reuse existing code, and even use Web IDEs. Unfortunately, most compilers inherit the browser's compromised execution model, so long-running programs freeze the browser tab, infinite loops crash IDEs, and so on. The few compilers that avoid these problems suffer poor performance and are difficult to engineer. This paper presents Stopify, a source-to-source compiler that extends JavaScript with debugging abstractions and blocking operations, and easily integrates with existing compilers. We apply Stopify to ten programming languages and develop a Web IDE that supports stopping, single-stepping, breakpointing, and long-running computations. For nine languages, Stopify requires no or trivial compiler changes. For eight, our IDE is the first that provides these features. Two of our subject languages have compilers with similar features. Stopify's performance is competitive with these compilers and it makes them dramatically simpler. Stopify's abstractions rely on first-class continuations, which it provides by compiling JavaScript to JavaScript. We also identify sub-languages of JavaScript that compilers implicitly use, and exploit these to improve performance. Finally, Stopify needs to repeatedly interrupt and resume program execution. We use a sampling-based technique to estimate program speed that outperforms other systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Gogte:2018:PSF, author = "Vaibhav Gogte and Stephan Diestelhorst and William Wang and Satish Narayanasamy and Peter M. Chen and Thomas F. Wenisch", title = "Persistency for synchronization-free regions", journal = j-SIGPLAN, volume = "53", number = "4", pages = "46--61", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192367", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Nascent persistent memory (PM) technologies promise the performance of DRAM with the durability of disk, but how best to integrate them into programming systems remains an open question. Recent work extends language memory models with a persistency model prescribing semantics for updates to PM. These semantics enable programmers to design data structures in PM that are accessed like memory and yet are recoverable upon crash or failure. 
Alas, we find the semantics and performance of existing approaches unsatisfying. Existing approaches require high-overhead mechanisms, are restricted to certain synchronization constructs, provide incomplete semantics, and/or may recover to state that cannot arise in fault-free execution. We propose persistency semantics that guarantee failure atomicity of synchronization-free regions (SFRs) --- program regions delimited by synchronization operations. Our approach provides clear semantics for the PM state recovery code may observe and extends C++11's ``sequential consistency for data-race-free'' guarantee to post-failure recovery code. We investigate two designs for failure-atomic SFRs that vary in performance and the degree to which commit of persistent state may lag execution. We demonstrate both approaches in LLVM v3.6.0 and compare to a state-of-the-art baseline to show performance improvement up to 87.5\% (65.5\% avg).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Akram:2018:WRG, author = "Shoaib Akram and Jennifer B. Sartor and Kathryn S. McKinley and Lieven Eeckhout", title = "Write-rationing garbage collection for hybrid memories", journal = j-SIGPLAN, volume = "53", number = "4", pages = "62--77", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192392", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Emerging Non-Volatile Memory (NVM) technologies offer high capacity and energy efficiency compared to DRAM, but suffer from limited write endurance and longer latencies. Prior work seeks the best of both technologies by combining DRAM and NVM in hybrid memories to attain low latency, high capacity, energy efficiency, and durability. Coarse-grained hardware and OS optimizations then spread writes out (wear-leveling) and place highly mutated pages in DRAM to extend NVM lifetimes. Unfortunately, even with these coarse-grained methods, popular Java applications exact impractical NVM lifetimes of 4 years or less. This paper shows how to make hybrid memories practical, without changing the programming model, by enhancing garbage collection in managed language runtimes. We find object write behaviors offer two opportunities: (1) 70\% of writes occur to newly allocated objects, and (2) 2\% of objects capture 81\% of writes to mature objects. We introduce write-rationing garbage collectors that exploit these fine-grained behaviors. They extend NVM lifetimes by placing highly mutated objects in DRAM and read-mostly objects in NVM. We implement two such systems. (1) Kingsguard-nursery places new allocation in DRAM and survivors in NVM, reducing NVM writes by 5$ \times $ versus NVM only with wear-leveling. (2) Kingsguard-writers (KG-W) places nursery objects in DRAM and survivors in a DRAM observer space. It monitors all mature object writes and moves unwritten mature objects from DRAM to NVM. Because most mature objects are unwritten, KG-W exploits NVM capacity while increasing NVM lifetimes by 11$ \times $. It reduces the energy-delay product by 32\% over DRAM-only and 29\% over NVM-only.
This work opens up new avenues for making hybrid memories practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Lin:2018:MSN, author = "Chit-Kwan Lin and Andreas Wild and Gautham N. Chinya and Tsung-Han Lin and Mike Davies and Hong Wang", title = "Mapping spiking neural networks onto a manycore neuromorphic architecture", journal = j-SIGPLAN, volume = "53", number = "4", pages = "78--89", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192371", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a compiler for Loihi, a novel manycore neuromorphic processor that features a programmable, on-chip learning engine for training and executing spiking neural networks (SNNs). An SNN is distinguished from other neural networks in that (1) its independent computing units, or ``neurons'', communicate with others only through spike messages; and (2) each neuron evaluates local learning rules, which are functions of spike arrival and departure timings, to modify its local state. The collective neuronal state dynamics of an SNN form a nonlinear dynamical system that can be cast as an unconventional model of computation. To realize such an SNN on Loihi requires each constituent neuron to locally store and independently update its own spike timing information. However, each Loihi core has limited resources for this purpose and these must be shared by neurons assigned to the same core. In this work, we present a compiler for Loihi that maps the neurons of an SNN onto and across Loihi's cores efficiently. We show that a poor neuron-to-core mapping can incur significant energy costs and address this with a greedy algorithm that compiles SNNs onto Loihi in a power-efficient manner. In so doing, we highlight the need for further development of compilers for this new, emerging class of architectures.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Brutschy:2018:SSA, author = "Lucas Brutschy and Dimitar Dimitrov and Peter M{\"u}ller and Martin Vechev", title = "Static serializability analysis for causal consistency", journal = j-SIGPLAN, volume = "53", number = "4", pages = "90--104", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many distributed databases provide only weak consistency guarantees to reduce synchronization overhead and remain available under network partitions. However, this leads to behaviors not possible under stronger guarantees. Such behaviors can easily defy programmer intuition and lead to errors that are notoriously hard to detect. In this paper, we propose a static analysis for detecting non-serializable behaviors of applications running on top of causally-consistent databases. 
Our technique is based on a novel, local serializability criterion and combines a generalization of graph-based techniques from the database literature with another, complementary analysis technique that encodes our serializability criterion into first-order logic formulas to be checked by an SMT solver. This analysis is more expensive yet more precise and produces concrete counter-examples. We implemented our methods and evaluated them on a number of applications from two different domains: cloud-backed mobile applications and clients of a distributed database. Our experiments demonstrate that our analysis is able to detect harmful serializability violations while producing only a small number of false alarms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Liu:2018:CIC, author = "Peizun Liu and Thomas Wahl", title = "{CUBA}: interprocedural {Context-UnBounded Analysis} of concurrent programs", journal = j-SIGPLAN, volume = "53", number = "4", pages = "105--119", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192419", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A classical result by Ramalingam about synchronization-sensitive interprocedural program analysis implies that reachability for concurrent threads running recursive procedures is undecidable. A technique proposed by Qadeer and Rehof, to bound the number of context switches allowed between the threads, leads to an incomplete solution that is, however, believed to catch ``most bugs'' in practice. The question whether the technique can also prove the absence of bugs at least in some cases has remained largely open. In this paper we introduce a broad verification methodology for resource-parameterized programs that observes how changes to the resource parameter affect the behavior of the program. Applied to the context-unbounded analysis problem (CUBA), the methodology results in partial verification techniques for procedural concurrent programs. Our solutions may not terminate, but are able to both refute and prove context-unbounded safety for concurrent recursive threads. We demonstrate the effectiveness of our method using a variety of examples, the safety of which cannot be proved by earlier, context-bounded methods.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Ferles:2018:SRA, author = "Kostas Ferles and Jacob {Van Geffen} and Isil Dillig and Yannis Smaragdakis", title = "Symbolic reasoning for automatic signal placement", journal = j-SIGPLAN, volume = "53", number = "4", pages = "120--134", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192395", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Explicit signaling between threads is a perennial cause of bugs in concurrent programs. While there are several run-time techniques to automatically notify threads upon the availability of some shared resource, such techniques are not widely-adopted due to their run-time overhead.
This paper proposes a new solution based on static analysis for automatically generating a performant explicit-signal program from its corresponding implicit-signal implementation. The key idea is to generate verification conditions that allow us to minimize the number of required signals and unnecessary context switches, while guaranteeing semantic equivalence between the source and target programs. We have implemented our method in a tool called Expresso and evaluate it on challenging benchmarks from prior papers and open-source software. Expresso-generated code significantly outperforms past automatic signaling mechanisms (avg. 1.56x speedup) and closely matches the performance of hand-optimized explicit-signal code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Chen:2018:AAB, author = "Yu-Fang Chen and Matthias Heizmann and Ondrej Leng{\'a}l and Yong Li and Ming-Hsien Tsai and Andrea Turrini and Lijun Zhang", title = "Advanced automata-based algorithms for program termination checking", journal = j-SIGPLAN, volume = "53", number = "4", pages = "135--150", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192405", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In 2014, Heizmann et al. proposed a novel framework for program termination analysis. The analysis starts with a termination proof of a sample path. The path is generalized to a B{\"u}chi automaton (BA) whose language (by construction) represents a set of terminating paths. All these paths can be safely removed from the program. The removal of paths is done using automata difference, implemented via BA complementation and intersection. The analysis constructs in this way a set of BAs that jointly ``cover'' the behavior of the program, thus proving its termination. An implementation of the approach in Ultimate Automizer won the 1st place in the Termination category of SV-COMP 2017. In this paper, we exploit advanced automata-based algorithms and propose several non-trivial improvements of the framework. To alleviate the complementation computation for BAs---one of the most expensive operations in the framework---, we propose a multi-stage generalization construction. We start with generalizations producing subclasses of BAs (such as deterministic BAs) for which efficient complementation algorithms are known, and proceed to more general classes only if necessary. Particularly, we focus on the quite expressive subclass of semideterministic BAs and provide an improved complementation algorithm for this class. 
Our experimental evaluation shows that the proposed approach significantly improves the power of termination checking within the Ultimate Automizer framework.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Ottoni:2018:HJP, author = "Guilherme Ottoni", title = "{HHVM JIT}: a profile-guided, region-based compiler for {PHP} and Hack", journal = j-SIGPLAN, volume = "53", number = "4", pages = "151--165", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192374", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic languages such as PHP, JavaScript, Python, and Ruby have been gaining popularity over the last two decades. A very popular domain for these languages is web development, including server-side development of large-scale websites. As a result, improving the performance of these languages has become more important. Efficiently compiling programs in these languages is challenging, and many popular dynamic languages still lack efficient production-quality implementations. This paper describes the design of the second generation of the HHVM JIT and how it addresses the challenges to efficiently execute PHP and Hack programs. This new design uses profiling to build an aggressive region-based JIT compiler. We discuss the benefits of this approach compared to the more popular method-based and trace-based approaches to compile dynamic languages. Our evaluation running a very large PHP-based code base, the Facebook website, demonstrates the effectiveness of the new JIT design.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{DElia:2018:SRD, author = "Daniele Cono D'Elia and Camil Demetrescu", title = "On-stack replacement, distilled", journal = j-SIGPLAN, volume = "53", number = "4", pages = "166--180", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192396", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "On-stack replacement (OSR) is essential technology for adaptive optimization, allowing changes to code actively executing in a managed runtime. The engineering aspects of OSR are well-known among VM architects, with several implementations available to date. However, OSR is yet to be explored as a general means to transfer execution between related program versions, which can pave the road to unprecedented applications that stretch beyond VMs. We aim at filling this gap with a constructive and provably correct OSR framework, allowing a class of general-purpose transformation functions to yield a special-purpose replacement. We describe and evaluate an implementation of our technique in LLVM. 
As a novel application of OSR, we present a feasibility study on debugging of optimized code, showing how our techniques can be used to fix variables holding incorrect values at breakpoints due to optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Duck:2018:ETM, author = "Gregory J. Duck and Roland H. C. Yap", title = "{EffectiveSan}: type and memory error detection using dynamically typed {C\slash C++}", journal = j-SIGPLAN, volume = "53", number = "4", pages = "181--195", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192388", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Low-level programming languages with weak/static type systems, such as C and C++, are vulnerable to errors relating to the misuse of memory at runtime, such as (sub-)object bounds overflows, (re)use-after-free, and type confusion. Such errors account for many security and other undefined behavior bugs for programs written in these languages. In this paper, we introduce the notion of dynamically typed C/C++, which aims to detect such errors by dynamically checking the ``effective type'' of each object before use at runtime. We also present an implementation of dynamically typed C/C++ in the form of the Effective Type Sanitizer (EffectiveSan). EffectiveSan enforces type and memory safety using a combination of low-fat pointers, type meta data and type/bounds check instrumentation. We evaluate EffectiveSan against the SPEC2006 benchmark suite and the Firefox web browser, and detect several new type and memory errors. We also show that EffectiveSan achieves high compatibility and reasonable overheads for the given error coverage. Finally, we highlight that EffectiveSan is one of only a few tools that can detect sub-object bounds errors, and uses a novel approach (dynamic type checking) to do so.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Cai:2018:CRC, author = "Cheng Cai and Qirun Zhang and Zhiqiang Zuo and Khanh Nguyen and Guoqing Xu and Zhendong Su", title = "Calling-to-reference context translation via constraint-guided {CFL}-reachability", journal = j-SIGPLAN, volume = "53", number = "4", pages = "196--210", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192378", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A calling context is an important piece of information used widely to help developers understand program executions (e.g., for debugging). While calling contexts offer useful control information, information regarding data involved in a bug (e.g., what data structure holds a leaking object), in many cases, can bring developers closer to the bug's root cause. Such data information, often exhibited as heap reference paths, has already been needed by many tools. The only way for a dynamic analysis to record complete reference paths is to perform heap dumping, which incurs huge runtime overhead and renders the analysis impractical. 
This paper presents a novel static analysis that can precisely infer, from a calling context of a method that contains a use (e.g., read or write) of an object, the heap reference paths leading to the object at the time the use occurs. Since calling context recording is much less expensive, our technique provides benefits for all dynamic techniques that need heap information, significantly reducing their overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Chong:2018:STW, author = "Nathan Chong and Tyler Sorensen and John Wickerson", title = "The semantics of transactions and weak memory in x86, {Power}, {ARM}, and {C++}", journal = j-SIGPLAN, volume = "53", number = "4", pages = "211--225", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Weak memory models provide a complex, system-centric semantics for concurrent programs, while transactional memory (TM) provides a simpler, programmer-centric semantics. Both have been studied in detail, but their combined semantics is not well understood. This is problematic because such widely-used architectures and languages as x86, Power, and C++ all support TM, and all have weak memory models. Our work aims to clarify the interplay between weak memory and TM by extending existing axiomatic weak memory models (x86, Power, ARMv8, and C++) with new rules for TM. Our formal models are backed by automated tooling that enables (1) the synthesis of tests for validating our models against existing implementations and (2) the model-checking of TM-related transformations, such as lock elision and compiling C++ transactions to hardware. A key finding is that a proposed TM extension to ARMv8 currently being considered within ARM Research is incompatible with lock elision without sacrificing portability or performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Milano:2018:MLM, author = "Matthew Milano and Andrew C. Myers", title = "{MixT}: a language for mixing consistency in geodistributed transactions", journal = j-SIGPLAN, volume = "53", number = "4", pages = "226--241", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192375", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Programming concurrent, distributed systems is hard-especially when these systems mutate shared, persistent state replicated at geographic scale. To enable high availability and scalability, a new class of weakly consistent data stores has become popular. However, some data needs strong consistency. To manipulate both weakly and strongly consistent data in a single transaction, we introduce a new abstraction: mixed-consistency transactions, embodied in a new embedded language, MixT. Programmers explicitly associate consistency models with remote storage sites; each atomic, isolated transaction can access a mixture of data with different consistency models. 
Compile-time information-flow checking, applied to consistency models, ensures that these models are mixed safely and enables the compiler to automatically partition transactions. New run-time mechanisms ensure that consistency models can also be mixed safely, even when the data used by a transaction resides on separate, mutually unaware stores. Performance measurements show that despite their stronger guarantees, mixed-consistency transactions retain much of the speed of weak consistency, significantly outperforming traditional serializable transactions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Dolan:2018:BDR, author = "Stephen Dolan and KC Sivaramakrishnan and Anil Madhavapeddy", title = "Bounding data races in space and time", journal = j-SIGPLAN, volume = "53", number = "4", pages = "242--255", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192421", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new semantics for shared-memory parallel programs that gives strong guarantees even in the presence of data races. Our local data race freedom property guarantees that all data-race-free portions of programs exhibit sequential semantics. We provide a straightforward operational semantics and an equivalent axiomatic model, and evaluate an implementation for the OCaml programming language. Our evaluation demonstrates that it is possible to balance a comprehensible memory model with a reasonable (no overhead on x86, ~0.6\% on ARM) sequential performance trade-off in a mainstream programming language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Sanchez-Stern:2018:FRC, author = "Alex Sanchez-Stern and Pavel Panchekha and Sorin Lerner and Zachary Tatlock", title = "Finding root causes of floating point error", journal = j-SIGPLAN, volume = "53", number = "4", pages = "256--269", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192411", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Floating-point arithmetic plays a central role in science, engineering, and finance by enabling developers to approximate real arithmetic. To address numerical issues in large floating-point applications, developers must identify root causes, which is difficult because floating-point errors are generally non-local, non-compositional, and non-uniform. This paper presents Herbgrind, a tool to help developers identify and address root causes in numerical code written in low-level languages like C/C++ and Fortran. Herbgrind dynamically tracks dependencies between operations and program outputs to avoid false positives and abstracts erroneous computations to simplified program fragments whose improvement can reduce output error. 
We perform several case studies applying Herbgrind to large, expert-crafted numerical programs and show that it scales to applications spanning hundreds of thousands of lines, correctly handling the low-level details of modern floating point hardware and mathematical libraries and tracking error across function boundaries and through the heap.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Adams:2018:RFF, author = "Ulf Adams", title = "{Ry{\=u}}: fast float-to-string conversion", journal = j-SIGPLAN, volume = "53", number = "4", pages = "270--282", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192369", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present Ry{\=u}, a new routine to convert binary floating point numbers to their decimal representations using only fixed-size integer operations, and prove its correctness. Ry{\=u} is simpler and approximately three times faster than the previously fastest implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", keywords = "base conversion; input-output conversion; radix conversion; round-trip base conversion", remark = "PLDI '18 proceedings.", } @Article{Steindorfer:2018:MOA, author = "Michael J. Steindorfer and Jurgen J. Vinju", title = "To-many or to-one? {All}-in-one! {Efficient} purely functional multi-maps with type-heterogeneous hash-tries", journal = j-SIGPLAN, volume = "53", number = "4", pages = "283--295", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192420", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "An immutable multi-map is a many-to-many map data structure with expected fast insert and lookup operations. This data structure is used for applications processing graphs or many-to-many relations as applied in compilers, runtimes of programming languages, or in static analysis of object-oriented systems. Collection data structures are assumed to carefully balance execution time of operations with memory consumption characteristics and need to scale gracefully from a few elements to multiple gigabytes at least. When processing larger in-memory data sets the overhead of the data structure encoding itself becomes a memory usage bottleneck, dominating the overall performance. In this paper we propose AXIOM, a novel hash-trie data structure that allows for a highly efficient and type-safe multi-map encoding by distinguishing inlined values of singleton sets from nested sets of multi-mappings. AXIOM strictly generalizes over previous hash-trie data structures by supporting the processing of fine-grained type-heterogeneous content on the implementation level (while API and language support for type-heterogeneity are not in the scope of this paper). We detail the design and optimizations of AXIOM and further compare it against state-of-the-art immutable maps and multi-maps in Java, Scala and Clojure.
We isolate key differences using microbenchmarks and validate the resulting conclusions on a case study in static analysis. AXIOM reduces the key-value storage overhead by 1.87x; with specializing and inlining across collection boundaries it improves by 5.1x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Koeplinger:2018:SLC, author = "David Koeplinger and Matthew Feldman and Raghu Prabhakar and Yaqi Zhang and Stefan Hadjis and Ruben Fiszel and Tian Zhao and Luigi Nardi and Ardavan Pedram and Christos Kozyrakis and Kunle Olukotun", title = "{Spatial}: a language and compiler for application accelerators", journal = j-SIGPLAN, volume = "53", number = "4", pages = "296--311", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192379", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Industry is increasingly turning to reconfigurable architectures like FPGAs and CGRAs for improved performance and energy efficiency. Unfortunately, adoption of these architectures has been limited by their programming models. HDLs lack abstractions for productivity and are difficult to target from higher level languages. HLS tools are more productive, but offer an ad-hoc mix of software and hardware abstractions which make performance optimizations difficult. In this work, we describe a new domain-specific language and compiler called Spatial for higher level descriptions of application accelerators. We describe Spatial's hardware-centric abstractions for both programmer productivity and design performance, and summarize the compiler passes required to support these abstractions, including pipeline scheduling, automatic memory banking, and automated design tuning driven by active machine learning. We demonstrate the language's ability to target FPGAs and CGRAs from common source code. We show that applications written in Spatial are, on average, 42\% shorter and achieve a mean speedup of 2.9x over SDAccel HLS when targeting a Xilinx UltraScale+ VU9P FPGA on an Amazon EC2 F1 instance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Kislal:2018:ECC, author = "Orhan Kislal and Jagadish Kotra and Xulong Tang and Mahmut Taylan Kandemir and Myoungsoo Jung", title = "Enhancing computation-to-core assignment with physical location information", journal = j-SIGPLAN, volume = "53", number = "4", pages = "312--327", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192386", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Going beyond a certain number of cores in modern architectures requires an on-chip network more scalable than conventional buses. However, employing an on-chip network in a manycore system (to improve scalability) makes the latencies of the data accesses issued by a core non-uniform. This non-uniformity can play a significant role in shaping the overall application performance. 
This work presents a novel compiler strategy which involves exposing architecture information to the compiler to enable an optimized computation-to-core mapping. Specifically, we propose a compiler-guided scheme that takes into account the relative positions of (and distances between) cores, last-level caches (LLCs) and memory controllers (MCs) in a manycore system, and generates a mapping of computations to cores with the goal of minimizing the on-chip network traffic. The experimental data collected using a set of 21 multi-threaded applications reveal that, on an average, our approach reduces the on-chip network latency in a 6$ \times $6 manycore system by 38.4\% in the case of private LLCs, and 43.8\% in the case of shared LLCs. These improvements translate to the corresponding execution time improvements of 10.9\% and 12.7\% for the private LLC and shared LLC based systems, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Tran:2018:SSH, author = "Kim-Anh Tran and Alexandra Jimborean and Trevor E. Carlson and Konstantinos Koukos and Magnus Sj{\"a}lander and Stefanos Kaxiras", title = "{SWOOP}: software-hardware co-design for non-speculative, execute-ahead, in-order cores", journal = j-SIGPLAN, volume = "53", number = "4", pages = "328--343", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192393", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Increasing demands for energy efficiency constrain emerging hardware. These new hardware trends challenge the established assumptions in code generation and force us to rethink existing software optimization techniques. We propose a cross-layer redesign of the way compilers and the underlying microarchitecture are built and interact, to achieve both performance and high energy efficiency. In this paper, we address one of the main performance bottlenecks --- last-level cache misses --- through a software-hardware co-design. Our approach is able to hide memory latency and attain increased memory and instruction level parallelism by orchestrating a non-speculative, execute-ahead paradigm in software (SWOOP). While out-of-order (OoO) architectures attempt to hide memory latency by dynamically reordering instructions, they do so through expensive, power-hungry, speculative mechanisms. We aim to shift this complexity into software, and we build upon compilation techniques inherited from VLIW, software pipelining, modulo scheduling, decoupled access-execution, and software prefetching. In contrast to previous approaches we do not rely on either software or hardware speculation that can be detrimental to efficiency.
Our SWOOP compiler is enhanced with lightweight architectural support, thus being able to transform applications that include highly complex control-flow and indirect memory accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Liu:2018:ISI, author = "Hongyu Liu and Sam Silvestro and Wei Wang and Chen Tian and Tongping Liu", title = "{iReplayer}: in-situ and identical record-and-replay for multithreaded applications", journal = j-SIGPLAN, volume = "53", number = "4", pages = "344--358", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192380", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Reproducing executions of multithreaded programs is very challenging due to many intrinsic and external non-deterministic factors. Existing RnR systems achieve significant progress in terms of performance overhead, but none targets the in-situ setting, in which replay occurs within the same process as the recording process. Also, most existing work cannot achieve identical replay, which may prevent the reproduction of some errors. This paper presents iReplayer, which aims to identically replay multithreaded programs in the original process (under the ``in-situ'' setting). The novel in-situ and identical replay of iReplayer makes it more likely to reproduce errors, and allows it to directly employ debugging mechanisms (e.g. watchpoints) to aid failure diagnosis. Currently, iReplayer only incurs 3\% performance overhead on average, which allows it to be always enabled in the production environment. iReplayer enables a range of possibilities, and this paper presents three examples: two automatic tools for detecting buffer overflows and use-after-free bugs, and one interactive debugging tool that is integrated with GDB.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Liu:2018:DFC, author = "Bozhen Liu and Jeff Huang", title = "{D4}: fast concurrency debugging with parallel differential analysis", journal = j-SIGPLAN, volume = "53", number = "4", pages = "359--373", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192390", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present D4, a fast concurrency analysis framework that detects concurrency bugs (e.g., data races and deadlocks) interactively in the programming phase. As developers add, modify, and remove statements, the code changes are sent to D4 to detect concurrency bugs in real time, which in turn provides immediate feedback to the developer of the new bugs. The cornerstone of D4 includes a novel system design and two novel parallel differential algorithms that embrace both change and parallelization for fundamental static analyses of concurrent programs. Both algorithms react to program changes by memoizing the analysis results and only recomputing the impact of a change in parallel. 
Our evaluation on an extensive collection of large real-world applications shows that D4 efficiently pinpoints concurrency bugs within 100ms on average after a code change, several orders of magnitude faster than both the exhaustive analysis and the state-of-the-art incremental techniques.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Roemer:2018:HCU, author = "Jake Roemer and Kaan Gen{\c{c}} and Michael D. Bond", title = "High-coverage, unbounded sound predictive race detection", journal = j-SIGPLAN, volume = "53", number = "4", pages = "374--389", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192385", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic program analysis can predict data races knowable from an observed execution, but existing predictive analyses either miss races or cannot analyze full program executions. This paper presents Vindicator, a novel, sound (no false races) predictive approach that finds more data races than existing predictive approaches. Vindicator achieves high coverage by using a new, efficient analysis that finds all possible predictable races but may detect false races. Vindicator ensures soundness using a novel algorithm that checks each potential race to determine whether it is a true predictable race. An evaluation using large Java programs shows that Vindicator finds hard-to-detect predictable races that existing sound predictive analyses miss, at a comparable performance cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Peng:2018:CDC, author = "Yuanfeng Peng and Vinod Grover and Joseph Devietti", title = "{CURD}: a dynamic {CUDA} race detector", journal = j-SIGPLAN, volume = "53", number = "4", pages = "390--403", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192368", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "As GPUs have become an integral part of nearly every processor, GPU programming has become increasingly popular. GPU programming requires a combination of extreme levels of parallelism and low-level programming, making it easy for concurrency bugs such as data races to arise. These concurrency bugs can be extremely subtle and difficult to debug due to the massive numbers of threads running concurrently on a modern GPU. While some tools exist to detect data races in GPU programs, they are often prohibitively slow or focused only on a small class of data races in shared memory. Compared to prior work, our race detector, CURD, can detect data races precisely on both shared and global memory, selects an appropriate race detection algorithm based on the synchronization used in a program, and utilizes efficient compiler instrumentation to reduce performance overheads. Across 53 benchmarks, we find that using CURD incurs an average slowdown of just 2.88x over native execution.
CURD is 2.1x faster than Nvidia's CUDA-Racecheck race detector, despite detecting a much broader class of races. CURD finds 35 races across our benchmarks, including bugs in established benchmark suites and in sample programs from Nvidia.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Alon:2018:GPB, author = "Uri Alon and Meital Zilberstein and Omer Levy and Eran Yahav", title = "A general path-based representation for predicting program properties", journal = j-SIGPLAN, volume = "53", number = "4", pages = "404--419", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192412", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/csharp.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Predicting program properties such as names or expression types has a wide range of applications. It can ease the task of programming, and increase programmer productivity. A major challenge when learning from programs is how to represent programs in a way that facilitates effective learning. We present a general path-based representation for learning from programs. Our representation is purely syntactic and extracted automatically. The main idea is to represent a program using paths in its abstract syntax tree (AST). This allows a learning model to leverage the structured nature of code rather than treating it as a flat sequence of tokens. We show that this representation is general and can: (i) cover different prediction tasks, (ii) drive different learning algorithms (for both generative and discriminative models), and (iii) work across different programming languages. We evaluate our approach on the tasks of predicting variable names, method names, and full types. We use our representation to drive both CRF-based and word2vec-based learning, for programs of four languages: JavaScript, Java, Python and C\#. Our evaluation shows that our approach obtains better results than task-specific handcrafted representations across different tasks and programming languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Feng:2018:PSU, author = "Yu Feng and Ruben Martins and Osbert Bastani and Isil Dillig", title = "Program synthesis using conflict-driven learning", journal = j-SIGPLAN, volume = "53", number = "4", pages = "420--435", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192382", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a new conflict-driven program synthesis technique that is capable of learning from past mistakes. Given a spurious program that violates the desired specification, our synthesis algorithm identifies the root cause of the conflict and learns new lemmas that can prevent similar mistakes in the future. Specifically, we introduce the notion of equivalence modulo conflict and show how this idea can be used to learn useful lemmas that allow the synthesizer to prune large parts of the search space. 
We have implemented a general-purpose CDCL-style program synthesizer called Neo and evaluate it in two different application domains, namely data wrangling in R and functional programming over lists. Our experiments demonstrate the substantial benefits of conflict-driven learning and show that Neo outperforms two state-of-the-art synthesis tools, Morpheus and Deepcoder, that target these respective domains.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Lee:2018:ASB, author = "Woosuk Lee and Kihong Heo and Rajeev Alur and Mayur Naik", title = "Accelerating search-based program synthesis using learned probabilistic models", journal = j-SIGPLAN, volume = "53", number = "4", pages = "436--449", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192410", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A key challenge in program synthesis concerns how to efficiently search for the desired program in the space of possible programs. We propose a general approach to accelerate search-based program synthesis by biasing the search towards likely programs. Our approach targets a standard formulation, syntax-guided synthesis (SyGuS), by extending the grammar of possible programs with a probabilistic model dictating the likelihood of each program. We develop a weighted search algorithm to efficiently enumerate programs in order of their likelihood. We also propose a method based on transfer learning that enables to effectively learn a powerful model, called probabilistic higher-order grammar, from known solutions in a domain. We have implemented our approach in a tool called Euphony and evaluate it on SyGuS benchmark problems from a variety of domains. We show that Euphony can learn good models using easily obtainable solutions, and achieves significant performance gains over existing general-purpose as well as domain-specific synthesizers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Paletov:2018:ICA, author = "Rumen Paletov and Petar Tsankov and Veselin Raychev and Martin Vechev", title = "Inferring crypto {API} rules from code changes", journal = j-SIGPLAN, volume = "53", number = "4", pages = "450--464", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192403", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Creating and maintaining an up-to-date set of security rules that match misuses of crypto APIs is challenging, as crypto APIs constantly evolve over time with new cryptographic primitives and settings, making existing ones obsolete. To address this challenge, we present a new approach to extract security fixes from thousands of code changes. 
Our approach consists of: (i) identifying code changes, which often capture security fixes, (ii) an abstraction that filters irrelevant code changes (such as refactorings), and (iii) a clustering analysis that reveals commonalities between semantic code changes and helps in eliciting security rules. We applied our approach to the Java Crypto API and showed that it is effective: (i) our abstraction effectively filters non-semantic code changes (over 99\% of all changes) without removing security fixes, and (ii) over 80\% of the code changes are security fixes identifying security rules. Based on our results, we identified 13 rules, including new ones not supported by existing security checkers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Gulwani:2018:ACP, author = "Sumit Gulwani and Ivan Radicek and Florian Zuleger", title = "Automated clustering and program repair for introductory programming assignments", journal = j-SIGPLAN, volume = "53", number = "4", pages = "465--480", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192387", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Providing feedback on programming assignments is a tedious task for the instructor, and even impossible in large Massive Open Online Courses with thousands of students. Previous research has suggested that program repair techniques can be used to generate feedback in programming education. In this paper, we present a novel fully automated program repair algorithm for introductory programming assignments. The key idea of the technique, which enables automation and scalability, is to use the existing correct student solutions to repair the incorrect attempts. We evaluate the approach in two experiments: (I) We evaluate the number, size and quality of the generated repairs on 4,293 incorrect student attempts from an existing MOOC. We find that our approach can repair 97\% of student attempts, while 81\% of those are small repairs of good quality. (II) We conduct a preliminary user study on performance and repair usefulness in an interactive teaching setting. We obtain promising initial results (the average usefulness grade 3.4 on a scale from 1 to 5), and conclude that our approach can be used in an interactive setting.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Wang:2018:SAR, author = "Ke Wang and Rishabh Singh and Zhendong Su", title = "Search, align, and repair: data-driven feedback generation for introductory programming exercises", journal = j-SIGPLAN, volume = "53", number = "4", pages = "481--495", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192384", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces the ``Search, Align, and Repair'' data-driven program repair framework to automate feedback generation for introductory programming exercises. 
Distinct from existing techniques, our goal is to develop an efficient, fully automated, and problem-agnostic technique for large or MOOC-scale introductory programming courses. We leverage the large amount of available student submissions in such settings and develop new algorithms for identifying similar programs, aligning correct and incorrect programs, and repairing incorrect programs by finding minimal fixes. We have implemented our technique in the Sarfgen system and evaluated it on thousands of real student attempts from the Microsoft-DEV204.1x edX course and the Microsoft CodeHunt platform. Our results show that Sarfgen can, within two seconds on average, generate concise, useful feedback for 89.7\% of the incorrect student submissions. It has been integrated with the Microsoft-DEV204.1X edX class and deployed for production use.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Ngo:2018:BER, author = "Van Chan Ngo and Quentin Carbonneaux and Jan Hoffmann", title = "Bounded expectations: resource analysis for probabilistic programs", journal = j-SIGPLAN, volume = "53", number = "4", pages = "496--512", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192394", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents a new static analysis for deriving upper bounds on the expected resource consumption of probabilistic programs. The analysis is fully automatic and derives symbolic bounds that are multivariate polynomials in the inputs. The new technique combines manual state-of-the-art reasoning techniques for probabilistic programs with an effective method for automatic resource-bound analysis of deterministic programs. It can be seen as both, an extension of automatic amortized resource analysis (AARA) to probabilistic programs and an automation of manual reasoning for probabilistic programs that is based on weakest preconditions. An advantage of the technique is that it combines the clarity and compositionality of a weakest-precondition calculus with the efficient automation of AARA. As a result, bound inference can be reduced to off-the-shelf LP solving in many cases and automatically-derived bounds can be interactively extended with standard program logics if the automation fails. Building on existing work, the soundness of the analysis is proved with respect to an operational semantics that is based on Markov decision processes. The effectiveness of the technique is demonstrated with a prototype implementation that is used to automatically analyze 39 challenging probabilistic programs and randomized algorithms. 
Experiments indicate that the derived constant factors in the bounds are very precise and even optimal for some programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Wang:2018:PAF, author = "Di Wang and Jan Hoffmann and Thomas Reps", title = "{PMAF}: an algebraic framework for static analysis of probabilistic programs", journal = j-SIGPLAN, volume = "53", number = "4", pages = "513--528", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192408", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Automatically establishing that a probabilistic program satisfies some property $ \varphi $ is a challenging problem. While a sampling-based approach --- which involves running the program repeatedly --- can suggest that $ \varphi $ holds, to establish that the program satisfies $ \varphi $ , analysis techniques must be used. Despite recent successes, probabilistic static analyses are still more difficult to design and implement than their deterministic counterparts. This paper presents a framework, called PMAF, for designing, implementing, and proving the correctness of static analyses of probabilistic programs with challenging features such as recursion, unstructured control-flow, divergence, nondeterminism, and continuous distributions. PMAF introduces pre-Markov algebras to factor out common parts of different analyses. To perform interprocedural analysis and to create procedure summaries, PMAF extends ideas from non-probabilistic interprocedural dataflow analysis to the probabilistic setting. One novelty is that PMAF is based on a semantics formulated in terms of a control-flow hyper-graph for each procedure, rather than a standard control-flow graph. To evaluate its effectiveness, PMAF has been used to reformulate and implement existing intraprocedural analyses for Bayesian-inference and the Markov decision problem, by creating corresponding interprocedural analyses. Additionally, PMAF has been used to implement a new interprocedural linear expectation-invariant analysis. Experiments with benchmark programs for the three analyses demonstrate that the approach is practical.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Acharya:2018:PAT, author = "Aravind Acharya and Uday Bondhugula and Albert Cohen", title = "Polyhedral auto-transformation with no integer linear programming", journal = j-SIGPLAN, volume = "53", number = "4", pages = "529--542", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192401", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State-of-the-art algorithms used in automatic polyhedral transformation for parallelization and locality optimization typically rely on Integer Linear Programming (ILP). This poses a scalability issue when scaling to tens or hundreds of statements, and may be disconcerting in production compiler settings.
In this work, we consider relaxing integrality in the ILP formulation of the Pluto algorithm, a popular algorithm used to find good affine transformations. We show that the rational solutions obtained from the relaxed LP formulation can easily be scaled to valid integral ones to obtain desired solutions, although with some caveats. We first present formal results connecting the solution of the relaxed LP to the original Pluto ILP. We then show that there are difficulties in realizing the above theoretical results in practice, and propose an alternate approach to overcome those while still leveraging linear programming. Our new approach obtains dramatic compile-time speedups for a range of large benchmarks. While achieving these compile-time improvements, we show that the performance of the transformed code is not sacrificed. Our approach to automatic transformation provides a mean compilation time improvement of 5.6$ \times $ over state-of-the-art on relevant challenging benchmarks from the NAS PB, SPEC CPU 2006, and PolyBench suites. We also came across situations where prior frameworks failed to find a transformation in a reasonable amount of time, while our new approach did so instantaneously.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Moll:2018:PCF, author = "Simon Moll and Sebastian Hack", title = "Partial control-flow linearization", journal = j-SIGPLAN, volume = "53", number = "4", pages = "543--556", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192413", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "If-conversion is a fundamental technique for vectorization. It accounts for the fact that in a SIMD program, several targets of a branch might be executed because of divergence. Especially for irregular data-parallel workloads, it is crucial to avoid if-converting non-divergent branches to increase SIMD utilization. In this paper, we present partial linearization, a simple and efficient if-conversion algorithm that overcomes several limitations of existing if-conversion techniques. In contrast to prior work, it has provable guarantees on which non-divergent branches are retained and will never duplicate code or insert additional branches. We show how our algorithm can be used in a classic loop vectorizer as well as to implement data-parallel languages such as ISPC or OpenCL. Furthermore, we implement prior vectorizer optimizations on top of partial linearization in a more general way. 
We evaluate the implementation of our algorithm in LLVM on a range of irregular data analytics kernels, a neutronics simulation benchmark and NAB, a molecular dynamics benchmark from SPEC2017 on AVX2, AVX512, and ARM Advanced SIMD machines and report speedups of up to 146 \% over ICC, GCC and Clang O3.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Chen:2018:LAT, author = "Dong Chen and Fangzhou Liu and Chen Ding and Sreepathi Pai", title = "Locality analysis through static parallel sampling", journal = j-SIGPLAN, volume = "53", number = "4", pages = "557--570", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192402", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Locality analysis is important since accessing memory is much slower than computing. Compile-time locality analysis can provide detailed program-level feedback for compilers or runtime systems faster than trace-based locality analysis. In this paper, we describe a new approach to locality analysis based on static parallel sampling. A compiler analyzes loop-based code and generates sampler code which is run to measure locality. Our approach can predict precise cache line granularity miss ratio curves for complex loops with non-linear array references and even branches. The precision and overhead of static sampling are evaluated using PolyBench and a bit-reversal loop. Our result shows that by randomly sampling 2\% of loop iterations, a compiler can construct almost exact miss ratio curves as trace based analysis. Sampling 0.5\% and 1\% iterations can achieve good precision and efficiency with an average 0.6\% to 1\% the time of tracing respectively. Our analysis can also be parallelized. The analysis may assist program optimization techniques such as tiling, program co-location, cache hint selection and help to analyze write locality and parallel locality.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Cusumano-Towner:2018:IIP, author = "Marco Cusumano-Towner and Benjamin Bichsel and Timon Gehr and Martin Vechev and Vikash K. Mansinghka", title = "Incremental inference for probabilistic programs", journal = j-SIGPLAN, volume = "53", number = "4", pages = "571--585", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192399", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a novel approach for approximate sampling in probabilistic programs based on incremental inference. The key idea is to adapt the samples for a program P into samples for a program Q, thereby avoiding the expensive sampling computation for program Q. 
To enable incremental inference in probabilistic programming, our work: (i) introduces the concept of a trace translator which adapts samples from P into samples of Q, (ii) phrases this translation approach in the context of sequential Monte Carlo (SMC), which gives theoretical guarantees that the adapted samples converge to the distribution induced by Q, and (iii) shows how to obtain a concrete trace translator by establishing a correspondence between the random choices of the two probabilistic programs. We implemented our approach in two different probabilistic programming systems and showed that, compared to methods that sample the program Q from scratch, incremental inference can lead to orders of magnitude increase in efficiency, depending on how closely related P and Q are.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Gehr:2018:BPI, author = "Timon Gehr and Sasa Misailovic and Petar Tsankov and Laurent Vanbever and Pascal Wiesmann and Martin Vechev", title = "{Bayonet}: probabilistic inference for networks", journal = j-SIGPLAN, volume = "53", number = "4", pages = "586--602", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192400", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Network operators often need to ensure that important probabilistic properties are met, such as that the probability of network congestion is below a certain threshold. Ensuring such properties is challenging and requires both a suitable language for probabilistic networks and an automated procedure for answering probabilistic inference queries. We present Bayonet, a novel approach that consists of: (i) a probabilistic network programming language and (ii) a system that performs probabilistic inference on Bayonet programs. The key insight behind Bayonet is to phrase the problem of probabilistic network reasoning as inference in existing probabilistic languages. As a result, Bayonet directly leverages existing probabilistic inference systems and offers a flexible and expressive interface to operators. We present a detailed evaluation of Bayonet on common network scenarios, such as network congestion, reliability of packet delivery, and others. Our results indicate that Bayonet can express such practical scenarios and answer queries for realistic topology sizes (with up to 30 nodes).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Mansinghka:2018:PPP, author = "Vikash K. Mansinghka and Ulrich Schaechtle and Shivam Handa and Alexey Radul and Yutian Chen and Martin Rinard", title = "Probabilistic programming with programmable inference", journal = j-SIGPLAN, volume = "53", number = "4", pages = "603--616", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We introduce inference metaprogramming for probabilistic programming languages, including new language constructs, a formalism, and the first demonstration of effectiveness in practice.
Instead of relying on rigid black-box inference algorithms hard-coded into the language implementation as in previous probabilistic programming languages, inference metaprogramming enables developers to (1) dynamically decompose inference problems into subproblems, (2) apply inference tactics to subproblems, (3) alternate between incorporating new data and performing inference over existing data, and (4) explore multiple execution traces of the probabilistic program at once. Implemented tactics include gradient-based optimization, Markov chain Monte Carlo, variational inference, and sequential Monte Carlo techniques. Inference metaprogramming enables the concise expression of probabilistic models and inference algorithms across diverse fields, such as computer vision, data science, and robotics, within a single probabilistic programming language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Bohrer:2018:VVC, author = "Brandon Bohrer and Yong Kiam Tan and Stefan Mitsch and Magnus O. Myreen and Andr{\'e} Platzer", title = "{VeriPhy}: verified controller executables from verified cyber--physical system models", journal = j-SIGPLAN, volume = "53", number = "4", pages = "617--630", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192406", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present VeriPhy, a verified pipeline which automatically transforms verified high-level models of safety-critical cyber-physical systems (CPSs) in differential dynamic logic (dL) to verified controller executables. VeriPhy proves that all safety results are preserved end-to-end as it bridges abstraction gaps, including: (i) the gap between mathematical reals in physical models and machine arithmetic in the implementation, (ii) the gap between real physics and its differential-equation models, and (iii) the gap between nondeterministic controller models and machine code. VeriPhy reduces CPS safety to the faithfulness of the physical environment, which is checked at runtime by synthesized, verified monitors. We use three provers in this effort: KeYmaera X, HOL4, and Isabelle/HOL. To minimize the trusted base, we cross-verify KeYmaeraX in Isabelle/HOL. We evaluate the resulting controller and monitors on commodity robotics hardware.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Kang:2018:CVC, author = "Jeehoon Kang and Yoonseung Kim and Youngju Song and Juneyoung Lee and Sanghoon Park and Mark Dongyeon Shin and Yonghyun Kim and Sungkeun Cho and Joonwon Choi and Chung-Kil Hur and Kwangkeun Yi", title = "{Crellvm}: verified credible compilation for {LLVM}", journal = j-SIGPLAN, volume = "53", number = "4", pages = "631--645", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192377", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Production compilers such as GCC and LLVM are large complex software systems, for which achieving a high level of reliability is hard.
Although testing is an effective method for finding bugs, it alone cannot guarantee a high level of reliability. To provide a higher level of reliability, many approaches that examine compilers' internal logics have been proposed. However, none of them have been successfully applied to major optimizations of production compilers. This paper presents Crellvm: a verified credible compilation framework for LLVM, which can be used as a systematic way of providing a high level of reliability for major optimizations in LLVM. Specifically, we augment an LLVM optimizer to generate translation results together with their correctness proofs, which can then be checked by a proof checker formally verified in Coq. As case studies, we applied our approach to two major optimizations of LLVM: register promotion mem2reg and global value numbering gvn, having found four new miscompilation bugs (two in each).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Gu:2018:CCA, author = "Ronghui Gu and Zhong Shao and Jieung Kim and Xiongnan (Newman) Wu and J{\'e}r{\'e}mie Koenig and Vilhelm Sj{\"o}berg and Hao Chen and David Costanzo and Tahina Ramananandro", title = "Certified concurrent abstraction layers", journal = j-SIGPLAN, volume = "53", number = "4", pages = "646--661", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192381", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Concurrent abstraction layers are ubiquitous in modern computer systems because of the pervasiveness of multithreaded programming and multicore hardware. Abstraction layers are used to hide the implementation details (e.g., fine-grained synchronization) and reduce the complex dependencies among components at different levels of abstraction. Despite their obvious importance, concurrent abstraction layers have not been treated formally. This severely limits the applicability of layer-based techniques and makes it difficult to scale verification across multiple concurrent layers. In this paper, we present CCAL---a fully mechanized programming toolkit developed under the CertiKOS project---for specifying, composing, compiling, and linking certified concurrent abstraction layers. CCAL consists of three technical novelties: a new game-theoretical, strategy-based compositional semantic model for concurrency (and its associated program verifiers), a set of formal linking theorems for composing multithreaded and multicore concurrent layers, and a new CompCertX compiler that supports certified thread-safe compilation and linking. The CCAL toolkit is implemented in Coq and supports layered concurrent programming in both C and assembly. It has been successfully applied to build a fully certified concurrent OS kernel with fine-grained locking.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Taube:2018:MDD, author = "Marcelo Taube and Giuliano Losa and Kenneth L. McMillan and Oded Padon and Mooly Sagiv and Sharon Shoham and James R. 
Wilcox and Doug Woos", title = "Modularity for decidability of deductive verification with applications to distributed systems", journal = j-SIGPLAN, volume = "53", number = "4", pages = "662--677", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192414", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Proof automation can substantially increase productivity in formal verification of complex systems. However, unpredictability of automated provers in handling quantified formulas presents a major hurdle to usability of these tools. We propose to solve this problem not by improving the provers, but by using a modular proof methodology that allows us to produce decidable verification conditions. Decidability greatly improves predictability of proof automation, resulting in a more practical verification approach. We apply this methodology to develop verified implementations of distributed protocols, demonstrating its effectiveness.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Bastani:2018:ALP, author = "Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang", title = "Active learning of points-to specifications", journal = j-SIGPLAN, volume = "53", number = "4", pages = "678--692", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192383", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When analyzing programs, large libraries pose significant challenges to static points-to analysis. A popular solution is to have a human analyst provide points-to specifications that summarize relevant behaviors of library code, which can substantially improve precision and handle missing code such as native code. We propose Atlas, a tool that automatically infers points-to specifications. Atlas synthesizes unit tests that exercise the library code, and then infers points-to specifications based on observations from these executions.
Atlas automatically infers specifications for the Java standard library, and produces better results for a client static information flow analysis on a benchmark of 46 Android apps compared to using existing handwritten specifications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Shi:2018:PFP, author = "Qingkai Shi and Xiao Xiao and Rongxin Wu and Jinguo Zhou and Gang Fan and Charles Zhang", title = "{Pinpoint}: fast and precise sparse value flow analysis for million lines of code", journal = j-SIGPLAN, volume = "53", number = "4", pages = "693--706", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192418", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "When dealing with millions of lines of code, we still cannot have the cake and eat it: sparse value-flow analysis is powerful in checking source-sink problems, but existing work cannot escape from the ``pointer trap'' --- a precise points-to analysis limits its scalability and an imprecise one seriously undermines its precision. We present Pinpoint, a holistic approach that decomposes the cost of high-precision points-to analysis by precisely discovering local data dependence and delaying the expensive inter-procedural analysis through memorization. Such memorization enables the on-demand slicing of only the necessary inter-procedural data dependence and path feasibility queries, which are then solved by a costly SMT solver. Experiments show that Pinpoint can check programs such as MySQL (around 2 million lines of code) within 1.5 hours. The overall false positive rate is also very low (14.3\% --- 23.6\%). Pinpoint has discovered over forty real bugs in mature and extensively checked open source systems. And the implementation of Pinpoint and all experimental results are freely available.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Zhu:2018:DDC, author = "He Zhu and Stephen Magill and Suresh Jagannathan", title = "A data-driven {CHC} solver", journal = j-SIGPLAN, volume = "53", number = "4", pages = "707--721", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192416", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a data-driven technique to solve Constrained Horn Clauses (CHCs) that encode verification conditions of programs containing unconstrained loops and recursions. Our CHC solver neither constrains the search space from which a predicate's components are inferred (e.g., by constraining the number of variables or the values of coefficients used to specify an invariant), nor fixes the shape of the predicate itself (e.g., by bounding the number and kind of logical connectives). Instead, our approach is based on a novel machine learning-inspired tool chain that synthesizes CHC solutions in terms of arbitrary Boolean combinations of unrestricted atomic predicates. 
A CEGAR-based verification loop inside the solver progressively samples representative positive and negative data from recursive CHCs, which is fed to the machine learning tool chain. Our solver is implemented as an LLVM pass in the SeaHorn verification framework and has been used to successfully verify a large number of nontrivial and challenging C programs from the literature and well-known benchmark suites (e.g., SV-COMP).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Raghothaman:2018:UGP, author = "Mukund Raghothaman and Sulekha Kulkarni and Kihong Heo and Mayur Naik", title = "User-guided program reasoning using {Bayesian} inference", journal = j-SIGPLAN, volume = "53", number = "4", pages = "722--735", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192417", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Program analyses necessarily make approximations that often lead them to report true alarms interspersed with many false alarms. We propose a new approach to leverage user feedback to guide program analyses towards true alarms and away from false alarms. Our approach associates each alarm with a confidence value by performing Bayesian inference on a probabilistic model derived from the analysis rules. In each iteration, the user inspects the alarm with the highest confidence and labels its ground truth, and the approach recomputes the confidences of the remaining alarms given this feedback. It thereby maximizes the return on the effort by the user in inspecting each alarm. We have implemented our approach in a tool named Bingo for program analyses expressed in Datalog. Experiments with real users and two sophisticated analyses---a static datarace analysis for Java programs and a static taint analysis for Android apps---show significant improvements on a range of metrics, including false alarm rates and number of bugs found.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Hong:2018:GCO, author = "Changwan Hong and Aravind Sukumaran-Rajam and Jinsung Kim and Prashant Singh Rawat and Sriram Krishnamoorthy and Louis-No{\"e}l Pouchet and Fabrice Rastello and P. Sadayappan", title = "{GPU} code optimization using abstract kernel emulation and sensitivity analysis", journal = j-SIGPLAN, volume = "53", number = "4", pages = "736--751", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192397", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In this paper, we develop an approach to GPU kernel optimization by focusing on identification of bottleneck resources and determining optimization parameters that can alleviate the bottleneck. Performance modeling for GPUs is done by abstract kernel emulation along with latency/gap modeling of resources. Sensitivity analysis with respect to resource latency/gap parameters is used to predict the bottleneck resource for a given kernel's execution. 
The utility of the bottleneck analysis is demonstrated in two contexts: (1) Coupling the new bottleneck-driven optimization strategy with the OpenTuner auto-tuner: experimental results on all kernels from the Rodinia suite and GPU tensor contraction kernels from the NWChem computational chemistry suite demonstrate effectiveness. (2) Manual code optimization: two case studies illustrate the use of the bottleneck analysis to iteratively improve the performance of code from state-of-the-art domain-specific code generators.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Dathathri:2018:GCO, author = "Roshan Dathathri and Gurbinder Gill and Loc Hoang and Hoang-Vu Dang and Alex Brooks and Nikoli Dryden and Marc Snir and Keshav Pingali", title = "{Gluon}: a communication-optimizing substrate for distributed heterogeneous graph analytics", journal = j-SIGPLAN, volume = "53", number = "4", pages = "752--768", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces a new approach to building distributed-memory graph analytics systems that exploits heterogeneity in processor types (CPU and GPU), partitioning policies, and programming models. The key to this approach is Gluon, a communication-optimizing substrate. Programmers write applications in a shared-memory programming system of their choice and interface these applications with Gluon using a lightweight API. Gluon enables these programs to run on heterogeneous clusters and optimizes communication in a novel way by exploiting structural and temporal invariants of graph partitioning policies. To demonstrate Gluon's ability to support different programming models, we interfaced Gluon with the Galois and Ligra shared-memory graph analytics systems to produce distributed-memory versions of these systems named D-Galois and D-Ligra, respectively. To demonstrate Gluon's ability to support heterogeneous processors, we interfaced Gluon with IrGL, a state-of-the-art single-GPU system for graph analytics, to produce D-IrGL, the first multi-GPU distributed-memory graph analytics system. Our experiments were done on CPU clusters with up to 256 hosts and roughly 70,000 threads and on multi-GPU clusters with up to 64 GPUs. The communication optimizations in Gluon improve end-to-end application execution time by ~2.6$ \times $ on the average. D-Galois and D-IrGL scale well and are faster than Gemini, the state-of-the-art distributed CPU graph analytics system, by factors of ~3.9$ \times $ and ~4.9$ \times $, respectively, on the average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Acar:2018:HSP, author = "Umut A. 
Acar and Arthur Chargu{\'e}raud and Adrien Guatto and Mike Rainey and Filip Sieczkowski", title = "Heartbeat scheduling: provable efficiency for nested parallelism", journal = j-SIGPLAN, volume = "53", number = "4", pages = "769--782", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192391", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A classic problem in parallel computing is to take a high-level parallel program written, for example, in nested-parallel style with fork-join constructs and run it efficiently on a real machine. The problem could be considered solved in theory, but not in practice, because the overheads of creating and managing parallel threads can overwhelm their benefits. Developing efficient parallel codes therefore usually requires extensive tuning and optimizations to reduce parallelism just to a point where the overheads become acceptable. In this paper, we present a scheduling technique that delivers provably efficient results for arbitrary nested-parallel programs, without the tuning needed for controlling parallelism overheads. The basic idea behind our technique is to create threads only at a beat (which we refer to as the ``heartbeat'') and make sure to do useful work in between. We specify our heartbeat scheduler using an abstract-machine semantics and provide mechanized proofs that the scheduler guarantees low overheads for all nested parallel programs. We present a prototype C++ implementation and an evaluation that shows that Heartbeat competes well with manually optimized Cilk Plus codes, without requiring manual tuning.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Serrano:2018:GIP, author = "Alejandro Serrano and Jurriaan Hage and Dimitrios Vytiniotis and Simon Peyton Jones", title = "Guarded impredicative polymorphism", journal = j-SIGPLAN, volume = "53", number = "4", pages = "783--796", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192389", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The design space for type systems that support impredicative instantiation is extremely complicated. One needs to strike a balance between expressiveness, simplicity for both the end programmer and the type system implementor, and how easily the system can be integrated with other advanced type system concepts. In this paper, we propose a new point in the design space, which we call guarded impredicativity. Its key idea is that impredicative instantiation in an application is allowed for type variables that occur under a type constructor. The resulting type system has a clean declarative specification --- making it easy for programmers to predict what will type and what will not ---, allows for a smooth integration with GHC's OutsideIn(X) constraint solving framework, while giving up very little in terms of expressiveness compared to systems like HMF, HML, FPH and MLF.
We give a sound and complete inference algorithm, and prove a principal type property for our system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Bowman:2018:TCC, author = "William J. Bowman and Amal Ahmed", title = "Typed closure conversion for the calculus of constructions", journal = j-SIGPLAN, volume = "53", number = "4", pages = "797--811", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192372", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dependently typed languages such as Coq are used to specify and verify the full functional correctness of source programs. Type-preserving compilation can be used to preserve these specifications and proofs of correctness through compilation into the generated target-language programs. Unfortunately, type-preserving compilation of dependent types is hard. In essence, the problem is that dependent type systems are designed around high-level compositional abstractions to decide type checking, but compilation interferes with the type-system rules for reasoning about run-time terms. We develop a type-preserving closure-conversion translation from the Calculus of Constructions (CC) with strong dependent pairs ($ \Sigma $ types) --- a subset of the core language of Coq --- to a type-safe, dependently typed compiler intermediate language named CC-CC. The central challenge in this work is how to translate the source type-system rules for reasoning about functions into target type-system rules for reasoning about closures. To justify these rules, we prove soundness of CC-CC by giving a model in CC. In addition to type preservation, we prove correctness of separate compilation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Pombrio:2018:ITR, author = "Justin Pombrio and Shriram Krishnamurthi", title = "Inferring type rules for syntactic sugar", journal = j-SIGPLAN, volume = "53", number = "4", pages = "812--825", month = apr, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3296979.3192398", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type systems and syntactic sugar are both valuable to programmers, but sometimes at odds. While sugar is a valuable mechanism for implementing realistic languages, the expansion process obscures program source structure. As a result, type errors can reference terms the programmers did not write (and even constructs they do not know), baffling them. The language developer must also manually construct type rules for the sugars, to give a typed account of the surface language. We address these problems by presenting a process for automatically reconstructing type rules for the surface language using rules for the core. We have implemented this theory, and show several interesting case studies.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "PLDI '18 proceedings.", } @Article{Byma:2018:DHP, author = "Stuart Byma and James R. 
Larus", title = "Detailed heap profiling", journal = j-SIGPLAN, volume = "53", number = "5", pages = "1--13", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210564", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Modern software systems heavily use the memory heap. As systems grow more complex and compute with increasing amounts of data, it can be difficult for developers to understand how their programs actually use the bytes that they allocate on the heap and whether improvements are possible. To answer this question of heap usage efficiency, we have built a new, detailed heap profiler called Memoro. Memoro uses a combination of static instrumentation, subroutine interception, and runtime data collection to build a clear picture of exactly when and where a program performs heap allocation, and crucially how it actually uses that memory. Memoro also introduces a new visualization application that can distill collected data into scores and visual cues that allow developers to quickly pinpoint and eliminate inefficient heap usage in their software. Our evaluation and experience with several applications demonstrates that Memoro can reduce heap usage and produce runtime improvements of 10\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Tripp:2018:FHP, author = "Charles Tripp and David Hyde and Benjamin Grossman-Ponemon", title = "{FRC}: a high-performance concurrent parallel deferred reference counter for {C++}", journal = j-SIGPLAN, volume = "53", number = "5", pages = "14--28", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present FRC, a high-performance concurrent parallel reference counter for unmanaged languages. It is well known that high-performance garbage collectors help developers write memory-safe, highly concurrent systems and data structures. While C++, C, and other unmanaged languages are used in high-performance applications, adding concurrent memory management to these languages has proven to be difficult. Unmanaged languages like C++ use pointers instead of references, and have uncooperative mutators which do not pause easily at a safe point. Thus, scanning mutator stack root references is challenging. FRC only defers decrements and does not require mutator threads to pause during collection. By deferring only decrements, FRC avoids much of the synchronization overhead of a fully-deferred implementation. Root references are scanned without interrupting the mutator by publishing these references to a thread-local array. FRC's performance can exceed that of the C++ standard library's shared pointer by orders of magnitude. FRC's thread-safety guarantees and low synchronization overhead enable significant throughput gains for concurrently-readable shared data structures. We describe the components of FRC, including our static tree router data structure: a novel barrier which improves the scalability of parallel collection workers. FRC's performance is evaluated on several concurrent data structures. 
We release FRC and our tests as open-source code and expect FRC will be useful for many concurrent C++ software systems.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Brandt:2018:DGC, author = "Steven R. Brandt and Hari Krishnan and Costas Busch and Gokarna Sharma", title = "Distributed garbage collection for general graphs", journal = j-SIGPLAN, volume = "53", number = "5", pages = "29--44", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose a scalable, cycle-collecting, decentralized, reference counting garbage collector with partial tracing. The algorithm is based on the Brownbridge system but uses four different types of references to label edges. Memory usage is $ O(\log n) $ bits per node, where $ n $ is the number of nodes in the graph. The algorithm assumes an asynchronous network model with a reliable reordering channel. It collects garbage in $ O(E_a) $ time, where $ E_a $ is the number of edges in the induced subgraph. The algorithm uses termination detection to manage the distributed computation, a unique identifier to break the symmetry among multiple collectors, and a transaction-based approach when multiple collectors conflict. Unlike existing algorithms, ours is not centralized, does not require barriers, does not require migration of nodes, does not require back-pointers on every edge, and is stable against concurrent mutation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Ismail:2018:HSC, author = "Mohamed Ismail and G. Edward Suh", title = "Hardware-software co-optimization of memory management in dynamic languages", journal = j-SIGPLAN, volume = "53", number = "5", pages = "45--58", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210566", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Dynamic programming languages are becoming increasingly popular, yet often show a significant performance slowdown compared to static languages. In this paper, we study the performance overhead of automatic memory management in dynamic languages. We propose to improve the performance and memory bandwidth usage of dynamic languages by co-optimizing garbage collection overhead and cache performance for newly-initialized and dead objects. Our study shows that less frequent garbage collection results in a large number of cache misses for initial stores to new objects. We solve this problem by directly placing uninitialized objects into on-chip caches without off-chip memory accesses. We further optimize the garbage collection by reducing unnecessary cache pollution and write-backs through partial tracing that invalidates dead objects between full garbage collections.
Experimental results on PyPy and V8 show that less frequent garbage collection along with our optimizations can significantly improve the performance of dynamic languages.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Bruno:2018:DVM, author = "Rodrigo Bruno and Paulo Ferreira and Ruslan Synytsky and Tetiana Fydorenchyk and Jia Rao and Hang Huang and Song Wu", title = "Dynamic vertical memory scalability for {OpenJDK} cloud applications", journal = j-SIGPLAN, volume = "53", number = "5", pages = "59--70", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210567", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The cloud is an increasingly popular platform to deploy applications as it lets cloud users provide resources to their applications as needed. Furthermore, cloud providers are now starting to offer a ``pay-as-you-use'' model in which users are only charged for the resources that are really used instead of paying for a statically sized instance. This new model allows cloud users to save money, and cloud providers to better utilize their hardware. However, applications running on top of runtime environments such as the Java Virtual Machine (JVM) cannot benefit from this new model because they cannot dynamically adapt the amount of used resources at runtime. In particular, if an application needs more memory than what was initially predicted at launch time, the JVM will not allow the application to grow its memory beyond the maximum value defined at launch time. In addition, the JVM will hold memory that is no longer being used by the application. This lack of dynamic vertical scalability completely prevents the benefits of the ``pay-as-you-use'' model, and forces users to over-provision resources, and to lose money on unused resources. We propose a new JVM heap sizing strategy that allows the JVM to dynamically scale its memory utilization according to the application's needs. First, we provide a configurable limit on how much the application can grow its memory. This limit is dynamic and can be changed at runtime, as opposed to the current static limit that can only be set at launch time. Second, we adapt current Garbage Collection policies that control how much the heap can grow and shrink to better fit what is currently being used by the application. The proposed solution is implemented in the OpenJDK 9 HotSpot JVM, the new release of OpenJDK. Changes were also introduced inside the Parallel Scavenge collector and the Garbage First collector (the new by-default collector in HotSpot). Evaluation experiments using real workloads and data show that, with negligible throughput and memory overhead, dynamic vertical memory scalability can be achieved.
This allows users to save significant amounts of money by not paying for unused resources, and cloud providers to better utilize their physical machines.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Kaur:2018:OCM, author = "Gurneet Kaur and Keval Vora and Sai Charan Koduru and Rajiv Gupta", title = "{OMR}: out-of-core {MapReduce} for large data sets", journal = j-SIGPLAN, volume = "53", number = "5", pages = "71--83", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210568", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "While single machine MapReduce systems can squeeze out maximum performance from available multi-cores, they are often limited by the size of main memory and can thus only process small datasets. Our experience shows that the state-of-the-art single-machine in-memory MapReduce system Metis frequently experiences out-of-memory crashes. Even though today's computers are equipped with efficient secondary storage devices, the frameworks do not utilize these devices mainly because disk access latencies are much higher than those for main memory. Therefore, the single-machine setup of the Hadoop system performs much slower when it is presented with the datasets which are larger than the main memory. Moreover, such frameworks also require tuning a lot of parameters which puts an added burden on the programmer. In this paper we present OMR, an Out-of-core MapReduce system that not only successfully handles datasets that are far larger than the size of main memory, it also guarantees linear scaling with the growing data sizes. OMR actively minimizes the amount of data to be read/written to/from disk via on-the-fly aggregation and it uses block sequential disk read/write operations whenever disk accesses become necessary to avoid running out of memory. We theoretically prove OMR's linear scalability and empirically demonstrate it by processing datasets that are up to 5x larger than main memory. Our experiments show that in comparison to the standalone single-machine setup of the Hadoop system, OMR delivers far higher performance. Also in contrast to Metis, OMR avoids out-of-memory crashes for large datasets as well as delivers higher performance when datasets are small enough to fit in main memory.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Byrne:2018:MMR, author = "Daniel Byrne and Nilufer Onder and Zhenlin Wang", title = "{mPart}: miss-ratio curve guided partitioning in key--value stores", journal = j-SIGPLAN, volume = "53", number = "5", pages = "84--95", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210571", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Web applications employ key-value stores to cache the data that is most commonly accessed. The cache improves a web application's performance by serving its requests from memory, avoiding fetching them from the backend database.
Since the memory space is limited, maximizing the memory utilization is a key to delivering the best performance possible. This has led to the use of multi-tenant systems, allowing applications to share cache space. In addition, application data access patterns change over time, so the system should be adaptive in its memory allocation. In this work, we address both multi-tenancy (where a single cache is used for multiple applications) and dynamic workloads (changing access patterns) using a model that relates the cache size to the application miss ratio, known as a miss ratio curve. Intuitively, the larger the cache, the less likely the system will need to fetch the data from the database. Our efficient, online construction of the miss ratio curve allows us to determine a near optimal memory allocation given the available system memory, while adapting to changing data access patterns. We show that our model outperforms an existing state-of-the-art sharing model, Memshare, in terms of overall cache hit ratio and does so at a lower time cost. We show that for a typical system, overall hit ratio is consistently 1 percentage point greater and 99.9th percentile latency is reduced by as much as 2.9\% under standard web application workloads containing millions of requests.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Brock:2018:PBS, author = "Jacob Brock and Chen Ding and Rahman Lavaee and Fangzhou Liu and Liang Yuan", title = "Prediction and bounds on shared cache demand from memory access interleaving", journal = j-SIGPLAN, volume = "53", number = "5", pages = "96--108", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210565", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Cache in multicore machines is often shared, and the cache performance depends on how memory accesses belonging to different programs interleave with one another. The full range of performance possibilities includes all possible interleavings, which are too numerous to be studied by experiments for any mix of non-trivial programs. This paper presents a theory to characterize the effect of memory access interleaving due to parallel execution of non-data-sharing programs. The theory uses an established metric called the footprint (which can be used to calculate miss ratios in fully-associative LRU caches) to measure cache demand, and considers the full range of interleaving possibilities. The paper proves a lower bound for footprints of interleaved traces, and then formulates an upper bound in terms of the footprints of the constituent traces.
It also shows the correctness of footprint composition used in a number of existing techniques, and places precise bounds on its accuracy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Horie:2018:BDQ, author = "Michihiro Horie and Hiroshi Horii and Kazunori Ogata and Tamiya Onodera", title = "Balanced double queues for {GC} work-stealing on weak memory models", journal = j-SIGPLAN, volume = "53", number = "5", pages = "109--119", month = may, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299706.3210570", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Work-stealing is promising for scheduling and balancing parallel workloads. It has a wide range of applicability on middleware, libraries, and runtime systems of programming languages. OpenJDK uses work-stealing for copying garbage collection (GC) to balance copying tasks among GC threads. Each thread has its own queue to store tasks. When a thread has no task in its queue, it acts as a thief and attempts to steal a task from another thread's queue. However, this work-stealing algorithm requires expensive memory fences for pushing, popping, and stealing tasks, especially on weak memory models such as POWER and ARM. To address this problem, we propose a work-stealing algorithm that uses double queues. Each GC thread has a public queue that is accessible from other GC threads and a private queue that is only accessible by itself. Pushing and popping tasks in the private queue are free from expensive memory fences. The most significant point in our algorithm is providing a mechanism to maintain the load balance on the basis of the use of double queues. We developed a prototype implementation for parallel GC in OpenJDK8 for ppc64le. We evaluated our algorithm by using SPECjbb2015, SPECjvm2008, TPC-DS, and Apache DayTrader.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "ISMM '18 proceedings.", } @Article{Santos:2018:MBD, author = "Rodrigo C. M. Santos and Guilherme F. Lima and Francisco Sant'Anna and Roberto Ierusalimschy and Edward H. Haeusler", title = "A memory-bounded, deterministic and terminating semantics for the synchronous programming language {C{\'e}u}", journal = j-SIGPLAN, volume = "53", number = "6", pages = "1--18", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211334", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "C{\'e}u is a synchronous programming language for embedded soft real-time systems. It focuses on control-flow safety features, such as safe shared-memory concurrency and safe abortion of lines of execution, while enforcing memory bounded, deterministic, and terminating reactions to the environment. 
In this work, we present a small-step structural operational semantics for C{\'e}u and a proof that reactions have the properties enumerated above: that for a given arbitrary timeline of input events, multiple executions of the same program always react in bounded time and arrive at the same final finite memory state.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Devine:2018:MCI, author = "James Devine and Joe Finney and Peli de Halleux and Micha{\l} Moskal and Thomas Ball and Steve Hodges", title = "{MakeCode} and {CODAL}: intuitive and efficient embedded systems programming for education", journal = j-SIGPLAN, volume = "53", number = "6", pages = "19--30", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211335", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Across the globe, it is now commonplace for educators to engage in the making (design and development) of embedded systems in the classroom to motivate and excite their students. This new domain brings its own set of unique requirements. Historically, embedded systems development requires knowledge of low-level programming languages, local installation of compilation toolchains, device drivers, and applications. For students and educators, these requirements can introduce insurmountable barriers. We present the motivation, requirements, implementation, and evaluation of a new programming platform that enables novice users to create software for embedded systems. The platform has two major components: (1) Microsoft MakeCode ( www.makecode.com ), a web app that encapsulates an entire beginner IDE for microcontrollers; and (2) CODAL, an efficient component-oriented C++ runtime for microcontrollers. We show how MakeCode and CODAL provide an accessible, cross-platform, installation-free programming experience for the BBC micro:bit and other embedded devices.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Taylor:2018:ADL, author = "Ben Taylor and Vicent Sanz Marco and Willy Wolff and Yehia Elkhatib and Zheng Wang", title = "Adaptive deep learning model selection on embedded systems", journal = j-SIGPLAN, volume = "53", number = "6", pages = "31--43", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211336", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The recent ground-breaking advances in deep learning networks (DNNs) make them attractive for embedded systems. However, it can take a long time for DNNs to make an inference on resource-limited embedded devices. Offloading the computation into the cloud is often infeasible due to privacy concerns, high latency, or the lack of connectivity. As such, there is a critical need to find a way to effectively execute the DNN models locally on the devices. This paper presents an adaptive scheme to determine which DNN model to use for a given input, by considering the desired accuracy and inference time. 
Our approach employs machine learning to develop a predictive model to quickly select a pre-trained DNN to use for a given input and the optimization constraint. We achieve this by first training off-line a predictive model, and then use the learnt model to select a DNN model to use for new, unseen inputs. We apply our approach to the image classification task and evaluate it on a Jetson TX2 embedded deep learning platform using the ImageNet ILSVRC 2012 validation dataset. We consider a range of influential DNN models. Experimental results show that our approach achieves a 7.52\% improvement in inference accuracy, and a 1.8x reduction in inference time over the most-capable single DNN model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Han:2018:ORS, author = "Lei Han and Zhaoyan Shen and Zili Shao and Tao Li", title = "Optimizing {RAID\slash SSD} controllers with lifetime extension for flash-based {SSD} array", journal = j-SIGPLAN, volume = "53", number = "6", pages = "44--54", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211338", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Flash-based SSD RAID arrays are increasingly being deployed in data centers. Compared with HDD arrays, SSD arrays drastically enhance storage density and I/O performance, and reduce power and rack space. Nevertheless, SSDs suffer aging issues. Though prior studies have been conducted to address this disadvantage, effective techniques of RAID/SSD controllers are urgently needed to extend the lifetime of SSD arrays. In this paper, we for the first time apply approximate storage via the interplay of RAID and SSD controllers to optimize the lifespan of SSD arrays. Our basic idea is to reuse faulty blocks (those contain pages with uncorrectable errors) to store approximate data (which can tolerate more errors). By relaxing the integrity of flash blocks, we observed that the endurance of NAND flash memory can be significantly boosted, thereby providing huge potentials to significantly extend the lifetime of SSDs. Based on this observation, we propose the use of an efficient space management scheme for data allocation and FTL strategies by coordinating the interplay of RAID and SSD controllers to optimize the lifetime of SSD arrays. We implemented a prototype, called FreeRAID, based on an SSD array simulator. 
Our experiments show that we can significantly increase the lifetime by up to 2.17$ \times $ compared with conventional SSD-based RAID arrays.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Skelin:2018:CSA, author = "Mladen Skelin and Marc Geilen", title = "Compositionality in scenario-aware dataflow: a rendezvous perspective", journal = j-SIGPLAN, volume = "53", number = "6", pages = "55--64", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211339", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Finite-state machine-based scenario-aware dataflow (FSM-SADF) is a dynamic dataflow model of computation that combines streaming data and finite-state control. For the most part, it preserves the determinism of its underlying synchronous dataflow (SDF) concurrency model and only when necessary introduces the non-deterministic variation in terms of scenarios that are represented by SDF graphs. This puts FSM-SADF in a sweet spot in the trade-off space between expressiveness and analyzability. However, FSM-SADF supports no notion of compositionality, which hampers its usability in modeling and consequent analysis of large systems. In this work we propose a compositional semantics for FSM-SADF that overcomes this problem. We base the semantics of the composition on standard composition of processes with rendezvous communication in the style of CCS or CSP at the control level and the parallel, serial and feedback composition of SDF graphs at the dataflow level. We evaluate the approach on a case study from the multimedia domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Stokes:2018:DAG, author = "Michael Stokes and Ryan Baird and Zhaoxiang Jin and David Whalley and Soner Onder", title = "Decoupling address generation from loads and stores to improve data access energy efficiency", journal = j-SIGPLAN, volume = "53", number = "6", pages = "65--75", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211340", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Level-one data cache (L1 DC) accesses impact energy usage as they frequently occur and use significantly more energy than register file accesses. A memory access instruction consists of an address generation operation calculating the location where the data item resides in memory and the data access operation that loads/stores a value from/to that location. We propose to decouple these two operations into separate machine instructions to reduce energy usage. By associating the data translation lookaside buffer (DTLB) access and level-one data cache (L1 DC) tag check with an address generation instruction, only a single data array in a set-associative L1 DC needs to be accessed during a load instruction when the result of the tag check is known at that point. In addition, many DTLB accesses and L1 DC tag checks are avoided by memoizing the DTLB way and L1 DC way with the register that holds the memory address to be dereferenced. 
Finally, we are able to often coalesce an ALU operation with a load or store data access using our technique to reduce the number of instructions executed.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Egger:2018:VCG, author = "Bernhard Egger and Eunjin Song and Hochan Lee and Daeyoung Shin", title = "Verification of coarse-grained reconfigurable arrays through random test programs", journal = j-SIGPLAN, volume = "53", number = "6", pages = "76--88", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211342", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We propose and evaluate a framework to test the functional correctness of coarse-grained reconfigurable array (CGRA) processors for pre-silicon verification and post-silicon validation. To reflect the reconfigurable nature of CGRAs, an architectural model of the system under test is built directly from the hardware description files. A guided place-and-routing algorithm is used to map operations and operands onto the heterogeneous processing elements (PE). Test coverage is maximized by favoring unexercised parts of the architecture. Requiring no explicit knowledge about the semantics of operations, the random test program generator (RTPG) framework seamlessly supports custom ISA extensions. The proposed framework is applied to the Samsung Reconfigurable Processor, a modulo-scheduled CGRA integrated in smartphones, cameras, printers, and smart TVs. Experiments demonstrate that the RTPG is versatile, efficient, and quickly achieves a high coverage. In addition to detecting all randomly inserted faults, the generated test programs also exposed two yet unknown actual faults in the architecture.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Chang:2018:DNN, author = "Andre Xian Ming Chang and Aliasger Zaidy and Lukasz Burzawa and Eugenio Culurciello", title = "Deep neural networks compiler for a trace-based accelerator (short {WIP} paper)", journal = j-SIGPLAN, volume = "53", number = "6", pages = "89--93", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211333", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Deep Neural Networks (DNNs) are the algorithm of choice for image processing applications. DNNs present highly parallel workloads that lead to the emergence of custom hardware accelerators. Deep Learning (DL) models specialized in different tasks require a programmable custom hardware and a compiler/mapper to efficiently translate different DNNs into an efficient dataflow in the accelerator. The goal of this paper is to present a compiler for running DNNs on Snowflake, which is a programmable hardware accelerator that targets DNNs. The compiler correctly generates instructions for various DL models: AlexNet, VGG, ResNet and LightCNN9. Snowflake, with a varying number of processing units, was implemented on FPGA to measure the compiler and Snowflake performance properties upon scaling up. 
The system achieves 70 frames/s and 4.5 GB/s of off-chip memory bandwidth for AlexNet without linear layers on Xilinx's Zynq-SoC XC7Z045 FPGA.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{SantAnna:2018:TSL, author = "Francisco Sant'Anna and Alexandre Sztajnberg and Ana L{\'u}cia de Moura and Noemi Rodrigues", title = "Transparent standby for low-power, resource-constrained embedded systems: a programming language-based approach (short {WIP} paper)", journal = j-SIGPLAN, volume = "53", number = "6", pages = "94--98", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211337", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Standby efficiency for connected devices is one of the priorities of the G20's Energy Efficiency Action Plan. We propose transparent programming language mechanisms to enforce that applications remain in the deepest standby modes for the longest periods of time. We extend the programming language C{\'e}u with support for interrupt service routines and with a simple power management runtime. Based on these primitives, we also provide device drivers that allow applications to take advantage of standby automatically. Our approach relies on the synchronous semantics of the language which guarantees that reactions to the environment always reach an idle state amenable to standby. In addition, in order to lower the programming barrier of adoption, we show that programs in C{\'e}u can keep a sequential syntactic structure, even when applications require non-trivial concurrent behavior.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Chimdyalwar:2018:SRP, author = "Bharti Chimdyalwar and Priyanka Darke", title = "Statically relating program properties for efficient verification (short {WIP} paper)", journal = j-SIGPLAN, volume = "53", number = "6", pages = "99--103", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211341", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Efficient automatic verification of real world embedded software with numerous properties is a challenge. Existing techniques verify a sufficient subset of properties by identifying implication relations between their verification outcomes. We believe this is expensive and propose a novel complementary approach called grouping. Grouping does not consider the verification outcomes but uses data and control flow characteristics of the program to create disjoint groups of properties verifiable one group at a time. We present three grouping techniques, a framework, and experiments over open source and industrial applications to support our thesis. The experiments show a high gain in performance of a few state-of-the-art tools.
This led to the integration of grouping into the verification process of an automotive software manufacturer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Chadha:2018:JAS, author = "Gaurav Chadha", title = "{JSCore}: architectural support for accelerating {JavaScript} execution (short {WIP} paper)", journal = j-SIGPLAN, volume = "53", number = "6", pages = "104--108", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211343", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "JavaScript has seen meteoric growth in popularity as it has increasingly become the language of choice for developers, both for front-end web development and server code development through various JavaScript frameworks and Node.js. Part of the reason for its wide use is that it is a prototype based language with dynamic types, making it easy to learn and program in. This flexibility and ease of programming comes at the cost of performance. There are two sources of significant slowdown. First, since the number and type of properties of prototypes is dynamic, accessing a property involves a slow dictionary lookup, as opposed to it being present at a fixed offset from the base address. Second, the dynamism in type of values necessitates wrapping and unwrapping of values into objects with a variety of checks including for type of the value. To mitigate these performance problems, this paper proposes JSCore, a core specialized for JavaScript execution, that vastly reduces the performance degradation due to the above two causes. It uses a hardware lookup table to accelerate property access, and extends the data path to store data types with the data, nearly eliminating the second source of slowdown. Combining the two, JSCore accelerates real world JavaScript applications by 23\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Mehrotra:2018:OSR, author = "Pavan Mehrotra and Sabar Dasgupta and Samantha Robertson and Paul Nuyujukian", title = "An open-source realtime computational platform (short {WIP} paper)", journal = j-SIGPLAN, volume = "53", number = "6", pages = "109--112", month = jun, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299710.3211344", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Systems neuroscience studies involving in-vivo models often require realtime data processing. In these studies, many events must be monitored and processed quickly, including behavior of the subject (e.g., movement of a limb) or features of neural data (e.g., a neuron transmitting an action potential). Unfortunately, most realtime platforms are proprietary, require specific architectures, or are limited to low-level programming languages. Here we present a hardware-independent, open-source realtime computation platform that supports high-level programming. 
The resulting platform, LiCoRICE, can process on order 10e10 bits/sec of network data at 1 ms ticks with 18.2 \micro s jitter. It connects to various inputs and outputs (e.g., DIO, Ethernet, database logging, and analog line in/out) and minimizes reliance on custom device drivers by leveraging peripheral support via the Linux kernel. Its modular architecture supports model-based design for rapid prototyping with C and Python/Cython and can perform numerical operations via BLAS/LAPACK-optimized NumPy that is statically compiled via Numba's pycc. LiCoRICE is not only suitable for systems neuroscience research, but also for applications requiring closed-loop realtime data processing from robotics and control systems to interactive applications and quantitative financial trading.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "LCTES '18 proceedings.", } @Article{Mista:2018:BPQ, author = "Agust{\'\i}n Mista and Alejandro Russo and John Hughes", title = "Branching processes for {QuickCheck} generators", journal = j-SIGPLAN, volume = "53", number = "7", pages = "1--13", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242747", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In QuickCheck (or, more generally, random testing), it is challenging to control random data generators' distributions---especially when it comes to user-defined algebraic data types (ADT). In this paper, we adapt results from an area of mathematics known as branching processes, and show how they help to analytically predict (at compile-time) the expected number of generated constructors, even in the presence of mutually recursive or composite ADTs. Using our probabilistic formulas, we design heuristics capable of automatically adjusting probabilities in order to synthesize generators whose distributions are aligned with users' demands. We provide a Haskell implementation of our mechanism in a tool called DRaGeN and perform case studies with real-world applications. When generating random values, our synthesized QuickCheck generators show improvements in code coverage when compared with those automatically derived by state-of-the-art tools.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Breitner:2018:PCP, author = "Joachim Breitner", title = "A promise checked is a promise kept: inspection testing", journal = j-SIGPLAN, volume = "53", number = "7", pages = "14--25", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242748", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Occasionally, developers need to ensure that the compiler treats their code in a specific way that is only visible by inspecting intermediate or final compilation artifacts. This is particularly common with carefully crafted compositional libraries, where certain usage patterns are expected to trigger an intricate sequence of compiler optimizations --- stream fusion is a well-known example.
The developer of such a library has to manually inspect build artifacts and check for the expected properties. Because this is too tedious to do often, it will likely go unnoticed if the property is broken by a change to the library code, its dependencies or the compiler. The lack of automation has led to released versions of such libraries breaking their documented promises. This indicates that there is an unrecognized need for a new testing paradigm, inspection testing, where the programmer declaratively describes non-functional properties of a compilation artifact and the compiler checks these properties. We define inspection testing abstractly, implement it in the context of the Haskell Compiler GHC and show that it increases the quality of such libraries.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Handley:2018:ACT, author = "Martin A. T. Handley and Graham Hutton", title = "{AutoBench}: comparing the time performance of {Haskell} programs", journal = j-SIGPLAN, volume = "53", number = "7", pages = "26--37", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242749", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Two fundamental goals in programming are correctness (producing the right results) and efficiency (using as few resources as possible). Property-based testing tools such as QuickCheck provide a lightweight means to check the correctness of Haskell programs, but what about their efficiency? In this article, we show how QuickCheck can be combined with the Criterion benchmarking library to give a lightweight means to compare the time performance of Haskell programs. We present the design and implementation of the AutoBench system, demonstrate its utility with a number of case studies, and find that many QuickCheck correctness properties are also efficiency improvements.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Sun:2018:AMB, author = "Marilyn Sun and Kathleen Fisher", title = "{Autobahn 2.0}: minimizing bangs while maintaining performance (system demonstration)", journal = j-SIGPLAN, volume = "53", number = "7", pages = "38--40", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3264734", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Lazy evaluation has many advantages, but it can cause bad performance. Consequently, Haskell allows users to force eager evaluation at certain program points by inserting strictness annotations, known and written as bangs (!). Unfortunately, manual bang placement is difficult. Autobahn 1.0 uses a genetic algorithm to infer bang annotations that improve performance. However, Autobahn 1.0 often generates large numbers of superfluous bangs, which is problematic because users must inspect each such bang to determine whether it is safe. We introduce Autobahn 2.0, which uses GHC profiling information to reduce the number of superfluous bangs.
When evaluated on the NoFib benchmark suite, Autobahn 2.0 reduced the number of inferred bangs by 90.2\% on average, while only degrading program performance by 15.7\% compared with the performance produced by Autobahn 1.0. In a case study on a garbage collection simulator, Autobahn 2.0 eliminated 81.8\% of the recommended bangs, with the same 15.7\% optimization degradation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Serrano:2018:GPA, author = "Alejandro Serrano and Victor Cacciari Miraldo", title = "Generic programming of all kinds", journal = j-SIGPLAN, volume = "53", number = "7", pages = "41--54", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242745", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Datatype-generic programming is a widely used technique to define functions that work regularly over a class of datatypes. Examples include deriving serialization of data, equality or even functoriality. The state-of-the-art of generic programming still lacks support for GADTs, multiple type variables, and some other features. This paper exploits modern GHC extensions, including {\tt TypeInType}, to handle an arbitrary number of type variables, constraints, and existentials. We also provide an Agda model of our construction that does not require Russell's paradox, proving the construction is consistent.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Blondal:2018:DHT, author = "Baldur Bl{\"o}ndal and Andres L{\"o}h and Ryan Scott", title = "{Deriving Via}: or, how to turn hand-written instances into an anti-pattern", journal = j-SIGPLAN, volume = "53", number = "7", pages = "55--67", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242746", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Haskell's deriving construct is a cheap and cheerful way to quickly generate instances of type classes that follow common patterns. But at present, there is only a subset of such type class patterns that deriving supports, and if a particular class lies outside of this subset, then one cannot derive it at all, with no alternative except for laboriously declaring the instances by hand. To overcome this deficit, we introduce Deriving Via, an extension to deriving that enables programmers to compose instances from named programming patterns, thereby turning deriving into a high-level domain-specific language for defining instances.
Deriving Via leverages newtypes---an already familiar tool of the Haskell trade---to declare recurring patterns in a way that both feels natural and allows a high degree of abstraction.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Martinez:2018:ITR, author = "Guido Mart{\'\i}nez and Mauro Jaskelioff and Guido {De Luca}", title = "Improving typeclass relations by being open", journal = j-SIGPLAN, volume = "53", number = "7", pages = "68--80", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242751", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Mathematical concepts such as monads, functors, monoids, and semigroups are expressed in Haskell as typeclasses. Therefore, in order to exploit relations such as ``every monad is a functor'', and ``every monoid is a semigroup'', we need to be able to also express relations between typeclasses. Currently, the only way to do so is using superclasses. However, superclasses can be problematic due to their closed nature. Adding a superclass implies modifying the subclass' definition, which is either impossible if one does not own such code, or painful as it requires cascading changes and the introduction of boilerplate throughout the codebase. In this article, we introduce class morphisms, a way to relate classes in an open fashion, without changing class definitions. We show how class morphisms improve the expressivity, conciseness, and maintainability of code. Further, we show how to implement them while maintaining canonicity and coherence, two key properties of the Haskell type system. Extending a typechecker with class morphisms amounts to adding an elaboration phase and is an unintrusive change. We back this claim with a prototype extension of GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Winant:2018:CED, author = "Thomas Winant and Dominique Devriese", title = "Coherent explicit dictionary application for {Haskell}", journal = j-SIGPLAN, volume = "53", number = "7", pages = "81--93", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242752", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type classes are one of Haskell's most popular features and extend its type system with ad-hoc polymorphism. Since their conception, there were useful features that could not be offered because of the desire to offer two correctness properties: coherence and global uniqueness of instances. Coherence essentially guarantees that program semantics are independent from type-checker internals. Global uniqueness of instances is relied upon by libraries for enforcing, for example, that a single order relation is used for all manipulations of an ordered binary tree. The features that could not be offered include explicit dictionary application and local instances, which would be highly useful in practice. 
In this paper, we propose a new design for offering explicit dictionary application, without compromising coherence and global uniqueness. We introduce a novel criterion based on GHC's type argument roles to decide when a dictionary application is safe with respect to global uniqueness of instances. We preserve coherence by detecting potential sources of incoherence, and prove it formally. Moreover, our solution makes it possible to use local dictionaries. In addition to developing our ideas formally, we have implemented a working prototype in GHC.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Eisenberg:2018:TVP, author = "Richard A. Eisenberg and Joachim Breitner and Simon Peyton Jones", title = "Type variables in patterns", journal = j-SIGPLAN, volume = "53", number = "7", pages = "94--105", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "For many years, GHC has implemented an extension to Haskell that allows type variables to be bound in type signatures and patterns, and to scope over terms. This extension was never properly specified. We rectify that oversight here. With the formal specification in hand, the otherwise-labyrinthine path toward a design for binding type variables in patterns becomes blindingly clear. We thus extend ScopedTypeVariables to bind type variables explicitly, obviating the Proxy workaround to the dustbin of history.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Otwani:2018:TPY, author = "Divesh Otwani and Richard A. Eisenberg", title = "The {Thoralf} plugin: for your fancy type needs", journal = j-SIGPLAN, volume = "53", number = "7", pages = "106--118", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many fancy types (e.g., generalized algebraic data types, type families) require a type checker plugin. These fancy types have a type index (e.g., type level natural numbers) with an equality relation that is difficult or impossible to represent using GHC's built-in type equality. The most practical way to represent these equality relations is through a plugin that asserts equality constraints. However, such plugins are difficult to write and reason about. In this paper, we (1) present a formal theory of reasoning about the correctness of type checker plugins for type indices, and, (2) apply this theory in creating Thoralf, a generic and extensible plugin for type indices that translates GHC constraint problems to queries to an external SMT solver. 
By ``generic and extensible'', we mean the restrictions on extending Thoralf are slight, and, if some type index could be encoded as an SMT sort, then a programmer could extend Thoralf by providing this encoding function.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Noonan:2018:GDP, author = "Matt Noonan", title = "Ghosts of departed proofs (functional pearl)", journal = j-SIGPLAN, volume = "53", number = "7", pages = "119--131", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242755", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Library authors often are faced with a design choice: should a function with preconditions be implemented as a partial function, or by returning a failure condition on incorrect use? Neither option is ideal. Partial functions lead to frustrating run-time errors. Failure conditions must be checked at the use-site, placing an unfair tax on the users who have ensured that the function's preconditions were correctly met. In this paper, we introduce an API design concept called ``ghosts of departed proofs'' based on the following observation: sophisticated preconditions can be encoded in Haskell's type system with no run-time overhead, by using proofs that inhabit phantom type parameters attached to newtype wrappers. The user expresses correctness arguments by constructing proofs to inhabit these phantom types. Critically, this technique allows the library user to decide when and how to validate that the API's preconditions are met. The ``ghosts of departed proofs'' approach to API design can achieve many of the benefits of dependent types and refinement types, yet only requires some minor and well-understood extensions to Haskell 2010. We demonstrate the utility of this approach through a series of case studies, showing how to enforce novel invariants for lists, maps, graphs, shared memory regions, and more.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Vazou:2018:TPA, author = "Niki Vazou and Joachim Breitner and Rose Kunkel and David {Van Horn} and Graham Hutton", title = "Theorem proving for all: equational reasoning in liquid {Haskell} (functional pearl)", journal = j-SIGPLAN, volume = "53", number = "7", pages = "132--144", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242756", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Equational reasoning is one of the key features of pure functional languages such as Haskell. To date, however, such reasoning always took place externally to Haskell, either manually on paper, or mechanised in a theorem prover. This article shows how equational reasoning can be performed directly and seamlessly within Haskell itself, and be checked using Liquid Haskell. In particular, language learners --- to whom external theorem provers are out of reach --- can benefit from having their proofs mechanically checked. 
Concretely, we show how the equational proofs and derivations from Graham's textbook can be recast as proofs in Haskell (spoiler: they look essentially the same).", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Barenz:2018:RFT, author = "Manuel B{\"a}renz and Ivan Perez", title = "{Rhine}: {FRP} with type-level clocks", journal = j-SIGPLAN, volume = "53", number = "7", pages = "145--157", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242757", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Processing data at different rates is generally a hard problem in reactive programming. Buffering problems, lags, and concurrency issues often occur. Many of these problems are clock errors, where data at different rates is combined incorrectly. Techniques to avoid clock errors, such as type-level clocks and deterministic scheduling, exist in the field of synchronous programming, but are not implemented in general-purpose languages like Haskell. Rhine is a clock-safe library for synchronous and asynchronous Functional Reactive Programming (FRP). It separates the aspects of clocking, scheduling and resampling from each other, and ensures clock-safety at the type level. Concurrent communication is encapsulated safely. Diverse reactive subsystems can be combined in a coherent, declarative data-flow framework, while correct interoperability of data at different rates is guaranteed by type-level clocks. This provides a general-purpose framework that simplifies multi-rate FRP systems and can be used for game development, media applications, GUIs and embedded systems, through a flexible API with many reusable components.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Matsuda:2018:EIL, author = "Kazutaka Matsuda and Meng Wang", title = "Embedding invertible languages with binders: a case of the {FliPpr} language", journal = j-SIGPLAN, volume = "53", number = "7", pages = "158--171", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242758", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes a new embedding technique of invertible programming languages, through the case of the FliPpr language. Embedded languages have the advantage of inheriting host languages' features and supports; and one of the influential methods of embedding is the tagless-final style, which enables a high level of programmability and extensibility. However, it is not straightforward to apply the method to the family of invertible/reversible/bidirectional languages, due to the different ways functions in such domains are represented. We consider FliPpr, an invertible pretty-printing system, as a representative of such languages, and show that Atkey et al.'s unembedding technique can be used to address the problem. Together with a reformulation of FliPpr, our embedding achieves a high level of interoperability with the host language Haskell, which is not found in any other invertible languages. 
We implement the idea and demonstrate the benefits of the approach with examples.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Han:2018:HPM, author = "Dong Han and Tao He", title = "A high-performance multicore {IO} manager based on {\tt libuv} (experience report)", journal = j-SIGPLAN, volume = "53", number = "7", pages = "172--178", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242759", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a high performance multicore I/O manager based on libuv for the Glasgow Haskell Compiler (GHC). The new I/O manager is packaged as an ordinary Haskell package rather than baked into GHC's runtime system (GHC RTS), yet takes advantage of GHC RTS's comprehensive concurrent support, such as lightweight threads and safe/unsafe FFI options. The new I/O manager's performance is comparable with that of the existing implementation, with greater stability under high load. It can also be easily extended to support all of libuv's callback-based APIs, allowing us to write a complete high performance I/O toolkit without spending time on dealing with OS differences or low-level I/O system calls.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Gissurarson:2018:SVH, author = "Matth{\'\i}as P{\'a}ll Gissurarson", title = "Suggesting valid hole fits for typed-holes (experience report)", journal = j-SIGPLAN, volume = "53", number = "7", pages = "179--185", month = jul, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3299711.3242760", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Type systems allow programmers to communicate a partial specification of their program to the compiler using types, which can then be used to check that the implementation matches the specification. But can the types be used to aid programmers during development? In this experience report I describe the design and implementation of my lightweight and practical extension to the typed-holes of GHC that improves user experience by adding a list of valid hole fits and refinement hole fits to the error message of typed-holes.
By leveraging the type checker, these fits are selected from identifiers in scope such that if the hole is substituted with a valid hole fit, the resulting expression is guaranteed to type check.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", remark = "Haskell '18 proceedings.", } @Article{Wilson:2018:BGT, author = "Preston Tunnell Wilson and Ben Greenman and Justin Pombrio and Shriram Krishnamurthi", title = "The behavior of gradual types: a user study", journal = j-SIGPLAN, volume = "53", number = "8", pages = "1--12", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276947", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276947", abstract = "There are several different gradual typing semantics, reflecting different trade-offs between performance and type soundness guarantees. Notably absent, however, are any data on which of these semantics developers actually prefer. We begin to rectify \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Bodin:2018:TMF, author = "Martin Bodin and Tom{\'a}s Diaz and {\'E}ric Tanter", title = "A trustworthy mechanized formalization of {R}", journal = j-SIGPLAN, volume = "53", number = "8", pages = "13--24", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276946", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276946", abstract = "The R programming language is very popular for developing statistical software and data analysis, thanks to rich libraries, concise and expressive syntax, and support for interactive programming. Yet, the semantics of R is fairly complex, contains many subtle corner cases, and is not formally specified. This makes it difficult to reason about R programs. In this work, we develop a big-step operational semantics for R in the form of an interpreter written in the Coq proof assistant. We ensure the trustworthiness of the formalization by introducing a monadic encoding that allows the Coq interpreter, CoqR, to be in direct visual correspondence with the reference R interpreter, GNU R. Additionally, we provide a testing framework that supports systematic comparison of CoqR and GNU R. In its current state, CoqR covers the nucleus of the R language as well as numerous additional features, making it pass a significant number of realistic test cases from the GNU R and FastR projects. To exercise the formal specification, we prove in Coq the preservation of memory invariants in selected parts of the interpreter. 
This work is an important first step towards a robust environment for formal verification of R programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Marron:2018:LLC, author = "Mark Marron", title = "Log++ logging for a cloud-native world", journal = j-SIGPLAN, volume = "53", number = "8", pages = "25--36", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276952", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276952", abstract = "Logging is a fundamental part of the software development and deployment lifecycle, but logging support is often provided as an afterthought via limited library APIs or third-party modules. Given the critical nature of logging in modern cloud, mobile, \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chen:2018:HBA, author = "Hanfeng Chen and Joseph Vinish D'Silva and Hongji Chen and Bettina Kemme and Laurie Hendren", title = "{HorseIR}: bringing array programming languages together with database query processing", journal = j-SIGPLAN, volume = "53", number = "8", pages = "37--49", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276951", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276951", abstract = "Relational database management systems (RDBMS) are operationally similar to a dynamic language processor. They take SQL queries as input, dynamically generate an optimized execution plan, and then execute it. In recent decades, the emergence of in- \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Serrano:2018:JAC, author = "Manuel Serrano", title = "{JavaScript AOT} compilation", journal = j-SIGPLAN, volume = "53", number = "8", pages = "50--63", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276950", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276950", abstract = "Static compilation, a.k.a. ahead-of-time (AOT) compilation, is an alternative approach to JIT compilation that can combine good speed and lightweight memory footprint, and that can accommodate read-only memory constraints that are imposed by some devices and some operating systems. Unfortunately, the highly dynamic nature of JavaScript makes it hard to compile statically, and all existing AOT compilers have given up on either good performance or full language support. We have designed and implemented an AOT compiler that aims at satisfying both.
It supports full unrestricted ECMAScript 5.1 plus many ECMAScript 2017 features and the majority of benchmarks are within 50\% of the performance of one of the fastest JIT compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Seginer:2018:QBO, author = "Yoav Seginer and Theo Vosse and Gil Harari and Uri Kolodny", title = "Query-based object-oriented programming: a declarative web of objects", journal = j-SIGPLAN, volume = "53", number = "8", pages = "64--75", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276949", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276949", abstract = "We present a declarative, object-oriented language in which queries play a central role. Queries are used not only to access data, but also to refer to the application's object members and as a means of program control. The language is fully declarative,. \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Chari:2018:SCD, author = "Guido Chari and Javier Pim{\'a}s and Jan Vitek and Olivier Fl{\"u}ckiger", title = "Self-contained development environments", journal = j-SIGPLAN, volume = "53", number = "8", pages = "76--87", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276948", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276948", abstract = "Operating systems are traditionally implemented in low- level, performance-oriented programming languages. These languages typically rely on minimal runtime support and provide unfettered access to the underlying hardware. Tradition has benefits: \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Herrera:2018:NCW, author = "David Herrera and Hanfeng Chen and Erick Lavoie and Laurie Hendren", title = "Numerical computing on the web: benchmarking for the future", journal = j-SIGPLAN, volume = "53", number = "8", pages = "88--100", month = oct, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393673.3276968", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276968", abstract = "Recent advances in execution environments for JavaScript and WebAssembly that run on a broad range of devices, from workstations and mobile phones to IoT devices, provide new opportunities for portable and web-based numerical computing. 
Indeed, numerous \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Smeltzer:2018:DSL, author = "Karl Smeltzer and Martin Erwig", title = "A domain-specific language for exploratory data visualization", journal = j-SIGPLAN, volume = "53", number = "9", pages = "1--13", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278138", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278138", abstract = "With an ever-growing amount of collected data, the importance of visualization as an analysis component is growing in concert. The creation of good visualizations often doesn't happen in one step but is rather an iterative and exploratory process. \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Stucki:2018:PUM, author = "Nicolas Stucki and Aggelos Biboudis and Martin Odersky", title = "A practical unification of multi-stage programming and macros", journal = j-SIGPLAN, volume = "53", number = "9", pages = "14--27", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278139", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278139", abstract = "Program generation is indispensable. We propose a novel unification of two existing metaprogramming techniques: multi-stage programming and hygienic generative macros. The former supports runtime code generation and execution in a type-safe manner while \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Hatch:2018:RRI, author = "William Gallard Hatch and Matthew Flatt", title = "{Rash}: from reckless interactions to reliable programs", journal = j-SIGPLAN, volume = "53", number = "9", pages = "28--39", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278129", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278129", abstract = "Command languages like the Bourne Shell provide a terse syntax for exploratory programming and system interaction. Shell users can begin to write programs that automate their tasks by simply copying their interactions verbatim into a script file. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Soares:2018:EFI, author = "Larissa Rocha Soares and Jens Meinicke and Sarah Nadi and Christian K{\"a}stner and Eduardo Santana de Almeida", title = "Exploring feature interactions without specifications: a controlled experiment", journal = j-SIGPLAN, volume = "53", number = "9", pages = "40--52", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278127", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278127", abstract = "In highly configurable systems, features may interact unexpectedly and produce faulty behavior. Those faults are not easily identified from the analysis of each feature separately, especially when feature specifications are missing. We propose \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Khalaj:2018:IOD, author = "Ebrahim Khalaj and Marwan Abi-Antoun", title = "Inferring ownership domains from refinements", journal = j-SIGPLAN, volume = "53", number = "9", pages = "53--65", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278128", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278128", abstract = "Ownership type qualifiers clarify aliasing invariants that cannot be directly expressed in mainstream programming languages. Adding qualifiers to code, however, often involves significant overhead and difficult interaction. We propose an analysis to \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Volanschi:2018:ISC, author = "Nic Volanschi and Bernard Serpette and Charles Consel", title = "Implementing a semi-causal domain-specific language for context detection over binary sensors", journal = j-SIGPLAN, volume = "53", number = "9", pages = "66--78", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278134", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278134", abstract = "In spite of the fact that many sensors in use today are binary (i.e. produce only values of 0 and 1), and that useful context-aware applications are built exclusively on top of them, there is currently no development approach specifically targeted to \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Susungi:2018:MPC, author = "Adilla Susungi and Norman A. 
Rink and Albert Cohen and Jeronimo Castrillon and Claude Tadonki", title = "Meta-programming for cross-domain tensor optimizations", journal = j-SIGPLAN, volume = "53", number = "9", pages = "79--92", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278131", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278131", abstract = "Many modern application domains crucially rely on tensor operations. The optimization of programs that operate on tensors poses difficulties that are not adequately addressed by existing languages and tools. Frameworks such as TensorFlow offer good \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Peldszus:2018:MBS, author = "Sven Peldszus and Daniel Str{\"u}ber and Jan J{\"u}rjens", title = "Model-based security analysis of feature-oriented software product lines", journal = j-SIGPLAN, volume = "53", number = "9", pages = "93--106", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278126", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278126", abstract = "Today's software systems are too complex to ensure security after the fact --- security has to be built into systems by design. To this end, model-based techniques such as UMLsec support the design-time specification and analysis of security requirements \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Christophe:2018:ODA, author = "Laurent Christophe and Coen {De Roover} and Elisa Gonzalez Boix and Wolfgang {De Meuter}", title = "Orchestrating dynamic analyses of distributed processes for full-stack {JavaScript} programs", journal = j-SIGPLAN, volume = "53", number = "9", pages = "107--118", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278135", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278135", abstract = "Dynamic analyses are commonly implemented by instrumenting the program under analysis. Examples of such analyses for JavaScript range from checkers of user- defined invariants to concolic testers. 
For a full-stack JavaScript program, these analyses \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Ruland:2018:MES, author = "Sebastian Ruland and Lars Luthmann and Johannes B{\"u}rdek and Sascha Lity and Thomas Th{\"u}m and Malte Lochau and M{\'a}rcio Ribeiro", title = "Measuring effectiveness of sample-based product-line testing", journal = j-SIGPLAN, volume = "53", number = "9", pages = "119--133", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278130", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278130", abstract = "Recent research on quality assurance (QA) of configurable software systems (e.g., software product lines) proposes different analysis strategies to cope with the inherent complexity caused by the well-known combinatorial-explosion problem. Those \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Zhang:2018:PMO, author = "Weixin Zhang and Bruno C. d. S. Oliveira", title = "Pattern matching in an open world", journal = j-SIGPLAN, volume = "53", number = "9", pages = "134--146", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278124", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278124", abstract = "Pattern matching is a pervasive and useful feature in functional programming. There have been many attempts to bring similar notions to Object-Oriented Programming (OOP) in the past. However, a key challenge in OOP is how pattern matching can coexist \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Al-Sibahi:2018:VHL, author = "Ahmad Salim Al-Sibahi and Thomas P. Jensen and Aleksandar S. Dimovski and Andrzej Wasowski", title = "Verification of high-level transformations with inductive refinement types", journal = j-SIGPLAN, volume = "53", number = "9", pages = "147--160", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278125", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278125", abstract = "High-level transformation languages like Rascal include expressive features for manipulating large abstract syntax trees: first-class traversals, expressive pattern matching, backtracking and generalized iterators. 
We present the design and \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Cunha:2018:ESS, author = "J{\'a}come Cunha and Mihai Dan and Martin Erwig and Danila Fedorin and Alex Grejuc", title = "Explaining spreadsheets with spreadsheets (short paper)", journal = j-SIGPLAN, volume = "53", number = "9", pages = "161--167", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278136", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278136", abstract = "Based on the concept of explanation sheets, we present an approach to make spreadsheets easier to understand and thus easier to use and maintain. We identify the notion of explanation soundness and show that explanation sheets which conform to simple \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{vanBinsbergen:2018:FHF, author = "L. Thomas van Binsbergen", title = "Funcons for {HGMP}: the fundamental constructs of homogeneous generative meta-programming (short paper)", journal = j-SIGPLAN, volume = "53", number = "9", pages = "168--174", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278132", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278132", abstract = "The PLanCompS project proposes a component-based approach to programming-language development in which fundamental constructs (funcons) are reused across language definitions. Homogeneous Generative Meta-Programming (HGMP) enables writing programs that \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Liu:2018:RTA, author = "Yin Liu and Kijin An and Eli Tilevich", title = "{RT-trust}: automated refactoring for trusted execution under real-time constraints", journal = j-SIGPLAN, volume = "53", number = "9", pages = "175--187", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278137", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278137", abstract = "Real-time systems must meet strict timeliness requirements. These systems also often need to protect their critical program information (CPI) from adversarial interference and intellectual property theft. 
Trusted execution environments (TEE) execute CPI \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Nieke:2018:AAF, author = "Michael Nieke and Jacopo Mauro and Christoph Seidl and Thomas Th{\"u}m and Ingrid Chieh Yu and Felix Franzke", title = "Anomaly analyses for feature-model evolution", journal = j-SIGPLAN, volume = "53", number = "9", pages = "188--201", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278123", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278123", abstract = "Software Product Lines (SPLs) are a common technique to capture families of software products in terms of commonalities and variabilities. On a conceptual level, functionality of an SPL is modeled in terms of features in Feature Models (FMs). As other \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", } @Article{Radanne:2018:RLG, author = "Gabriel Radanne and Peter Thiemann", title = "{Regenerate}: a language generator for extended regular expressions", journal = j-SIGPLAN, volume = "53", number = "9", pages = "202--214", month = nov, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3393934.3278133", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Apr 8 13:49:51 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278133", abstract = "Regular expressions are part of every programmer's toolbox. They are used for a wide variety of language-related tasks and there are many algorithms for manipulating them. In particular, matching algorithms that detect whether a word belongs to the language described by a regular expression are well explored, yet new algorithms appear frequently. However, there is no satisfactory methodology for testing such matchers. We propose a testing methodology which is based on generating positive as well as negative examples of words in the language. To this end, we present a new algorithm to generate the language described by a generalized regular expression with intersection and complement operators. The complement operator allows us to generate both positive and negative example words from a given regular expression. We implement our generator in Haskell and OCaml and show that its performance is more than adequate for testing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "https://dl.acm.org/loi/sigplan", }