%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.104", %%% date = "17 March 2026", %%% time = "15:41:08 MDT", %%% filename = "todaes.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "57620 65028 332542 3252205", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; ACM Transactions on %%% Design Automation of Electronic Systems; %%% TODAES", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on Design Automation of %%% Electronic Systems (CODEN ATASFO, ISSN %%% 1084-4309 (print), 1557-7309 (electronic)), %%% completely covering all issues from volume 1, %%% number 1, January 1996 to date. %%% %%% The ACM maintains World Wide Web pages with %%% journal tables of contents for 1996--date at %%% %%% http://www.acm.org/todaes/ %%% http://www.acm.org/pubs/contents/journals/todaes/ %%% http://portal.acm.org/browse_dl.cfm?idx=J776 %%% %%% That data has been automatically converted to %%% BibTeX form, corrected for spelling and page %%% number errors, and merged into this file. 
%%% %%% At version 1.104, the COMPLETE year coverage %%% looks like this: %%% %%% 1996 ( 20) 2007 ( 52) 2018 ( 70) %%% 1997 ( 19) 2008 ( 68) 2019 ( 69) %%% 1998 ( 29) 2009 ( 65) 2020 ( 56) %%% 1999 ( 17) 2010 ( 35) 2021 ( 50) %%% 2000 ( 35) 2011 ( 40) 2022 ( 65) %%% 2001 ( 28) 2012 ( 67) 2023 ( 105) %%% 2002 ( 31) 2013 ( 55) 2024 ( 106) %%% 2003 ( 30) 2014 ( 48) 2025 ( 107) %%% 2004 ( 21) 2015 ( 68) 2026 ( 59) %%% 2005 ( 35) 2016 ( 73) %%% 2006 ( 44) 2017 ( 68) %%% %%% Article: 1635 %%% %%% Total entries: 1635 %%% %%% Numerous errors in the sources noted above %%% have been corrected. Spelling has been %%% verified with the UNIX spell and GNU ispell %%% programs using the exception dictionary %%% stored in the companion file with extension %%% .sok. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. Article reviews have been %%% omitted, until their copyright status has %%% been clarified. %%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed by the %%% author for the BibNet Project. %%% %%% In this bibliography, entries are sorted %%% by journal, and then by publication order, %%% with the help of ``bibsort -byvolume''. The %%% bibsort utility is available from %%% ftp://ftp.math.utah.edu/pub/tex/bib. 
%%%
%%%                        The author will be grateful for reports of
%%%                        errors of any kind in this bibliography.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters. This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================
%%% TeX support code: the preamble below inputs bibnames.sty and then
%%% supplies fallback definitions of \circled, \reg, and \TM, each one
%%% only when the macro is not already defined.
@Preamble{
    "\input bibnames.sty" #
    "\ifx \undefined \circled \def \circled #1{(#1)}\fi" #
    "\ifx \undefined \reg \def \reg {\circled{R}}\fi" #
    "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
%%% ack-nhfb is the value of the acknowledgement field of the entries.
@String{ack-nhfb = "Nelson H. F. Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
%%% j-TODAES is the value of the journal field of the entries.
@String{j-TODAES = "ACM Transactions on Design Automation of Electronic Systems"}

%%% ====================================================================
%%% Bibliography entries from ACM Transactions on Design Automation of
%%% Electronic Systems.
%%% Volume 1, number 1, January 1996
@Article{Pedram:1996:PMI,
  author          = "Massoud Pedram",
  title           = "Power minimization in {IC} design: principles and applications",
  journal         = j-TODAES,
  volume          = "1",
  number          = "1",
  pages           = "3--56",
  month           = jan,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p3-pedram/p3-pedram.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p3-pedram/",
  abstract        = "Low power has emerged as a principal theme in today's electronics industry. The need for low power has caused a major paradigm shift in which power dissipation is as important as performance and area. This article presents an in-depth survey of CAD methodologies and techniques for designing low power digital CMOS circuits and systems and describes the many issues facing designers at architectural, logical, and physical levels of design abstraction. It reviews some of the techniques and tools that have been proposed to overcome these difficulties and outlines the future challenges that must be met to design low power, high performance systems.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Experimentation; Performance",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "adiabatic circuits; CMOS circuits; computer-aided design of VLSI; dynamic power dissipation; energy-delay product; gated clocks; layout; low power layout; low power synthesis; lower-power design; power analysis and estimation; power management; power minimization and management; probabilistic analysis; silicon-on-insulator technology; statistical sampling; switched capacitance; switching activity; symbolic simulation; synthesis; system design",
  subject         = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Hardware --- Integrated Circuits --- General (B.7.0)",
}

@Article{Cheng:1996:AGF,
  author          = "Kwang-Ting Cheng and A. S. Krishnakumar",
  title           = "Automatic generation of functional vectors using the extended finite state machine model",
  journal         = j-TODAES,
  volume          = "1",
  number          = "1",
  pages           = "57--79",
  month           = jan,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p57-cheng/p57-cheng.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p57-cheng/",
  abstract        = "We present a method of automatic generation of functional vectors for sequential circuits. These vectors can be used for design verification, manufacturing testing, or power estimation. A high-level description of the circuit in VHDL or C is assumed available. Our method automatically transforms the high-level description of a circuit in VHDL or C into an extended finite state machine (EFSM) model that is used to generate functional vectors. The EFSM model is a generalization of the traditional state machine model. It is a compact representation of models with local data variables and preserves many nice properties of a traditional state machine model. The theoretical background of the EFSM model is addressed in this article. Our method guarantees that the generated vectors cover every statement in the high-level description at least once. Experimental results show that a set of comprehensive functional vectors for sequential circuits with more than a hundred flip-flops can be generated automatically in a few minutes of CPU time using our prototype system.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Experimentation; Languages; Theory; Verification",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "automatic test generation; design verification; extended finite state machines; functional testing",
  subject         = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}; Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Theory of Computation --- Computation by Abstract Devices --- Models of Computation (F.1.1): {\bf Automata}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}; Hardware --- Integrated Circuits --- Reliability and Testing** (B.7.3): {\bf Testability**}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2)",
}
@Article{Chang:1996:USM,
  author          = "Yao-Wen Chang and D. F. Wong and C. K. Wong",
  title           = "Universal switch modules for {FPGA} design",
  journal         = j-TODAES,
  volume          = "1",
  number          = "1",
  pages           = "80--101",
  month           = jan,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p80-chang/p80-chang.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p80-chang/",
  abstract        = "A switch module $M$ with $W$ terminals on each side is said to be {\em universal\/} if every set of nets satisfying the dimensional constraint (i.e., the number of nets on each side of $M$ is at most $W$) is simultaneously routable through $M$. In this article, we present a class of universal switch modules. Each of our switch modules has $ 6 W$ switches and {\em switch-module flexibility\/} three (i.e., $ F_S = 3$). We prove that no switch module with less than $ 6 W$ switches can be universal. We also compare our switch modules with those used in the Xilinx XC4000 family FPGAs and the {\em antisymmetric\/} switch modules (with $ F_S = 3$) suggested by Rose and Brown [1991]. Although these two kinds of switch modules also have $ F_S = 3$ and $ 6 W$ switches, we show that they are not universal. Based on combinatorial counting techniques, we show that each of our universal switch modules can accommodate up to 25\% more routing instances, compared with the XC4000-type switch module of the same size. Experimental results demonstrate that our universal switch modules improve routability at the chip level. Finally, our work also provides a theoretical insight into the important observation by Rose and Brown [1991] (based on extensive experiments) that $ F_S = 3$ is often sufficient to provide high routability.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Experimentation; Measurement; Performance; Theory; Verification",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject         = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}",
}

@Article{Thakur:1996:SPF,
  author          = "Shashidhar Thakur and D. F. Wong",
  title           = "Series-parallel functions and {FPGA} logic module design",
  journal         = j-TODAES,
  volume          = "1",
  number          = "1",
  pages           = "102--122",
  month           = jan,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p102-thakur/p102-thakur.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p102-thakur/",
  abstract        = "The need for a two-way interaction between logic synthesis and FPGA logic module design has been stressed recently. Having a logic module that can implement many functions is a good idea only if one can also give a synthesis strategy that makes efficient use of this functionality. Traditionally, technology mapping algorithms have been developed after the logic architecture has been designed. We follow a dual approach, by focusing on a specific technology mapping algorithm, namely, the structural tree-based mapping algorithm, and designing a logic module that can be mapped efficiently by this algorithm. It is known that the tree-based mapping algorithm makes optimal use of a library of functions, each of which can be represented by a tree of AND, OR, and NOT gates (series-parallel or SP functions). We show how to design a SP function with a minimum number of inputs that can implement all possible SP functions with a specified number of inputs. For instance, we demonstrate a seven-input SP function that can implement all four-input SP functions. Mapping results show that, on an average, the number of blocks of this function needed to map benchmark circuits are 12\% less than those for Actel's ACT1 logic modules. Our logic modules show a 4\% improvement over ACT1, if the block count is scaled to take into account the number of transistors needed to implement different logic modules.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Experimentation; Performance; Theory; Verification",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "field programmable gate arrays; series-parallel technology mapping; tree-based technology mapping algorithm; universal logic modules",
  subject         = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational logic}; Hardware --- Logic Design --- Design Aids (B.6.3); Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}",
}

@Article{Thanvantri:1996:OFS,
  author          = "Venkat Thanvantri and Sartaj Sahni",
  title           = "Optimal folding of standard and custom cells",
  journal         = j-TODAES,
  volume          = "1",
  number          = "1",
  pages           = "123--143",
  month           = jan,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p123-thanvantri/p123-thanvantri.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p123-thanvantri/",
  abstract        = "We study the problem of folding an ordered list of standard and custom cells into rows of a chip so as to minimize either the routing area or the total chip area. Nine versions of the folding problem are formulated and fast polynomial time algorithms are obtained for each. Two of our formulations correspond to problems formulated in Paik and Sahni [1993] for the folding of a stack of bit-slice components. Our algorithms for these two formulations are asymptotically superior to those of Paik and Sahni [1993].",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Experimentation; Measurement; Performance; Theory",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "custom cell folding; layout area; standard cell folding",
  subject         = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Routing and layout}",
}

%%% Volume 1, number 2, April 1996
@Article{Cong:1996:CLS,
  author          = "Jason Cong and Yuzheng Ding",
  title           = "Combinational logic synthesis for {LUT} based field programmable gate arrays",
  journal         = j-TODAES,
  volume          = "1",
  number          = "2",
  pages           = "145--204",
  month           = apr,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p145-cong/p145-cong.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p145-cong/",
  abstract        = "The increasing popularity of the field programmable gate-array (FPGA) technology has generated a great deal of interest in the algorithmic study and tool development for FPGA-specific design automation problems. The most widely used FPGAs are LUT based FPGAs, in which the basic logic element is a $K$-input one-output lookup-table (LUT) that can implement any Boolean function of up to $K$ variables. This unique feature of the LUT has brought new challenges to logic synthesis and optimization, resulting in many new techniques reported in recent years. This article summarizes the research results on combinational logic synthesis for LUT based FPGAs under a coherent framework. These results were dispersed in various conference proceedings and journals and under various formulations and terminologies. We first present general problem formulations, various optimization objectives and measurements, then focus on a set of commonly used basic concepts and techniques, and finally summarize existing synthesis algorithms and systems. We classify and summarize the basic techniques into two categories, namely, {\em logic optimization\/} and {\em technology mapping}, and describe the existing algorithms and systems in terms of how they use the classified basic techniques. A comprehensive list of references is compiled in the attached bibliography.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Experimentation; Measurement; Performance; Theory",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "area minimization; computer-aided design of VLSI; decomposition; delay minimization; delay modeling; FPGA; logic optimization; power minimization; programmable logic; routing; simplification; synthesis; system design; technology mapping",
  subject         = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational logic}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Middelhoek:1996:VEF,
  author          = "Peter F. A. Middelhoek and Sreeranga P. Rajan",
  title           = "From {VHDL} to efficient and first-time-right designs: a formal approach",
  journal         = j-TODAES,
  volume          = "1",
  number          = "2",
  pages           = "205--250",
  month           = apr,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p205-middelhoek/p205-middelhoek.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p205-middelhoek/",
  abstract        = "In this article we provide a practical transformational approach to the synthesis of correct synchronous digital hardware designs from high-level specifications. We do this while taking into account the complete life cycle of a design from early prototype to full custom implementation. Besides time-to-market, both flexibility with respect to target architecture and efficiency issues are addressed by the methodology. The utilization of user-selected behavior-preserving transformation steps ensures first-time-right design while exploiting the experience, flexibility, and creativity of the designer. \par To ensure that design transformations are indeed behavior-preserving a novel mechanized approach to the specification and verification of design transformations on control data flow graphs which is independent of a specific behavioral model or graph size has been developed. \par As a demonstration of an industrial application we use a video processing algorithm needed for the conversion from a line-interlaced to progressively scanned video format. Both a video signal processor-based prototype implementation as well as a very efficient full custom implementation are developed starting from a single high-level behavioral specification of the algorithm in VHDL. Results are compared with those previously obtained using different tools and methodologies.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Design; Human Factors; Languages; Theory; Verification",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "CDFG; correctness by construction; design methodology; rapid system prototyping; SFG; transformational design; VHDL",
  subject         = "Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Arithmetic and logic units}; Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Control design}; Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Data-path design}; Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Styles}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Verification}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Hardware description languages}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Verification}; Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Applicative (functional) languages}; Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Data-flow languages}; Theory of Computation --- Logics and Meanings of Programs --- Specifying and Verifying and Reasoning about Programs (F.3.1): {\bf Mechanical verification}; Theory of Computation --- Mathematical Logic and Formal Languages --- Mathematical Logic (F.4.1): {\bf Mechanical theorem proving}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Software --- Software Engineering --- Software/Program Verification (D.2.4): {\bf Correctness proofs}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf VHDL}",
}

@Article{Kolson:1996:ORA,
  author          = "David J. Kolson and Alexandru Nicolau and Nikil Dutt and Ken Kennedy",
  title           = "Optimal register assignment to loops for embedded code generation",
  journal         = j-TODAES,
  volume          = "1",
  number          = "2",
  pages           = "251--279",
  month           = apr,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p251-kolson/p251-kolson.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p251-kolson/",
  abstract        = "One of the challenging tasks in code generation for embedded systems is register assignment. When more live variables than registers exist, some variables will necessarily be accessed from data memory. Because loops are typically executed many times and are often time-critical, good register assignment in loops is exceedingly important as accessing data memory can degrade performance. The issue of finding an optimal register assignment to loops has been open for some time. In this article, we present a technique for optimal (i.e., spill minimizing) register assignment to loops. First we present a technique for register assignment to architecture styles that are characterized by a consolidated register file. Then we extend the technique to include architecture styles that are characterized by distributed memories and/or a combination of general- and special-purpose registers. Experimental results demonstrate that although the optimal algorithm may be computationally prohibitive, heuristic versions obtain results with performance better than that of an existing graph coloring approach.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Languages",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "code generation; embedded systems; system design",
  subject         = "Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}; Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}",
}

@Article{Prasad:1996:TRP,
  author          = "S. C. Prasad and K. Roy",
  title           = "Transistor reordering for power minimization under delay constraint",
  journal         = j-TODAES,
  volume          = "1",
  number          = "2",
  pages           = "280--300",
  month           = apr,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p280-prasad/p280-prasad.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p280-prasad/",
  abstract        = "In this article we address the problem of optimization of VLSI circuits to minimize power consumption while meeting performance goals. We present a method of estimating power consumption of a basic or complex CMOS gate which takes the internal capacitances of the gate into account. This method is used to select an ordering of series-connected transistors found in CMOS gates to achieve lower power consumption. The method is very efficient when used by library-based design styles. We describe a multipass algorithm that makes use of transistor reordering to optimize performance and power consumption of circuits, has a linear time complexity per pass, and converges to a solution in a small number of passes. Transformations in addition to transistor reordering can be used by the algorithm. The algorithm has been benchmarked on several large examples and the results are presented.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design; Performance",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "circuit optimization; critical path enumeration; gate input reordering; power estimation; transistor reordering",
  subject         = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}",
}

%%% Volume 1, number 3, July 1996
@Article{Wolf:1996:OOC,
  author          = "Wayne Wolf",
  title           = "Object-oriented cosynthesis of distributed embedded systems",
  journal         = j-TODAES,
  volume          = "1",
  number          = "3",
  pages           = "301--314",
  month           = jul,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p301-wolf/p301-wolf.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p301-wolf/",
  abstract        = "This article describes a new hardware-software cosynthesis algorithm that takes advantage of the structure inherent in an object-oriented specification. The algorithm creates a distributed system implementation with arbitrary topology, using the object-oriented structure to partition functionality in addition to scheduling and allocating processes. Process partitioning is an especially important optimization for such systems because the specification will not, in general, take into account the process structure required for efficient execution on the distributed engine. The object-oriented specification naturally provides both coarse-grained and fine-grained partitions of the system. Our algorithm uses that multilevel structure to guide synthesis. Experimental results show that our algorithm takes advantage of the object-oriented specification to quickly converge on high-quality implementations.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Design",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "distributed embedded systems; hardware-software co-design; object-oriented co-synthesis",
  subject         = "Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Microprocessor/microcomputer applications}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}",
}

@Article{Chow:1996:LPR,
  author          = "Sue-Hong Chow and Yi-Cheng Ho and TingTing Hwang and C. L. Liu",
  title           = "Low power realization of finite state machines --- a decomposition approach",
  journal         = j-TODAES,
  volume          = "1",
  number          = "3",
  pages           = "315--340",
  month           = jul,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p315-chow/p315-chow.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p315-chow/",
  abstract        = "We present in this article a new approach to the synthesis problem for finite state machines with the reduction of power dissipation as a design objective. A finite state machine is decomposed into a number of {\em coupled\/} submachines. Most of the time, only one of the submachines will be activated which, consequently, could lead to substantial savings in power consumption. The key steps in our approach are: (1) decomposition of a finite state machine into submachines so that there is a high probability that state transitions will be confined to the smaller of the submachines most of the time, and (2) synthesis of the coupled submachines to optimize the logic circuits. Experimental results confirmed that our approach produced very good results (in particular, for finite state machines with a large number of states.)",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Design; Performance",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "decomposition of finite state machines; lower power design; state assignment",
  subject         = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Kagaris:1996:FAM,
  author          = "Dimitrios Kagaris and Spyros Tragoudas",
  title           = "A fast algorithm for minimizing {FPGA} combinational and sequential modules",
  journal         = j-TODAES,
  volume          = "1",
  number          = "3",
  pages           = "341--351",
  month           = jul,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p341-kagaris/p341-kagaris.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p341-kagaris/",
  abstract        = "We present a quadratic-time algorithm for minimizing the number of modules in an FPGA with combinational and sequential modules (like the C-modules and S-modules of the ACT2 and ACT3 architectures). The constraint is that a combinational module can be combined with one flip-flop in a single sequential module, only if the combinational module drives no other combinational modules. Our algorithm uses a minimum-cost flow formulation to solve the problem with a significant time improvement over a previous approach that used a general linear program.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Algorithms; Design",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "field programmable gate arrays; retiming",
  subject         = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}",
}

@Article{Chang:1996:OCP,
  author          = "En-Shou Chang and Daniel D. Gajski and Sanjiv Narayan",
  title           = "An optimal clock period selection method based on slack minimization criteria",
  journal         = j-TODAES,
  volume          = "1",
  number          = "3",
  pages           = "352--370",
  month           = jul,
  year            = "1996",
  CODEN           = "ATASFO",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Jul 27 10:05:33 MDT 2001",
  bibsource       = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL             = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p352-chang/p352-chang.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p352-chang/",
  abstract        = "An important decision in synthesizing a hardware implementation from a behavioral description is selecting the clock period to schedule the datapath operations into control steps. Prior to scheduling, most existing behavioral synthesis systems either require the designer to specify the clock period explicitly or require that the delays of the operators used in the design be specified in multiples of the clock period. An unfavorable choice of clock period could result in operations being idle for a large portion of the clock period and, consequently, affect the performance of the synthesized design. In this article, we demonstrate the effect of clock slack on the performance of designs and present an algorithm to find a slack-minimal clock period. We prove the optimality of our method and apply it to several examples to demonstrate its effectiveness in maximizing design performance.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms    = "Design; Measurement; Performance",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords        = "clock period; clock slack; performance estimation; scheduling",
  subject         = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2)",
}

@Article{Lopez:1996:EDP, author = "Mario A. Lopez and Dinesh P.
Mehta", title = "Efficient decomposition of polygons into {L-shapes} with application to {VLSI} layouts", journal = j-TODAES, volume = "1", number = "3", pages = "371--395", month = jul, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p371-lopez/p371-lopez.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p371-lopez/", abstract = "We present two practical algorithms for partitioning circuit components represented by rectilinear polygons so that they can be stored using the L-shaped corner stitching data structure; that is, our algorithms decompose a simple polygon into a set of nonoverlapping L-shapes and rectangles by using horizontal cuts only. The more general of our algorithms computes an optimal configuration for a wide variety of optimization functions, whereas the other computes a minimum configuration of rectangles and L-shapes. Both algorithms run in $ O(n + h \log h) $ time, where $n$ is the number of vertices in the polygon and $h$ is the number of H-pairs. Because for VLSI data $h$ is small, in practice these algorithms are linear in $n$.
Experimental results on actual VLSI data compare our algorithms and demonstrate the gains in performance for corner stitching (as measured by different objective functions) obtained by using them instead of more traditional rectangular partitioning algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "corner stitching; L-shapes; partition; rectangle; rectilinear polygons", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Geometrical problems and computations}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}", } @Article{Moreno:1996:REU, author = "R. Moreno and R. Hermida and M. Fern{\'a}ndez", title = "Register estimation in unscheduled dataflow graphs", journal = j-TODAES, volume = "1", number = "3", pages = "396--403", month = jul, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p396-moreno/p396-moreno.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p396-moreno/", abstract = "A method for register number estimation in unscheduled or partially scheduled dataflow graphs is presented. The strategy consists of studying the probability that an edge between two nodes crosses the boundary between two control steps, and it is based on a model that associates probabilities with the different scheduling alternatives of each node.
These probabilities are computed by means of an analytic method that takes into account the distribution of operations in the dataflow graph and the hardware modules available in the library. The results highlight that the estimation method is very accurate because the error between the estimated value and the real value is always within a narrow margin.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "area estimation; high-level synthesis; probabilities; register estimation; scheduling", subject = "Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Data-path design}", } @Article{Cheng:1996:GLT, author = "Kwang-Ting Cheng", title = "Gate-level test generation for sequential circuits", journal = j-TODAES, volume = "1", number = "4", pages = "405--442", month = oct, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p405-cheng/p405-cheng.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p405-cheng/", abstract = "This paper discusses the gate-level automatic test pattern generation (ATPG) methods and techniques for sequential circuits. The basic concepts, examples, advantages, and limitations of representative methods are reviewed in detail. 
The relationship between gate-level sequential circuit ATPG and the partial scan design is also discussed.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Reliability; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "automatic test generation; IC testing; sequential circuit test generation; testing", subject = "Hardware --- Integrated Circuits --- Reliability and Testing** (B.7.3); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1)", } @Article{Langevin:1996:RTC, author = "M. Langevin and E. Cerny", title = "A recursive technique for computing lower-bound performance of schedules", journal = j-TODAES, volume = "1", number = "4", pages = "443--455", month = oct, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p443-langevin/p443-langevin.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p443-langevin/", abstract = "We present a fast recursive technique for estimating lower-bound performance of data path schedules. The method relies on the determination of an ASAPUC (As Soon As Possible Under Constraint) time-step value for each node of the DFG (Data-Flow Graph) that is based on the ASAPUC values of its predecessor nodes. That is, the lower-bound estimation is applied to each subgraph permitting the derivation of a tight lower bound on the performance of the complete DFG. Applying the greedy lower-bound estimator of Rim and Jain [1994] to each subgraph improves the complete lower bound in more than 50\% of the experiments reported in Rim and Jain [1994], and the CPU time is only about twice as long.
The recursive methodology can be extended to exploit other lower-bound techniques, for example, considering other constraints such as the number of busses or registers.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "dataflow graph; lower-bound on performance; microcode optimization; resource constraints; scheduling", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}", } @Article{Sosic:1996:UAF, author = "Rok Sosi{\=c} and Jun Gu and Robert R. Johnson", title = "The {Unison} algorithm: fast evaluation of {Boolean} expressions", journal = j-TODAES, volume = "1", number = "4", pages = "456--477", month = oct, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 22 15:33:01 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p456-sosic/p456-sosic.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p456-sosic/", abstract = "We present a Unison algorithm to evaluate arbitrarily complex Boolean expressions. This novel algorithm, based on the total differential of a Boolean function, enables fast evaluation of Boolean expressions in software. Any combination of Boolean operations can be packed into the bits of one computer word and evaluated in parallel by bitwise logical operations. Sample runs of the Unison algorithm show that many Boolean operations can be evaluated in one clock cycle.
The Unison algorithm is able to evaluate Boolean expressions at an execution speed that is comparable to compiled evaluation while retaining the flexibility of interpreted approaches. The algorithm lends itself well to many practical applications.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance; Reliability; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Boolean differential; Boolean evaluation; Boolean expressions; Unison algorithm", subject = "Hardware --- Logic Design --- General (B.6.0); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2)", } @Article{Cong:1996:OWI, author = "Jason Cong and Lei He", title = "Optimal wiresizing for interconnects with multiple sources", journal = j-TODAES, volume = "1", number = "4", pages = "478--511", month = oct, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p478-cong/p478-cong.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p478-cong/", abstract = "In this paper, we study the optimal wiresizing problem for nets with multiple sources under the RC tree model and the Elmore delay model. We decompose the routing tree for a multisource net into the source subtree (SST) and a set of loading subtrees (LSTs), and show that the optimal wiresizing solution satisfies a number of interesting properties, including: LST separability, the LST monotone property, the SST local monotone property, and the dominance property. 
Furthermore, we study the optimal wiresizing problem using a variable segment-division rather than an a priori fixed segment-division as in all previous works and reveal the bundled refinement property. These properties lead to efficient algorithms to compute the optimal solutions. We have tested our algorithm on nets extracted from the multilayer layout for a high-performance Intel microprocessor. Accurate SPICE simulation shows that our methods reduce the average delay by up to 23.5\% and the maximum delay by up to 37.8\%, respectively, for the submicron CMOS technology when compared to the minimal wire width solution. In addition, the algorithm based on the variable segment-division yields a speedup of over 100$ \times $ time and does not lose any accuracy, when compared with the algorithm based on the a priori fixed segment-division.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bundled refinement; decomposition of multi-source routing tree; dominance property; Elmore delay; fidelity; high performance; interconnect optimization; layout optimization; local refinement; multi-source net; multi-source routing tree; optimal wiresizing; variable segment-division", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1); Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf SPICE}; Hardware --- Input/Output and Data Communications 
--- Interconnections (Subsystems) (B.4.3)", } @Article{Ganley:1996:RST, author = "Joseph L. Ganley and James P. Cohoon", title = "Rectilinear {Steiner} trees on a checkerboard", journal = j-TODAES, volume = "1", number = "4", pages = "512--522", month = oct, year = "1996", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p512-ganley/p512-ganley.pdf; http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p512-ganley/", abstract = "The rectilinear Steiner tree problem is to find a minimum-length set of horizontal and vertical line segments that interconnect a given set of points in the plane. Here we study the {\em thumbnail rectilinear Steiner tree\/} problem, where the input points are drawn from a small integer grid. Specifically, we devise a full-set decomposition algorithm for computing optimal thumbnail rectilinear Steiner trees. We then present experimental results comparing the performance of this algorithm with two existing algorithms for computing optimal rectilinear Steiner trees.
The thumbnail rectilinear Steiner tree problem has applications in VLSI placement algorithms, based on geometric partitioning, global routing of field-programmable gate arrays, and routing estimation during floorplanning.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "exact algorithms; full-set decomposition; rectilinear Steiner tree; routing", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Geometrical problems and computations}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Trees}", } @Article{Lin:1997:RDH, author = "Youn-Long Lin", title = "Recent developments in high-level synthesis", journal = j-TODAES, volume = "2", number = "1", pages = "2--21", month = jan, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p2-lin/p2-lin.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p2-lin/", abstract = "We survey recent developments in high level synthesis technology for VLSI design. The need for higher-level design automation tools is discussed first. We then describe some basic techniques for various subtasks of high-level synthesis. Techniques that have been proposed in the past few years (since 1994) for various subtasks of high-level synthesis are surveyed.
We also survey some new synthesis objectives including testability, power efficiency, and reliability.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Languages; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design automation; design methodology; high level synthesis; VLSI design", subject = "Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Data-path design}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}", } @Article{Gong:1997:MRH, author = "Jie Gong and Daniel D. Gajski and Smita Bakshi", title = "Model refinement for hardware-software codesign", journal = j-TODAES, volume = "2", number = "1", pages = "22--41", month = jan, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p22-gong/p22-gong.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p22-gong/", abstract = "Hardware-software codesign, which implements a given specification with a set of system components such as ASICs and processors, includes several key tasks such as system component allocation, functional partitioning, quality metrics estimation, and model refinement. In this work, we focus on the model refinement task which transforms a specification from an original functional model to a refined implementation model. 
First, we categorize several commonly used implementation models and describe a set of refinement procedures to transform a specification to each of these implementation models. We also present a set of experimental results to compare the implementation models and to demonstrate how the proposed approach can be used to explore different implementation styles.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation; Languages; Measurement", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "functional model; implementation model; model refinement; software-hardware codesign", subject = "Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Computer Systems Organization --- General (C.0): {\bf Hardware/software interfaces}; Hardware --- Register-Transfer-Level Implementation --- General (B.5.0); Computer Systems Organization --- General (C.0): {\bf Modeling of computer architecture}", } @Article{deAbreuMoreira:1997:ADC, author = "Dilvan {de Abreu Moreira} and Les T. Walczowski", title = "{AGENTS} a distributed client-server system for leaf cell generation", journal = j-TODAES, volume = "2", number = "1", pages = "42--61", month = jan, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:28:35 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p42-moreira/p42-moreira.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p42-moreira/", abstract = "The AGENTS system is a set of programs designed to generate automatically the mask-level layout of full custom CMOS, BICMOS, and bipolar leaf cells. 
The system is formed from four server programs: the placer, router, database, and broker. \par The placer places components in a cell, the router wires the circuits sent to it, the database stores all the information that is dependent upon the fabrication process, such as the design rules, and the Broker makes the services of the other servers available. \par These servers communicate over a computer network using the TCP/IP Internet Protocol. The Placer server receives from its client the description and netlist of the circuit to be generated using EDIF (Electronic Design Interchange Format.) The output to its client is the mask layout of the circuit, again codified in EDIF. The concept of agents as software components which have the ability to communicate and cooperate with each other is at the heart of the AGENTS system. This concept is not only used at the higher level, for the four servers, but at a lower level as well, inside the Router and Placer servers, where small relatively simple agents work together to accomplish complex tasks. These small agents are responsible for all the reasoning carried out by the two servers, as they hold the basic inference routines and the knowledge needed by the servers. The system's philosophy is that competence should emerge out of the collective behavior of a large number of relatively simple agents.
In addition and integrated to these small agents, the system uses a genetic algorithm to improve components' placement before routing.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "client/server model; genetic algorithms; software agents", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1)", } @Article{Esbensen:1997:PDI, author = "Henrik Esbensen and Ernest S. Kuh", title = "A performance-driven {IC\slash MCM} placement algorithm featuring explicit design space exploration", journal = j-TODAES, volume = "2", number = "1", pages = "62--80", month = jan, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p62-esbensen/p62-esbensen.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p62-esbensen/", abstract = "A genetic algorithm for building-block placement of ICs and MCMs is presented that simultaneously minimizes layout area and an Elmore-based estimate of the maximum path delay while trying to meet a target aspect ratio. Explicit design space exploration is performed by using a vector-valued, 3-dimensional cost function and searching for a set of distinct solutions representing the best trade-offs of the cost dimensions. From the output solutions, the designer can choose the solution with the preferred trade-off. In contrast to existing approaches, the required properties of the output solutions are specified without using weights or bounds. 
Consequently, the practical problems of specifying these quantities are eliminated. Promising experimental results are obtained for various placement problems, including a real-world design. Solution sets representing good, balanced cost trade-offs are found using a reasonable amount of runtime. Furthermore, the performance is shown to be comparable to that of simulated annealing in the special case of 1-dimensional optimization, in which direct comparison is possible.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design space exploration; timing-driven building-block placement", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Computing Methodologies --- Artificial Intelligence --- Problem Solving, Control Methods, and Search (I.2.8): {\bf Heuristic methods}", } @Article{Lin:1997:STV, author = "Yann-Rue Lin and Cheng-Tsung Hwang and Allen C.-H. Wu", title = "Scheduling techniques for variable voltage low power designs", journal = j-TODAES, volume = "2", number = "2", pages = "81--97", month = apr, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p81-lin/p81-lin.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p81-lin/", abstract = "This paper presents an integer linear programming (ILP) model and a heuristic for the variable voltage scheduling problem.
We present the variable voltage scheduling techniques that consider in turn timing constraints alone, resource constraints alone, and timing and resource constraints together for design space exploration. Experimental results show that our heuristic produces results competitive with those of the ILP method in a fraction of the run-time. The results also show that a wide range of design alternatives can be generated using our design space exploration method. Using different cost/delay combinations, power consumption in a single design can differ by as much as a factor of 6 when using mixed 3.3V and 5V supply voltages.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "high-level synthesis; lower power design; scheduling; variable voltage", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2); Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Styles}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}", } @Article{Fummi:1997:FDT, author = "F. Fummi and U. Rovati and D. 
Sciuto", title = "Functional design for testability of control-dominated architectures", journal = j-TODAES, volume = "2", number = "2", pages = "98--122", month = apr, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p98-fummi/p98-fummi.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p98-fummi/", abstract = "Control-dominated architectures are usually described in a hardware description language (HDL) by means of interacting FSMs. A VHDL or Verilog specification can be translated into an interacting FSM (IFSM) representation as described here. The IFSM model allows us to approach the testable synthesis problem at the level of each FSM. The functionality is modified by the addition of transparency to data flow. The complete testability of the IFSM implementation is thus achieved by connecting fully testable implementations of each modified FSM. In this way, test sequences separately generated for each FSM are directly applied to the IFSM to achieve complete fault coverage. The addition of test functionality to each FSM description, and its simultaneous synthesis with the FSM functionality, produces a lower area overhead than that necessary for the application of a partial-scan technique.
Moreover, the test generation problem is highly simplified since it is reduced to the test generation for each separate FSM.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Measurement; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "functional testing; interacting FSMs", subject = "Hardware --- Register-Transfer-Level Implementation --- Reliability and Testing** (B.5.3): {\bf Test generation**}; Hardware --- Register-Transfer-Level Implementation --- Reliability and Testing** (B.5.3): {\bf Testability**}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Hardware description languages}", } @Article{Kormicki:1997:PLS, author = "Maciek Kormicki and Ausif Mahmood and Bradley S. Carlson", title = "Parallel logic simulation on a network of workstations using parallel virtual machine", journal = j-TODAES, volume = "2", number = "2", pages = "123--134", month = apr, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/", abstract = "This paper explores parallel logic simulation on a network of workstations using a parallel virtual machine (PVM). A novel parallel implementation of the centralized-time event-driven logic simulation algorithm is carried out such that no global controlling workstation is needed to synchronize the advance of simulation time. Further advantages of our new approach include a random partitioning of the circuit onto available workstations and a pipelined execution of the different phases of the simulation algorithm.
To achieve a better load balance, we employ a semioptimistic
                 scheme for gate evaluations (in conjunction with a
                 centralized-time algorithm) such that no rollback is
                 required. The performance of this implementation has
                 been evaluated using the ISCAS benchmark circuits.
                 Speedups improve with the size of the circuit and the
                 activity level in the circuit. Analyses of the
                 communication overhead show that the techniques
                 developed here will yield even higher gains as newer
                 networking technologies like ATM are employed to
                 connect workstations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "distributed computing; parallel logic simulation; PVM;
                 synchronous simulation",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Simulation}",
}

@Article{Yang:1997:HFM,
  author =       "Cheng-Hsing Yang and Chia-Chun Tsai and Jan-Ming Ho
                 and Sao-Jie Chen",
  title =        "Hmap: a fast mapper for {EPGAs} using extended {GBDD}
                 hash tables",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "135--150",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p135-yang/p135-yang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p135-yang/",
  abstract =     "A fast and efficient algorithm for technology mapping
                 of electrically programmable gate arrays (EPGAs) is
                 proposed. This Hmap algorithm covers the Boolean
                 network with programmed logic modules bottom-up.
The covering operation is based on collapsing the fanins of a
                 node to form a bigger supernode such that fewer
                 clusters are needed to be detected. Then Boolean
                 matching is used to detect whether the collapsed
                 supernode can be mapped into a logic module by looking
                 up an extended GBDD hash table. The use of this table
                 look-up matching can shorten the matching time
                 significantly. As shown in the experiments, the
                 average running time of Hmap is 20 times faster than
                 that of MIS-pga2.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Hardware --- Integrated Circuits --- Types
                 and Design Styles (B.7.1): {\bf Gate arrays}",
}

@Article{Mak:1997:BLM,
  author =       "Wai-Kei Mak and D. F. Wong",
  title =        "Board-level multiterminal net routing for
                 {FPGA-based} logic emulation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "151--167",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p151-mak/p151-mak.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p151-mak/",
  abstract =     "We consider a board-level routing problem applicable
                 to FPGA-based logic emulation systems such as the
                 Realizer System [Varghese et al. 1993] and the
                 Enterprise Emulation System [Maliniak 1992]
                 manufactured by Quickturn Design Systems. Optimal
                 algorithms have been proposed for the case where all
                 nets are two-terminal nets [Chan and Schlag 1993; Mak
                 and Wong 1995]. We show how multiterminal nets can be
                 handled by decomposition into two-terminal nets.
We show that the multiterminal net decomposition problem can be
                 modeled as a bounded-degree hypergraph-to-graph
                 transformation problem where hyperedges are
                 transformed to spanning trees. A network flow-based
                 algorithm that solves both problems is proposed. It
                 determines if there is a feasible decomposition and
                 gives one whenever such a decomposition exists.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "board-level routing; crossbars; field programmable
                 gate arrays; logic emulation; multi-terminal net
                 decomposition",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2):
                 {\bf Placement and routing}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification}",
}

@Article{Kahng:1997:ARI,
  author =       "Andrew B. Kahng and Sudhakar Muddu",
  title =        "Analysis of {RC} interconnections under ramp input",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "168--192",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p168-kahng/p168-kahng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p168-kahng/",
  abstract =     "We give new methods for calculating the time-domain
                 response for a finite-length distributed {\em RC\/}
                 line that is stimulated by a ramp input. The following
                 are our contributions. First, we obtain the solution
                 of the diffusion equation for a semi-infinite
                 distributed {\em RC\/} line with ramp input.
We then present a general and, in the limit, {\em exact\/}
                 approach to compute the time-domain response for
                 finite-length {\em RC\/} lines under ramp input by
                 summing distinct diffusions starting at either end of
                 the line. Next, we obtain analytical expressions for
                 the finite time-domain voltage response for an
                 open-ended finite {\em RC\/} line and for a finite
                 {\em RC\/} line with capacitive load. The delay
                 estimates using this method are very close to
                 SPICE-computed delays. Finally, we present a general
                 recursive equation for computing the higher-order
                 diffusion components due to reflections at the source
                 and load ends. Future work extends our method to
                 response computations in general interconnection trees
                 by modeling both reflection and transmission
                 coefficients at discontinuities.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "diffusion equation analysis; ramp input response; VLSI
                 interconnects",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale integration)};
                 Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}

@Article{Benini:1997:SBM,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "A survey of {Boolean} matching techniques for library
                 binding",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "193--226",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p193-benini/p193-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p193-benini/",
  abstract =     "When binding a logic network to a set of cells, a
                 fundamental problem is recognizing whether a cell can
                 implement a portion of the network. Boolean matching
                 means solving this task using a formalism based on
                 Boolean algebra. In its simplest form, Boolean
                 matching can be posed as a tautology check. We review
                 several approaches to Boolean matching as well as to
                 its generalization to cases involving
                 {\em don't care\/} conditions and its restriction to
                 specific libraries such as those typical of anti-fuse
                 based FPGAs. We then present a general formulation of
                 Boolean matching supporting multiple-output logic
                 cells.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1)",
}

@Article{Johnson:1997:DSM,
  author =       "Mark C. Johnson and Kaushik Roy",
  title =        "Datapath scheduling with multiple supply voltages and
                 level converters",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "227--248",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p227-johnson/p227-johnson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p227-johnson/",
  abstract =     "We present an algorithm called MOVER (Multiple
                 Operating Voltage Energy Reduction) to minimize
                 datapath energy dissipation through use of multiple
                 supply voltages. In a single voltage design, the
                 critical path length, clock period, and number of
                 control steps limit minimization of voltage and power.
                 Multiple supply voltages permit localized voltage
                 reductions to take up remaining schedule slack.
MOVER initially finds one minimum voltage for an entire
                 datapath. It then determines a second voltage for
                 operations where there is still schedule slack. New
                 voltages can be introduced and minimized until no
                 schedule slack remains. MOVER was exercised for a
                 variety of DSP datapath examples. Energy savings
                 ranged from 0\% to 50\% when comparing dual to single
                 voltage results. The benefit of going from two to
                 three voltages never exceeded 15\%. Power supply costs
                 are not reflected in these savings, but a simple
                 analysis shows that energy savings can be achieved
                 even with relatively inefficient DC-DC converters.
                 Datapath resource requirements were found to vary
                 greatly with respect to number of supplies. Area
                 penalties ranged from 0\% to 170\%. Implications of
                 multiple voltage design for IC layout and power supply
                 requirements are discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "datapath scheduling; DSP; high-level synthesis; level
                 conversion; low power design; multiple voltage; power
                 optimization; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Optimization}; Mathematics of
                 Computing --- Numerical Analysis --- Optimization
                 (G.1.6): {\bf Integer programming}",
}

@Article{Yalcin:1997:EPC,
  author =       "Hakan Yalcin and John P. Hayes",
  title =        "Event propagation conditions in circuit delay
                 computation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "249--280",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p249-yalcin/p249-yalcin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p249-yalcin/",
  abstract =     "Accurate and efficient computation of delays is a
                 central problem in computer-aided design of complex
                 VLSI circuits. Delays are determined by events (signal
                 transitions) propagated from the inputs of a circuit
                 to its outputs, so precise characterization of event
                 propagation is required for accurate delay
                 computation. Although many different propagation
                 conditions (PCs) have been proposed for delay
                 computation, their properties and relationships have
                 been far from clear. We present a systematic analysis
                 of delay computation based on a series of waveform
                 models that capture signal behavior rigorously at
                 different levels of detail. The most general model,
                 called the exact or W0 model, specifies each event
                 occurring in a circuit signal. A novel method is
                 presented that generates approximate waveforms by
                 progressively eliminating signal values from the exact
                 model. For each waveform model, we derive the PCs that
                 correctly capture the requirements under which an
                 event propagates along a path. The waveform models and
                 their PCs are shown to form a well-defined hierarchy,
                 which provides a means to trade accuracy for
                 computational effort. The relationships among the
                 derived PCs and existing ones are analyzed in depth.
It is proven that though many PCs, such as the popular floating
                 mode condition, produce a correct upper bound on the
                 circuit delay, they can fail to recognize event
                 propagation in some instances. This analysis further
                 enables us to derive new and useful PCs. We describe
                 such a PC, called safe static. Experimental results
                 demonstrate that safe static provides an excellent
                 accuracy/efficiency tradeoff.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay computation; event propagation; false path; path
                 sensitization; propagation condition; timing analysis;
                 waveform modeling",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Verification}",
}

@Article{Thadikaran:1997:ACB,
  author =       "Paul Thadikaran and Sreejit Chakravarty and Janak
                 Patel",
  title =        "Algorithms to compute bridging fault coverage of
                 {IDDQ} test sets",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "281--305",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p281-thadikaran/p281-thadikaran.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p281-thadikaran/",
  abstract =     "We present two algorithms, called list-based scheme
                 and tree-based scheme, to compute bridging fault (BF)
                 coverage of $ I_{DDQ} $ tests. These algorithms use
                 the novel idea of ``indistinguishable pairs,'' which
                 makes it more efficient and versatile than known fault
                 simulation algorithms.
Unlike known algorithms, the two algorithms can be used for
                 combinational as well as sequential circuits and for
                 arbitrary sets of BFs. Experiments show that the
                 tree-based scheme is, in general, better than the
                 list-based scheme. But the list-based scheme is better
                 for some classes of faults.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}",
}

%%% Check page gap: 306--311 between issues 3 and 4 of volume 2 ??
@Article{Xu:1997:LDR,
  author =       "Min Xu and Fadi J. Kurdahi",
  title =        "Layout-driven {RTL} binding techniques for high-level
                 synthesis using accurate estimators",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "312--343",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p312-xu/p312-xu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p312-xu/",
  abstract =     "The importance of effective and efficient accounting
                 of layout effects is well established in High-Level
                 Synthesis (HLS), since it allows more realistic
                 exploration of the design space and the generation of
                 solutions with predictable metrics. This feature is
                 highly desirable in order to avoid unnecessary
                 iterations through the design process. In this
                 article, we address the problem of layout-driven
                 register-transfer-level (RTL) binding as this step has
                 a direct relevance to the final performance of the
                 design.
By producing not only an RTL design but also an approximate
                 physical topology of the chip-level implementation, we
                 ensure that the solution will perform at the predicted
                 metric once implemented, thus avoiding unnecessary
                 delays in the design process.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1); Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}

@Article{Munch:1997:EIB,
  author =       "Michael M{\"u}nch and Norbert Wehn and Manfred
                 Glesner",
  title =        "An efficient {ILP-based} scheduling algorithm for
                 control-dominated {VHDL} descriptions",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "344--364",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p344-munch/p344-munch.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p344-munch/",
  abstract =     "To adopt behavioral synthesis techniques in existing
                 design flows, the synthesis methodology must provide
                 the designer with a mechanism to specify a component's
                 interface timing. This will permit pre- and
                 postsynthesis validation through cosimulation with
                 other subsystems or even through formal verification.
                 In control-flow dominated designs, additional timing
                 constraints will result in a complex
                 specification/constraint system for which the
                 scheduling problem has been shown to be NP-complete.
In this article, we present a mathematical framework for solving
                 a special instance of the scheduling problem in
                 control-flow dominated behavioral VHDL descriptions
                 given that the timing of I/O signals has been
                 completely or partially specified. It is based on a
                 code-transformation approach that fully preserves the
                 VHDL semantics. The scheduling problem is mapped onto
                 an integer linear program (ILP) solvable in polynomial
                 time assuming a restricted partial order on selected
                 statements. It captures both control-flow and timing
                 constraints in a single model and also exploits
                 dataflow information to optimize the statement
                 sequence across basic block boundaries.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}",
}

@Article{Freund:1997:CEA,
  author =       "L. Freund and M. Israel and F. Rousseau and J. M.
                 Berg{\'e} and M. Auguin and C. Belleudy and G.
                 Gogniat",
  title =        "A codesign experiment in acoustic echo cancellation
                 {GMDF}",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "365--383",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p365-freund/p365-freund.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p365-freund/",
  abstract =     "Continuous advances in processor and ASIC technologies
                 enable the integration of more and more complex
                 embedded systems. Embedded systems have become
                 commonplace in recent years. Since their
                 implementations generally require the use of
                 heterogeneous resources (e.g., processor cores, ASICs)
                 in one system with hard design constraints, the
                 importance of hardware/software codesign methodologies
                 increases steadily. HW/SW codesign approaches consist
                 generally of HW/SW partitioning and scheduling,
                 constrained code generation, and hardware and
                 interface synthesis. This article presents the
                 codesign of an industrial experiment in acoustic echo
                 cancellation (GMDF algorithm); and emphasizes the
                 partitioning and communication synthesis steps. This
                 experiment brings to light interesting problems such
                 as data and program distribution between system
                 memories and the modeling of communications in the
                 partitioning process.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3)",
}

@Article{Panda:1997:MDO,
  author =       "Preeti Ranjan Panda and Nikil D.
Dutt and Alexandru Nicolau",
  title =        "Memory data organization for improved cache
                 performance in embedded processor applications",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "384--409",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p384-panda/p384-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p384-panda/",
  abstract =     "Code generation for embedded processors opens up the
                 possibility for several performance optimization
                 techniques that have been ignored by traditional
                 compilers due to compilation time constraints. We
                 present techniques that take into account the
                 parameters of the data caches for organizing scalar
                 and array variables declared in embedded code into
                 memory, with the objective of improving data cache
                 performance. We present techniques for clustering
                 variables to minimize compulsory cache misses, and for
                 solving the memory assignment problem to minimize
                 conflict cache misses.
Our experiments with benchmark code kernels from DSP and other
                 domains on the CW4001 embedded processor from LSI
                 Logic indicate significant improvements in data cache
                 performance by the application of our memory
                 organization technique.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cache memory; data cache; memory synthesis; system
                 design; system synthesis",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Cache memories}; Software ---
                 Programming Languages --- Processors (D.3.4):
                 {\bf Compilers}",
}

@Article{Tomiyama:1997:CPT,
  author =       "Hiroyuki Tomiyama and Hiroto Yasuura",
  title =        "Code placement techniques for cache miss rate
                 reduction",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "410--429",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p410-tomiyama/p410-tomiyama.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p410-tomiyama/",
  abstract =     "In the design of embedded systems with cache memories,
                 it is important to minimize the cache miss rates to
                 reduce power consumption of the systems as well as
                 improve the performance. In this article, we propose
                 two code placement methods (a simplified method and a
                 refined one) to reduce miss rates of instruction
                 caches. We first define a simplified code placement
                 problem without an attempt to minimize the code size.
                 The problem is formulated as an integer linear
                 programming (ILP) problem, by which an optimal
                 placement can be found.
Experimental results show that the simplified method reduces
                 cache misses by an average of 30\% (max. 77\%).
                 However, the code size obtained by the simplified
                 method tends to be large, which inevitably leads to a
                 larger memory size. In order to overcome this
                 limitation, we further propose a refined code
                 placement method in which the code size provided by
                 the system designers must be satisfied. The
                 effectiveness of the refined method is also
                 demonstrated.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram
                 Design Aids (B.1.4): {\bf Languages and compilers};
                 Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Optimization}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram
                 Design Aids (B.1.4): {\bf Optimization}",
}

@Article{Johnson:1998:MAS,
  author =       "E. W. Johnson and J. B. Brockman",
  title =        "Measurement and analysis of sequential design
                 processes",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "1--20",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p1-johnson/p1-johnson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p1-johnson/",
  abstract =     "As design processes continue to increase in complexity
                 it is important to base process-improvement decisions
                 on quantitative analysis.
We describe the development of an analytical approach for
                 evaluating sequential design-process completion time
                 and for determining the sensitivities of design time
                 with respect to individual task durations and
                 transition probabilities. Techniques are also detailed
                 for collecting process metadata and calibrating a
                 design process model. Example applications illustrate
                 the use of the methodology in analyzing and improving
                 software and hardware design processes.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Documentation; Human Factors; Management;
                 Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "management science; sensitivity analysis; workflow",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6); Computing Milieux --- Computers and Education
                 --- Computer and Information Science Education
                 (K.3.2)",
}

@Article{Khordoc:1998:SVA,
  author =       "K. Khordoc and E. Cerny",
  title =        "Semantics and verification of action diagrams with
                 linear timing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "21--50",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p21-khordoc/p21-khordoc.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p21-khordoc/",
  abstract =     "Specifications containing linear timing constraints,
                 such as found in action diagrams (timing diagrams)
                 defining interface behaviors, are often used in
                 practice.
Although efficient $ O(n^3) $ shortest path algorithms exist for
                 computing the minimum and maximum time distances
                 between actions, subject to the timing constraints,
                 there is so far no accurate method that can decide (a)
                 whether a specification of this kind is realizable
                 (i.e., can be simulated by a causal system), and (b)
                 given the action diagrams of the interfaces of two or
                 more communicating systems, whether the systems
                 implementing such independent specifications will
                 correctly interoperate (i.e., satisfy the respective
                 protocols and timing assumptions). First we illustrate
                 the weakness of existing action diagram verification
                 techniques: the causality issue is not addressed, and
                 the proposed methods to answer the compatibility
                 (interoperability) question yield false negative
                 answers in many practical situations. We then define
                 the meaning of causality in an action diagram
                 specification and state a set of sufficient conditions
                 for causality to hold. This development then leads to
                 an exact procedure for the verification of the
                 interface compatibility of communicating action
                 diagrams. The results are illustrated on a practical
                 example.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "causality; compatibility of interfaces; hardware
                 interfaces; timing diagrams; timing verification",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Software --- Software Engineering ---
                 Requirements/Specifications (D.2.1)",
}

@Article{Liao:1998:NVC,
  author =       "S. Liao and K. Keutzer and S. Tjiang and S. Devadas",
  title =        "A new viewpoint on code generation for directed
                 acyclic graphs",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "51--75",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p51-liao/p51-liao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p51-liao/",
  abstract =     "We present a new viewpoint on code generation for
                 directed acyclic graphs (DAGs). Our formulation is
                 based on {\em binate covering}, the problem of
                 satisfying, with minimum cost, a set of disjunctive
                 clauses, and can take into account commutativity of
                 operators and of the machine model. An important
                 contribution of this work is a set of necessary and
                 sufficient conditions for a valid schedule to be
                 derived, based on the notion of {\em worms\/} and
                 {\em worm-partitions}. This set of conditions can be
                 compactly expressed with clauses that relate
                 scheduling to code selection. For the case of
                 one-register machines, we can derive clauses that lead
                 to generation of optimal code for the DAG. Recent
                 advances in exact binate covering algorithms allow us
                 to use this strategy to generate optimal code for
                 large basic blocks.
The optimal code generated by our algorithm results in significant reductions in overall code size.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Algorithms; Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binate covering; code generation; directed acyclic graphs",
  subject =      "Software --- Programming Languages --- Processors (D.3.4); Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2)",
}

@Article{Shi:1998:CCT,
  author =       "C.-J. Shi and J. A. Brzozowski",
  title =        "Cluster-cover: a theoretical framework for a class of {VLSI-CAD} optimization problems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "76--107",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p76-shi/p76-shi.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p76-shi/",
  abstract =     "This article introduces a mathematical framework called cluster-cover. We show that this framework captures the combinatorial structure of a class of VLSI design optimization problems, including two-level logic minimization, constrained encoding, multilayer topological planar routing, application timing assignment for delay-fault testing, and minimization of monitoring logic for BIST enhancement. These apparently unrelated problems can all be cast into two metaproblems in our framework: finding a maximum cluster and finding a minimum cover. We describe paradigms for developing algorithms for these problems. First, a simple heuristic called greedy peeling is presented and characterized. We derive sufficient conditions that guarantee optimum solutions by greedy peeling.
We generalize the performance analysis of a multilayer topological planar routing heuristic to greedy peeling for the general cluster-cover problems. We propose a performance bound of greedy set covering that can be computed efficiently for a given problem instance; this bound is much tighter than the previously known bounds. Second, prime covering --- originally developed for logic minimization --- is generalized to finding exact solutions for cluster-cover problems. Previously, only the connection between logic minimization and constrained encoding was known.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cluster-cover; logic minimization; NP-completeness; self-checking logic design; state assignment; topological routing",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Integrated Circuits --- General (B.7.0); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2)",
}

@Article{Hsiung:1998:IIC,
  author =       "Pao-Ann Hsiung and Chung-Hwang Chen and Trong-Yen Lee and Sao-Jie Chen",
  title =        "{ICOS}: an intelligent concurrent object-oriented synthesis methodology for multiprocessor systems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "109--135",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p109-hsiung/p109-hsiung.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p109-hsiung/",
  abstract =     "The design of multiprocessor architectures differs from uniprocessor systems in that the number of
processors and their interconnection must be considered. This leads to an enormous increase in the design-space exploration time, which is exponential in the total number of system components. The methodology proposed here, called {\em Intelligent Concurrent Object-Oriented Synthesis\/} (ICOS) methodology, makes feasible the synthesis of complex multiprocessor systems through the application of several techniques that speed up the design process. ICOS is based on {\em Performance Synthesis Methodology\/} (PSM), a recently proposed object-oriented system-level design methodology. Four major techniques: object-oriented design, fuzzy design-space exploration, concurrent design, and intelligent reuse of complete subsystems are integrated in ICOS. First, object-oriented modeling and design, through the use of object-oriented relationships and operators, make the whole design process manageable and maintainable in ICOS. Second, fuzzy comparison applied to the specializations or instances of components reduces the exponential growth of design-space exploration in ICOS. Third, independent components from different design alternatives are synthesized in parallel; this design concurrency shortens the overall design time. Lastly, the resynthesis of complete subsystems can be avoided through the application of learning, thus making the methodology intelligent enough to reuse previous design configurations.
Experiments show that all these applied techniques contribute to the synthesis efficiency and the degree of automation in ICOS.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "concurrent object-oriented system-level synthesis; fuzzy design-space exploration; learning",
  subject =      "Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Computing Methodologies --- Artificial Intelligence --- Learning (I.2.6): {\bf Knowledge acquisition}; Computing Methodologies --- Artificial Intelligence --- Learning (I.2.6): {\bf Analogies}; Computing Methodologies --- Artificial Intelligence --- Deduction and Theorem Proving (I.2.3): {\bf Deduction}; Computer Systems Organization --- Processor Architectures --- Multiple Data Stream Architectures (Multiprocessors) (C.1.2)",
}

@Article{Araujo:1998:CGF,
  author =       "Guido Araujo and Sharad Malik",
  title =        "Code generation for fixed-point {DSPs}",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "136--161",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p136-araujo/p136-araujo.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p136-araujo/",
  abstract =     "This paper examines the problem of code-generation for Digital Signal Processors (DSPs). We make two major contributions. First, for an important class of DSP architectures, we propose an optimal $ O(n) $ algorithm for the tasks of register allocation and instruction scheduling for expression trees.
Optimality is guaranteed by sufficient conditions derived from a structural representation of the processor Instruction Set Architecture (ISA). Second, we develop heuristics for the case when basic blocks are Directed Acyclic Graphs (DAGs).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; register allocation; scheduling",
  subject =      "Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}; Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}",
}

@Article{Tiruvuri:1998:ELB,
  author =       "Giri Tiruvuri and Moon Chung",
  title =        "Estimation of lower bounds in scheduling algorithms for high-level synthesis",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "162--180",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p162-tiruvuri/p162-tiruvuri.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p162-tiruvuri/",
  abstract =     "To produce efficient design, a high-level synthesis system should be able to analyze a variety of cost-performance tradeoffs. The system can use lower-bound performance estimate methods to identify and prune inferior designs without producing complete designs. We present a lower-bound performance estimate method that is not only faster than existing methods, but also produces better lower bounds. In most cases, the lower bound produced by our algorithm is tight.
\par Scheduling algorithms such as branch-and-bound need fast and effective lower-bound estimate methods, often for a large number of partially scheduled dataflow graphs, to reduce the search space. We extend our method to efficiently estimate completion time of partial schedules. This problem is not addressed by existing methods in the literature. Our lower-bound estimate is shown to be very effective in reducing the size of the search space when used in a branch-and-bound scheduling algorithm. \par Our methods can handle multicycle operations, pipelined functional units, and chaining of operations. We also present an extension to handle conditional branches. A salient feature of the extended method is its applicability to speculative execution as well as C-select implementation of conditional branches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic programming; high-level synthesis; lower-bound estimated; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation --- General (B.5.0); Hardware --- Performance and Reliability --- Performance Analysis and Design Aids (B.8.2); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Sequencing and scheduling}; Hardware --- Integrated Circuits --- General (B.7.0)",
}

@Article{Vahid:1998:FPI,
  author =       "Frank Vahid and Thuy Dm Le and Yu-Chin Hsu",
  title =        "Functional partitioning improvements over structural partitioning for packaging constraints and synthesis: tool performance",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "181--208",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/290833.290841",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
bibdate =        "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p181-vahid/p181-vahid.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p181-vahid/",
  abstract =     "Incorporating functional partitioning into a synthesis methodology leads to several important advantages. In functional partitioning, we first partition a functional specification into smaller subspecifications and then synthesize structure for each, in contrast to the current approach of first synthesizing structure for the entire specification and then partitioning that structure. One advantage is the improvement in I/O performance and package count, when partitioning among hardware blocks with size and I/O constraints, such as FPGAs or blocks within an ASIC. A second advantage is reduction in synthesis runtimes. We describe these important advantages, concluding that further research on functional partitioning can lead to improved results from synthesis environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "behavioral synthesis; functional partitioning; system-level design",
  subject =      "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Koch:1998:BBD,
  author =       "Gernot H. Koch and W. Rosenstiel and U.
Kebschull",
  title =        "Breakpoints and breakpoint detection in source-level emulation",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "209--230",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p209-koch/p209-koch.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p209-koch/",
  abstract =     "We present an approach for accelerating the validation speed of behavioral system descriptions through hardware emulation. The method allows source-level debugging of running hardware specified in behavioral VHDL in a way similar to source-level debugging in software programming languages. We discuss breakpoints in source-level emulation and how the circuit generated by high-level synthesis has to be modified to work with breakpoints. Breakpoint encoding and detection are shown in detail. Our approach allows breakpoint detection by hardware without seriously slowing the circuit or dramatically increasing its size.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Design; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "debugging; emulation; high-level synthesis",
  subject =      "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf VHDL}",
}

@Article{Pomeranz:1998:FTG,
  author =       "Irith Pomeranz and Sudhakar M.
Reddy",
  title =        "Functional test generation for delay faults in combinational circuits",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "231--248",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p231-pomeranz/p231-pomeranz.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p231-pomeranz/",
  abstract =     "We propose a functional fault model for delay faults in combinational circuits and describe a functional test generation procedure based on this model. The proposed method is most suitable when a gate-level description of the circuit-under-test, necessary for employing existing gate-level delay fault test generators, is not available or does not accurately describe the circuit. It is also suitable for generating tests in early design stages of a circuit, before a gate-level implementation is selected. In addition, it can potentially be employed to supplement conventional test generators for gate-level circuits to reduce the cost of handling large numbers of paths. A parameter called is used to control the number of functional faults targeted and thus the number of tests generated. If is unlimited, the functional test set detects every robustly testable path delay fault in any gate-level implementation of the given circuit. An appropriate subset of tests can be selected once the implementation is known. The test sets generated for various values of are fault simulated on gate-level realizations to demonstrate their effectiveness.
The experiments indicate that functional test sets may be able to identify functions whose realizations have low path delay fault coverage.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay faults; function-robust tests; functional delay fault model; path delay faults; robust tests",
  subject =      "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1)",
}

@Article{Chen:1998:SDI,
  author =       "X. T. Chen and F. J. Meyer and F. Lombardi",
  title =        "Structural diagnosis of interconnects by coloring",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "249--271",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p249-chen/p249-chen.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p249-chen/",
  abstract =     "This paper presents a new approach for diagnosing shorts in interconnects in which the adjacencies between nets are known. This structural approach exploits different graph coloring techniques to generate a test set with no aliasing and confounding, i.e., full diagnosis (detection and location) is accomplished. Initially, a simple coloring approach based on a greedy condition of the adjacency graph is proposed for fault detection. Then, the conditions for aliasing and confounding are analyzed with respect to the sizes of the possible shorts. These results are used to generate new colors using a process called color mixing.
Color mixing guarantees that additional tests, required in order to avoid aliasing/confounding, will use appropriate codes. The characteristics of unbalanced/balanced codes for encoding the colors in the vector-generation process of interconnect diagnosis are discussed and are proved to yield full diagnosis using a novel method. An algorithm for full diagnosis is then presented; this algorithm has an execution complexity of $ O(\max (N^2, N \times D^3)) $ where $N$ is the number of nets and $D$ is the maximum degree of the nodes in the adjacency graph. Simulation results show that the proposed approach requires a smaller number of test vectors than previous approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced code; diagnosis; graph coloring; interconnect; syndrome",
  subject =      "Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2); Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1)",
}

@Article{Mehta:1998:ESR,
  author =       "Dinesh P.
Mehta",
  title =        "Estimating the storage requirements of the rectangular and {L-shaped} corner stitching data structures",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "272--284",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p272-mehta/p272-mehta.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p272-mehta/",
  abstract =     "This paper proposes a technique for estimating the storage requirements of the Rectangular Corner Stitching (RCS) data structure [Ousterhout 1984] and the L-shaped Corner Stitching (LCS) data structure [Mehta and Blust 1997] on a given circuit by studying its (the circuit's) geometric properties. This provides a method for estimating the storage requirements of a circuit without having to implement the corner stitching data structure, which is a tedious and time-consuming task.
This technique can also be used to estimate the amount of space saved by employing the LCS data structure over the RCS data structure on a given circuit.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "corner stitching; data structures; L-shapes; memory requirements analysis; rectangle; rectilinear polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Data --- Data Storage Representations (E.2): {\bf Linked representations}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Geometrical problems and computations}",
}

@Article{Bhattacharya:1998:ERS,
  author =       "Subhrajit Bhattacharya and Sujit Dey and Franc Brglez",
  title =        "Effects of resource sharing on circuit delay: an assignment algorithm for clock period optimization",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "285--307",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p285-bhattacharya/p285-bhattacharya.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p285-bhattacharya/",
  abstract =     "This paper analyzes the effect of resource sharing and assignment on the clock period of the synthesized circuit. The assignment phase assigns or binds operations of the scheduled behavioral description to a set of allocated resources. We focus on control-flow intensive descriptions, characterized by the presence of mutually exclusive paths due to the presence of nested conditional branches and loops.
\par We show that clustering multiple operations in the same state of the schedule, possibly leading to chaining of functional units (FUs) in the RTL circuit, is an effective way to minimize the total number of clock cycles, and hence total execution time. We present an assignment algorithm that is particularly effective for such design styles by minimizing data chaining and hence the clock period of the circuit, thereby leading to further reduction in total execution time. \par Existing resource sharing and assignment approaches for reducing the clock period of the resulting circuit either increase the resource allocation or use faster modules, both leading to larger area requirements. In this paper we show that even when the type of available resource units and the number of resource units of each type is fixed, different assignments may lead to circuits with significant differences in clock period. \par We provide a comprehensive analysis of how resource sharing and assignment introduces long paths in the circuit. Based on the analysis, we develop an assignment algorithm that uses a high-level delay estimator to assign operations to a fixed set of available resources so as to minimize the clock period of the resultant circuit, with no or minimal effect on the area of the circuit.
Experimental results on several conditional-intensive designs demonstrate the effectiveness of the assignment algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clock period; high-level synthesis; resource sharing",
  subject =      "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}",
}

@Article{Cabodi:1998:AVB,
  author =       "Gianpiero Cabodi and Paolo Camurati and Stefano Quer",
  title =        "Auxiliary variables for {BDD-based} representation and manipulation of {Boolean} functions",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "309--340",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p309-cabodi/p309-cabodi.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p309-cabodi/",
  abstract =     "BDDs are the state-of-the-art technique for representing and manipulating Boolean functions. Their introduction caused a major leap forward in synthesis, verification, and testing. However, they are often unmanageable because of the large amount of nodes. To attack this problem, we insert auxiliary variables that decompose monolithic BDDs in smaller ones. This method works very well for Boolean function representation. As far as combinational circuits are concerned, representing their functions is the main issue. Going into the sequential domain, we focus on traversal techniques. We show that, once we have Boolean functions in decomposed form, symbolic manipulations are viable and efficient. We investigate the relation between auxiliary variables and static and dynamic ordering strategies.
Experimental evidence shows that we achieve a certain degree of independence from variable ordering. Thus, this approach can be an alternative to dynamic re-ordering. Experimental results on Boolean function representation, and exact and approximate forward symbolic traversal of FSMs, demonstrate the benefits both in terms of memory requirements and of CPU time.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagrams; finite state machines; functional decompositions; reachability analysis",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Verification}",
}

@Article{Cong:1998:BSC,
  author =       "Jason Cong and Andrew B. Kahng and Cheng-Kok Koh and C.-W. Albert Tsao",
  title =        "Bounded-skew clock and {Steiner} routing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "341--388",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p341-cong/p341-cong.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p341-cong/",
  abstract =     "We study the minimum-cost bounded-skew routing tree problem under the pathlength (linear) and Elmore delay models. This problem captures several engineering tradeoffs in the design of routing topologies with controlled skew. Our bounded-skew routing algorithm, called the BST/DME algorithm, extends the DME algorithm for exact zero-skew trees via the concept of {\em a merging region}.
For a {\em prescribed topology}, BST/DME constructs a bounded-skew tree (BST) in two phases: (i) a bottom-up phase to construct a binary tree of merging regions which represent the loci of possible embedding points of the internal nodes, and (ii) a top-down phase to determine the exact locations of the internal nodes. We present two approaches to construct the merging regions: (i) the {\em Boundary Merging and Embedding\/} (BME) method which utilizes merging points that are restricted to the {\em boundaries\/} of merging regions, and (ii) the {\em Interior Merging and Embedding\/} (IME) algorithm which employs a sampling strategy and a dynamic programming-based selection technique to consider merging points that are {\em interior\/} to, as well as on the boundary of, the merging regions. When the topology is not prescribed, we propose a new {\em Greedy\/}-BST/DME algorithm which combines the merging region computation with topology generation. The Greedy-BST/DME algorithm very closely matches the best known heuristics for the zero-skew case and for the unbounded-skew case (i.e., the Steiner minimal tree problem).
Experimental results show that our BST algorithms can produce a set of routing solutions with smooth skew and wire length tradeoffs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "(inter)connection; boundary merging and embedding; bounded-skew; clock tree; Elmore delay; interior merging and embedding; low power; merging region; merging segment; pathlength delay; Steiner tree; synchronization; VLSI; zero-skew",
  subject =      "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Jone:1998:CAD,
  author =       "Wen-Ben Jone and K. S. Tsai",
  title =        "Confidence analysis for defect-level estimation of {VLSI} random testing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "389--407",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p389-jone/p389-jone.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p389-jone/",
  abstract =     "The defect level in circuit testing is the percentage of circuits such as chips, that are defective and shipped for use after testing. Our previously published results showed that the defect level of circuit fabrication and testing should be a probability distribution, rather than a single value, and the concept of confidence degree was proposed [Gondalia et al. 1993; Jone et al. 1995]. In this work, defect level is represented by a confidence interval which is more conventional and easier to interpret.
The point estimate of defect level analysis and conditions to avoid meaningless confidence intervals are also investigated. Methods for adaptive random test length determination driven by different confidence intervals or interval length are proposed to meet both test requirements and test costs tradeoff. Finally, a complete test plan that can direct the test flow from fabrication infancy to maturity is suggested.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Experimentation; Measurement; Performance; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "defect level analysis; random testing; test confidence analysis; test quality; VLSI testing", subject = "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1)", } @Article{Mathur:1998:RAE, author = "Anmol Mathur and Ali Dasdan and Rajesh K. Gupta", title = "Rate analysis for embedded systems", journal = j-TODAES, volume = "3", number = "3", pages = "408--436", month = jul, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p408-mathur/p408-mathur.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p408-mathur/", abstract = "Embedded systems consist of interacting components that are required to deliver a specific functionality under constraints on execution rates and relative time separation of the components. In this article, we model an embedded system using concurrent processes interacting through synchronization. 
We assume that there are rate constraints on the execution rates of processes imposed by the designer or the environment of the system, where the execution rate of a process is the number of its executions per unit time. We address the problem of computing bounds on the execution rates of processes constituting an embedded system, and propose an interactive rate analysis framework. As part of the rate analysis framework we present an efficient algorithm for checking the consistency of the rate constraints. Bounds on the execution rate of each process are computed using an efficient algorithm based on the relationship between the execution rate of a process and the maximum mean delay cycles in the process graph. Finally, if the computed rates violate some of the rate constraints, some of the processes in the system are redesigned using information from the rate analysis step. This rate analysis framework is implemented in a tool called RATAN. We illustrate by an example how RATAN can be used in an embedded system design.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "average execution rate; concurrent system modeling; embedded systems; interactive rate violation debugging; rate analysis; rate constraints", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Modeling techniques}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Performance attributes}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Design studies}", } @Article{Pan:1998:OCP, author = "Peichen Pan and C. L.
Liu", title = "Optimal clock period {FPGA} technology mapping for sequential circuits", journal = j-TODAES, volume = "3", number = "3", pages = "437--462", month = jul, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p437-pan/p437-pan.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p437-pan/", abstract = "We study the technology mapping problem for sequential circuits for look-up table (LUT) based field programmable gate arrays (FPGAs). Existing approaches to the problem simply remove the flip-flops (FFs), then map the remaining combinational logic, and finally put the FFs back. These approaches ignore the sequential nature of a circuit and assume the positions of the FFs are fixed. However, FFs in a sequential circuit can be repositioned by a functionality-preserving transformation called retiming. As a result, existing approaches can only consider a very small portion of the available solution space. We propose in this paper a novel approach to the technology mapping problem. In our approach, retiming is integrated into the technology mapping process so as to consider the full solution space. We then present a polynomial technology mapping algorithm that, for a given circuit, produces a mapping solution with the minimum clock period among all possible ways of retiming. 
The effectiveness of the algorithm is also demonstrated experimentally.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "clock period; field-programmable gate arrays; FPGAs; logic replication; look-up tables; retiming; sequential synthesis; technology mapping", subject = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}", } @Article{Riepe:1998:EBD, author = "Michael A. Riepe and Karem A. Sakallah", title = "The edge-based design rule model revisited", journal = j-TODAES, volume = "3", number = "3", pages = "463--486", month = jul, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p463-riepe/p463-riepe.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p463-riepe/", abstract = "A model for integrated circuit design rules based on rectangle edge constraints has been proposed by Jeppson, Christensson, and Hedenstierna. This model appears to be the most rigorous proposed to date for the description of such edge-based design rules. However, in certain rare circumstances their model is unable to express the correct design rule when the constrained edges are not adjacent in the layout. We introduce a new notation, called an edge path, which allows us to extend their model to allow for constraints between edges separated by an arbitrary number of intervening edges. 
Using this notation we enumerate all edge paths that are required to correctly model the original design rule macros of the JCH model, and prove that these macros are sufficient to model the most common rules. We also show how this notation allows us to directly specify many kinds of conditional design rules that required ad hoc specification under the JCH model.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design rule checking; design rules; layout verification", subject = "Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}", } @Article{Su:1998:EFL, author = "Alan Su and Yu-Chin Hsu and Ta-Yung Liu and Mike Tien-Chien Lee", title = "Eliminating false loops caused by sharing in control path", journal = j-TODAES, volume = "3", number = "3", pages = "487--495", month = jul, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p487-su/p487-su.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p487-su/", abstract = "In high-level synthesis, resource sharing may result in a circuit containing false loops that create great difficulty in timing validation during the design sign-off phase. It is hence desirable to avoid generating any false loops in a synthesized circuit. Previous work [Stok 1992; Huang et al. 1995] considered mainly data path sharing for false loop elimination. However, for a complete circuit with both data path and control path, false loops can be created due to control logic sharing. 
In this article, we present a novel approach to detect and eliminate the false loops caused by control logic sharing. An effective filter is devised to reduce the computational complexity of false loop detection, which is based on checking the level numbers that are propagated from data path operators to inputs and outputs of the control path. Only the input/output pairs of the control path identified by the filter are further investigated by traversing into the data path for false loop detection. A removal algorithm is then applied to eliminate the detected false loops, followed by logic minimization to further optimize the circuit. Experimental results show that for the nine example circuits we tested, the final designs after false loop removal and logic minimization give only slightly larger area than the original ones that contain false loops.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "control path; false loop", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Verification}", } @Article{Zhou:1998:ORR, author = "Hai Zhou and D. F. 
Wong", title = "Optimal river routing with crosstalk constraints", journal = j-TODAES, volume = "3", number = "3", pages = "496--514", month = jul, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p496-zhou/p496-zhou.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p496-zhou/", abstract = "With the increasing density of VLSI circuits, the interconnection wires are being packed even closer. This has increased the effect of interaction among these wires on circuit performance and hence, the importance of controlling crosstalk. In this article, we consider river routing with crosstalk constraints. Given the positions of the pins in a single-layer routing channel and the maximum tolerable crosstalk between each pair of neighboring nets, we give a polynomial time algorithm to decide whether there is a feasible river routing solution and produce one with minimum crosstalk when it is feasible.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "crosstalk; river routing", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Network problems}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Passerone:1998:MRS, author = "C. Passerone and C. Sansoe and L. Lavagno and R. McGeer and J. Martin and R. Passerone and A. 
Sangiovanni-Vincentelli", title = "Modeling reactive systems in {Java}", journal = j-TODAES, volume = "3", number = "4", pages = "515--523", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p515-passerone/p515-passerone.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p515-passerone/", abstract = "We present an application of the Java\texttrademark{} programming language to specify and implement reactive real-time systems. We have developed and tested a collection of classes and methods to describe concurrent modules and their asynchronous communication by means of signals. The control structures are closely patterned after those of the synchronous language {\em Esterel}, succinctly describing concurrency, sequencing and preemption. We show the user-friendliness and efficiency of the proposed technique by using an example from the automotive domain.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Languages; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "high level design; Java; prototyping; simulation", subject = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Hardware description languages}; Computing Methodologies --- Simulation and Modeling --- Model Validation and Analysis (I.6.4); Computer Applications --- Physical Sciences and Engineering (J.2): {\bf Electronics}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Wang:1998:MEV, author = "Li-C. Wang and Magdy S.
Abadir and Jing Zeng", title = "On measuring the effectiveness of various design validation approaches for {PowerPC} microprocessor embedded arrays", journal = j-TODAES, volume = "3", number = "4", pages = "524--532", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p524-wang/p524-wang.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p524-wang/", abstract = "Design validation for embedded arrays remains as a challenging problem in today's microprocessor design environment. At Somerset, validation of array designs relies on both formal verification and vector simulation. Although several methods for array design validation have been proposed and had great success [Ganguly et al. 1996; Pandey et al. 1996, 1997; Wang and Abadir 1997], little evidence has been reported for the effectiveness of these methods with respect to the detection of design errors. In this paper, we measure the effectiveness of different validation approaches based on automatic design error injection and simulation. The technique provides a systematic way to evaluate various validation approaches at both logic and transistor levels. 
Experimental results on recent PowerPC microprocessor arrays will be discussed and reported.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "assertion test generation; ATPG; design error model; logic verification; symbolic trajectory evaluation; validation", subject = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Simulation}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Verification}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}", } @Article{Dasdan:1998:TDD, author = "Ali Dasdan and Dinesh Ramanathan and Rajesh K. Gupta", title = "A timing-driven design and validation methodology for embedded real-time systems", journal = j-TODAES, volume = "3", number = "4", pages = "533--553", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p533-dasdan/p533-dasdan.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p533-dasdan/", abstract = "We address the problem of timing constraint derivation and validation for reactive and real-time embedded systems. We assume that such a system is structured into its tasks, and the structure is modeled using a task graph. Our solution uses the timing behavior committed by the environment to the system first to derive the timing constraints on the system's internal behavior and then use them to derive and validate the timing constraints on the system's external behavior. 
Our solution consists of the following contributions: a generalized task graph model, a comprehensive classification of timing constraints, algorithms for derivation and validation of timing constraints of the system modeled in the generalized task graph model, a codesign methodology that combines the model and the algorithms, and the implementation of this methodology in a tool called RADHA-RATAN. The main advantages of our solution are that it simplifies the problem of ensuring timing correctness of the system by reducing the complexity of the problem from system level to task level, and that it makes the codesign methodology timing-driven in that our solution makes it possible to maintain a handle on the system's timing correctness from very early stages in the system's design flow.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Performance; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "performance verification; period assignment; period derivation; rate assignment; rate derivation; requirements analysis; system-level design; timing analysis; timing-driven codesign", subject = "Computer Systems Organization --- General (C.0): {\bf Systems specification methodology}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Modeling techniques}; Computer Systems Organization --- Performance of Systems (C.4): {\bf Performance attributes}; Software --- Operating Systems --- Organization and Design (D.4.7): {\bf Real-time systems and embedded systems}; Software --- Operating Systems --- Performance (D.4.8): {\bf Modeling and prediction}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Rajan:1998:ASD, author = "S. P. Rajan and M. Fujita and K. 
Yuan and M. T.-C. Lee", title = "{ATM} switch design by high-level modeling, formal verification and high-level synthesis", journal = j-TODAES, volume = "3", number = "4", pages = "554--562", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p554-rajan/p554-rajan.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p554-rajan/", abstract = "Asynchronous Transfer Mode (ATM) has emerged as a backbone for high-speed broadband telecommunication networks. In this paper, we present ATM switch design, starting from a parametric high-level model and debugging the model using a combination of formal verification and simulation. The model has been used to synthesize ATM switches according to customers' choices, by choosing concrete values for each of the generic parameters. We provide a pragmatic combination of simulation, model checking, and theorem proving to gain confidence in the ATM switch design correctness.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "ATM switch; high-level design; synthesis; verification", subject = "Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Huggins:1998:SVP, author = "James K.
Huggins and David {Van Campenhout}", title = "Specification and verification of pipelining in the {ARM2} {RISC} microprocessor", journal = j-TODAES, volume = "3", number = "4", pages = "563--580", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p563-huggins/p563-huggins.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p563-huggins/", abstract = "Gurevich Abstract State Machines (ASMs) provide a sound mathematical basis for the specification and verification of systems. An application of the ASM methodology to the verification of a pipelined microprocessor (an ARM2 implementation) is described. Both the sequential execution model and final pipelined model are formalized using ASMs. A series of intermediate models are introduced that gradually expose the complications of pipelining. The first intermediate model is proven equivalent to the sequential model in the absence of structural, control, and data hazards. 
In the following steps, these simplifying assumptions are lifted one by one, and the original proof is refined to establish the equivalence of each intermediate model with the sequential model, leading ultimately to a full proof of equivalence of the sequential and pipelined models.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "abstract state machines; ARM processor; design verification; formal verification; pipelined processors; pipelining", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2); Computer Systems Organization --- General (C.0): {\bf Systems specification methodology}; Computer Systems Organization --- Processor Architectures --- Single Data Stream Architectures (C.1.1)", } @Article{VanCampenhout:1998:HLD, author = "D. {Van Campenhout} and H. Al-Asaad and J. P. Hayes and T. Mudge and R. B. Brown", title = "High-level design verification of microprocessors via error modeling", journal = j-TODAES, volume = "3", number = "4", pages = "581--599", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p581-campenhout/p581-campenhout.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p581-campenhout/", abstract = "A design verification methodology for microprocessor hardware based on modeling design errors and generating simulation vectors for the modeled errors via physical fault testing techniques is presented. We have systematically collected design error data from a number of microprocessor design projects. 
The error data is used to derive error models suitable for design verification testing. A class of basic error models is identified and shown to yield tests that provide good coverage of common error types. To improve coverage for more complex errors, a new class of conditional error models is introduced. An experiment to evaluate the effectiveness of our methodology is presented. Single actual design errors are injected into a correct design, and it is determined if the methodology will generate a test that detects the actual errors. The experiment has been conducted for two microprocessor designs and the results indicate that very high coverage of actual design errors can be obtained with test sets that are complete for a small number of synthetic error models.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design errors; design verification; error modeling", subject = "Hardware --- General (B.0); Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2)", } @Article{Hasteer:1998:EEC, author = "G. Hasteer and A. Mathur and P. 
Banerjee", title = "Efficient equivalence checking of multi-phase designs using phase abstraction and retiming", journal = j-TODAES, volume = "3", number = "4", pages = "600--625", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p600-hasteer/p600-hasteer.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p600-hasteer/", abstract = "Equivalence checking of finite state machines (FSMs) traditionally assumes single phase machines where a single clock (implicit or explicit) synchronizes the state of the FSM. We extend the equivalence checking paradigm to FSMs with multi-phase clocks. Such designs are becoming increasingly popular in high performance microprocessors since they result in lower synchronization overhead. In addition, aggressive pipelining and the use of ``sparse'' encodings results in designs where the ratio of steady states to the total state space is very low. In this paper, we show that automatically transforming such designs to ones that have more ``dense'' encodings can result in significant benefits in using implicit BDD-based techniques for their verification. We explore two such techniques: {\em phase abstraction\/} and {\em retiming\/} and demonstrate their utility in the context of FSM equivalence checking. The main contributions of our work are: \par --We show that a multi-phase FSM can be transformed to a functionally equivalent one phase FSM and this phase abstraction leads to significant improvement in the size of FSMs that can be checked for equivalence. 
\par --We show that min-latch retiming preserves equivalence and can be performed efficiently in multi-phase designs, even when latch borrowing and discarding is allowed at the primary inputs and outputs. \par --We demonstrate the utility of our approach on several controller FSMs from the industry.", acknowledgement = ack-nhfb, annote = "Article title page incorrectly has Bannerjee instead of Banerjee.", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance; Theory; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "binary decision diagram; encoding density; multi-phase FSM; product machine; sequential hardware equivalence; steady states", subject = "Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Verification}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Benso:1998:ELC, author = "A. Benso and P. Prinetto and M. Rebaudengo and M. {Sonza Reorda}", title = "{EXFI}: a low-cost fault injection system for embedded microprocessor-based boards", journal = j-TODAES, volume = "3", number = "4", pages = "626--634", month = oct, year = "1998", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p626-benso/p626-benso.pdf; http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p626-benso/", abstract = "Evaluating the faulty behavior of low-cost embedded microprocessor-based boards is an increasingly important issue, due to their adoption in many safety critical systems.
The architecture of a complete Fault Injection environment is proposed, integrating a module for generating a collapsed list of faults, and another for performing their injection and gathering the results. To address this issue, the paper describes a software-implemented Fault Injection approach based on the Trace Exception Mode available in most microprocessors. The authors describe EXFI, a prototypical system implementing the approach, and provide data about some sample benchmark applications. The main advantages of EXFI are the low cost, the good portability, and the high efficiency.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Measurement", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "fault coverage; fault injection; microprocessor systems; software-implemented fault injection; trace exception mode", subject = "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1); Hardware --- Performance and Reliability --- Performance Analysis and Design Aids (B.8.2)", } @Article{Gasteier:1999:BBC, author = "Michael Gasteier and Manfred Glesner", title = "Bus-based communication synthesis on system level", journal = j-TODAES, volume = "4", number = "1", pages = "1--11", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p1-gasteier/p1-gasteier.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p1-gasteier/", abstract = "In this article, we present an approach to automatic generation of communication topologies for statically scheduled systems of subsystems.
Given a specification containing a set of processes that communicate via abstract send and receive functions, we show how a cost-efficient communication topology consisting of one or more buses without arbitration scheme can be set up for such applications.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bus generation; bus without arbitration; communication synthesis; statically scheduled systems; transfer scheduling", subject = "Hardware --- Input/Output and Data Communications --- Interconnections (Subsystems) (B.4.3)", } @Article{Liao:1999:TCB, author = "Stan Liao and Srinivas Devadas and Kurt Keutzer", title = "A text-compression-based method for code size minimization in embedded systems", journal = j-TODAES, volume = "4", number = "1", pages = "12--38", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p12-liao/p12-liao.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p12-liao/", abstract = "We address the problem of code-size minimization in VLSI systems with embedded DSP processors. Reducing code size reduces the production cost of embedded systems. We use data-compression methods to develop code-size minimization strategies. In our framework, the compressed program consists of a skeleton and a dictionary. We show that the dictionary can be computed by solving a set-covering problem derived from the original program. To execute the compressed code, we describe two methods that have different performance characteristics and different degrees of freedom in compressing the code.
We also address performance considerations, and show that they can be incorporated easily into the set-covering formulation, and present experimental results obtained with Texas Instruments' optimizing TMS320C25 compiler.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Experimentation; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code size optimization; compression", subject = "Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}; Data --- Coding and Information Theory (E.4): {\bf Data compaction and compression}", } @Article{Song:1999:CDP, author = "Xiaoyu Song and Yuke Wang", title = "On the crossing distribution problem", journal = j-TODAES, volume = "4", number = "1", pages = "39--51", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p39-song/p39-song.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p39-song/", abstract = "VLSI layout design is typically decomposed into four steps: {\em placement, global routing, routing region definition, and detailed routing}. The crossing distribution problem occurs prior to detailed routing [Groenveld 1989; Marek-Sadowska and Sarrafzadeh 1995; Wang and Shung 1992]. A {\em crossing\/} is defined as the intersection of two nets. The problem of net crossing distribution is important in layout design, such as design of dense chips, multichip modules (MCM), critical net routing, and analog circuits [Groenveld 1989; Sarrafzadeh 1995; Wang and Shung 1992].
It is observed that nets crossing each other are more difficult to route than those that do not cross. The layout of crossing nets has to be realized in more than two layers and requires a larger number of {\em vias}. In this paper we study the crossing distribution problem of two-terminal nets between two regions. We present an optimal $ O(n^2) $ time algorithm for two-sided nets, where $ n $ is the number of nets. Our results are superior to previous ones [Marek-Sadowska and Sarrafzadeh 1995; Wang and Shung 1992]. We give an optimal $ O(n^2) $ time algorithm for the crossing distribution problem with one-sided nets. We solve optimally the complete version of the crossing distribution problem for two-terminal nets in two regions that has not been studied before.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "crossings; VLSI layout", subject = "Hardware --- Integrated Circuits (B.7); Hardware --- Integrated Circuits --- Design Aids (B.7.2); Theory of Computation --- Analysis of Algorithms and Problem Complexity (F.2); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Sequencing and scheduling}", } @Article{Tseng:1999:TLL, author = "Jyh-Mou Tseng and Jing-Yang Jou", title = "Two-level logic minimization for low power", journal = j-TODAES, volume = "4", number = "1", pages = "52--69", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p52-tseng/p52-tseng.pdf; 
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p52-tseng/", abstract = "In this paper we present a complete Boolean method for reducing the power consumption in two-level combinational circuits. The two-level logic optimizer performs the logic minimization for low power targeting static PLA, general logic gates, and dynamic PLA implementations. We modify the Espresso algorithm by adding our heuristics, which bias logic minimization toward lowering power dissipation. In our heuristics, signal probabilities and transition densities are two important parameters. The experimental results are promising.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "logic synthesis; low power design; programmable logic array; two-level logic minimization", subject = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational logic}; Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Logic arrays}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}", } @Article{Vahid:1999:PCT, author = "Frank Vahid", title = "Procedure cloning: a transformation for improved system-level functional partitioning", journal = j-TODAES, volume = "4", number = "1", pages = "70--96", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p70-vahid/p70-vahid.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p70-vahid/", abstract = "Functional partitioning 
assigns the functions of a system's program-like specification among system components, such as standard-software and custom-hardware processors. We introduce a new transformation, called procedure cloning, that significantly improves functional partitioning results. The transformation creates a clone of a procedure for sole use by a particular procedure caller, so the clone can be assigned to the caller's processor, which in turn improves performance through reduced communication. Heuristics are used to prevent the exponential size increase that could occur if cloning were done indiscriminately. We introduce a variety of cloning heuristics, highlight experiments demonstrating the improvements obtained using cloning, and compare the various cloning heuristics.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "behavioral synthesis; embedded systems; functional partitioning; hardware/software codesign; replication; system-level design; system-on-a-chip; transformations", subject = "Hardware --- General (B.0); Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Wang:1999:PRP, author = "Qi Wang and Sarma B. K. 
Vrudhula and Gary Yeap and Shantanu Ganguly", title = "Power reduction and power-delay trade-offs using logic transformations", journal = j-TODAES, volume = "4", number = "1", pages = "97--121", month = jan, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p97-wang/p97-wang.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p97-wang/", abstract = "We present an efficient technique to reduce the switching activity in a technology-mapped CMOS combinational circuit based on local logic transformations. The transformations consist of adding redundant connections or gates so as to reduce switching activity. We describe simple and efficient procedures, based on logic implication, for identifying the sources and targets of the redundant connections. Additionally, we give procedures that permit the designer to trade-off power and delay after the transformations. Results of experiments on both the MCNC benchmark circuits and the circuits of a PowerPC microprocessor chip are given. The results indicate that significant power reduction of a CMOS combinational circuit can be achieved with very low area overhead, delay penalty, and computational cost.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "CMOS logic; logic optimization; logic synthesis; low power; power estimation", subject = "Hardware --- Integrated Circuits --- General (B.7.0); Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational logic}", } @Article{Kern:1999:FVH, author = "Christoph Kern and Mark R. 
Greenstreet", title = "Formal verification in hardware design: a survey", journal = j-TODAES, volume = "4", number = "2", pages = "123--193", month = apr, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p123-kern/p123-kern.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p123-kern/", abstract = "In recent years, formal methods have emerged as an alternative approach to ensuring the quality and correctness of hardware designs, overcoming some of the limitations of traditional validation techniques such as simulation and testing. \par There are two main aspects to the application of formal methods in a design process: the formal framework used to specify desired properties of a design and the verification techniques and tools used to reason about the relationship between a specification and a corresponding implementation. We survey a variety of frameworks and techniques proposed in the literature and applied to actual designs. The specification frameworks we describe include temporal logics, predicate logic, abstraction and refinement, as well as containment between $ \omega $-regular languages. The verification techniques presented include model checking, automata-theoretic techniques, automated theorem proving, and approaches that integrate the above methods. 
\par In order to provide insight into the scope and limitations of currently available techniques, we present a selection of case studies where formal methods were applied to industrial-scale designs, such as microprocessors, floating-point hardware, protocols, memory subsystems, and communications hardware.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "case studies; formal methods; formal verification; hardware verification; language containment; model checking; survey; theorem proving", subject = "General Literature --- Introductory and Survey (A.1); Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}", } @Article{Lee:1999:BBI, author = "Kuen-Jong Lee and Jing-Jou Tang and Tsung-Chu Huang", title = "{BIFEST}: a built-in intermediate fault effect sensing and test generation system for {CMOS} bridging faults", journal = j-TODAES, volume = "4", number = "2", pages = "194--218", month = apr, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p194-lee/p194-lee.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p194-lee/", abstract = "This paper presents BIFEST, an ATPG system that employs the built-in intermediate voltage test technique in an efficient ATPG process to deal with CMOS bridging faults. Fast and accurate calculations of the intermediate bridging voltages and the variant threshold tolerance margins on a resistive bridging fault model are presented. 
A PODEM-like, PPSFP-based ATPG process is developed to generate test patterns for faults that are conventionally logic-testable. The remaining faults are then dealt with by special circuits, called built-in intermediate voltage sensors (BIVSs). By this methodology, almost the same fault coverage as that employing $ I_{DDQ} $ testing can be achieved with only logic monitoring required.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Integrated Circuits --- General (B.7.0)", } @Article{Thornton:1999:BSC, author = "M. A. Thornton and V. S. S. Nair", title = "Behavioral synthesis of combinational logic using spectral-based heuristics", journal = j-TODAES, volume = "4", number = "2", pages = "219--230", month = apr, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p219-thornton/p219-thornton.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p219-thornton/", abstract = "A prototype system developed to convert a behavioral representation of a Boolean function in OBDD form into an initial structural representation is described and experimental results are given. The system produces a multilevel circuit using heuristic rules based on properties of a subset of spectral coefficients. Since the behavioral description is in OBDD form, efficient methods are used to quickly compute the small subset of spectral coefficients needed for the application of the heuristics. 
The heuristics guide subsequent decompositions of the OBDD, resulting in an iterative construction of the structural form. At each stage of the translation, the form of the decomposition is chosen in order to achieve optimization goals.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "automatic synthesis; decision diagram; decision diagrams; design aids; logic design; spectral methods", subject = "Hardware --- Logic Design --- Design Aids (B.6.3)", } @Article{Cheng:1999:CGN, author = "Wei-Kai Cheng and Youn-Long Lin", title = "Code generation of nested loops for {DSP} processors with heterogeneous registers and structural pipelining", journal = j-TODAES, volume = "4", number = "3", pages = "231--256", month = jul, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p231-cheng/p231-cheng.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p231-cheng/", abstract = "We propose a microcode-optimizing method targeting a programmable DSP processor. Efficient generation of microcodes is essential to better utilize the computation power of a DSP processor. Since most state-of-the-art DSP processors feature some sort of irregular architectures and most DSP applications have nested loop constructs, their code generation is a nontrivial task. In this paper, we consider two features frequently found in contemporary DSP processors -- structural pipelining and heterogeneous registers. We propose a code generator that performs instruction scheduling and register allocation simultaneously. 
The proposed approach has been implemented and evaluated using a set of benchmark core algorithms. Simulation of the generated codes targeted towards the TI TMS320C40 DSP processor shows that our system is indeed more effective compared with a commercial optimizing DSP compiler.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code generation; DSP", subject = "Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}", } @Article{Li:1999:PEE, author = "Yau-Tsun Steven Li and Sharad Malik and Andrew Wolfe", title = "Performance estimation of embedded software with instruction cache modeling", journal = j-TODAES, volume = "4", number = "3", pages = "257--279", month = jul, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p257-li/p257-li.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p257-li/", abstract = "Embedded systems generally interact in some way with the outside world. This may involve measuring sensors and controlling actuators, communicating with other systems, or interacting with users. These functions impose real-time constraints on system design. Verification of these specifications requires computing an upper bound on the worst-case execution time (WCET) of a hardware/software system. Furthermore, it is critical to derive a tight upper bound on WCET in order to make efficient use of system resources. \par The problem of bounding WCET is particularly difficult on modern processors. 
These processors use cache-based memory systems that vary memory access time based on the dynamic memory access pattern of the program. This must be accurately modeled in order to tightly bound WCET. Several analysis methods have been proposed to bound WCET on processors with instruction caches. Existing approaches either search all possible program paths, an intractable problem, or they use highly pessimistic assumptions to limit the search space. In this paper we present a more effective method for modeling instruction cache activity and computing a tight bound on WCET. The method uses an integer linear programming formulation and does not require explicit enumeration of program paths. The method is implemented in the program {\tt cinderella} and we present some experimental results of this implementation.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Computer Systems Organization --- Performance of Systems (C.4): {\bf Modeling techniques}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}", } @Article{Shi:1999:SSL, author = "C.-J. Richard Shi and Michael W. 
Tian", title = "Simulation and sensitivity of linear analog circuits under parameter variations by {Robust} interval analysis", journal = j-TODAES, volume = "4", number = "3", pages = "280--312", month = jul, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p280-shi/p280-shi.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p280-shi/", abstract = "An interval-mathematic approach is presented for frequency-domain simulation and sensitivity analysis of linear analog circuits under parameter variations. With uncertain parameters represented as intervals, bounding frequency-domain responses is formulated as the problem of solving systems of linear interval equations. The formulation is based on a variant of modified nodal analysis, and is particularly amenable to interval analysis. Some characterizations of the solution sets of systems of linear interval equations are derived. With these characterizations, an elegant and efficient algorithm is proposed to solve systems of linear interval equations. While the widely used Monte Carlo approach requires many circuit simulations to achieve even moderate accuracy, the computational cost of the proposed approach is about twice that of one circuit simulation. The computed response bounds contain provably, or are usually very close to, the actual response bounds. Further, sensitivity under parameter variations can be computed from the response bounds at minor computational cost. 
The algorithms are implemented in SPICE3F5, using sparse-matrix techniques and tested on several practical analog circuits.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "interval mathematics; process variations; sensitivity; uncertainty; worst-case analysis", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}; Mathematics of Computing --- Numerical Analysis --- Numerical Linear Algebra (G.1.3): {\bf Linear systems (direct and iterative methods)}; Mathematics of Computing --- Numerical Analysis --- Numerical Linear Algebra (G.1.3): {\bf Sparse, structured, and very large systems (direct and iterative methods)}; Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided manufacturing (CAM)}", } @Article{Wurth:1999:FMO, author = "Bernd Wurth and Ulf Schlichtmann and Klaus Eckl and Kurt J. Antreich", title = "Functional multiple-output decomposition with application to technology mapping for lookup table-based {FPGAs}", journal = j-TODAES, volume = "4", number = "3", pages = "313--350", month = jul, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p313-wurth/p313-wurth.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p313-wurth/", abstract = "Functional decomposition is an important technique for technology mapping to look up table-based FPGA architectures. 
We present the theory of and a novel approach to functional disjoint decomposition of multiple-output functions, in which common subfunctions are extracted during technology mapping. \par While a Boolean function usually has a very large number of subfunctions, we show that not all of them are useful for multiple-output decomposition. We use a partition of the set of bound set vertices as the basis to compute {\em preferable\/} decomposition functions, which are sufficient for an optimal multiple-output decomposition. \par We propose several new algorithms that deal with central issues of functional multiple-output decomposition. First, an efficient algorithm to solve the variable partitioning problem is described. Second, we show how to implicitly compute all preferable functions of a single-output function and how to identify all common preferable functions of a multiple-output function. Due to implicit computation in the crucial steps, the algorithm is very efficient. Experimental results show significant reductions in area.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation; Performance; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "assignable functions; Boolean functions; computer-aided design of VLSI; decomposition; FPGA technology; implicit BDD-based methods; mapping synthesis; multiple-output decomposition; preferable functions; subfunction sharing gain; subfunction sharing potential; TOS; variable partitioning for decomposition", subject = "Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}", } @Article{Benini:1999:SSC, author = "L. Benini and G. {De Micheli} and E. Macii and M. Poncino and R. 
Scarsi", title = "Symbolic synthesis of clock-gating logic for power optimization of synchronous controllers", journal = j-TODAES, volume = "4", number = "4", pages = "351--375", month = oct, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p351-benini/p351-benini.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p351-benini/", abstract = "Recent results have shown that dynamic power management is effective in reducing the total power consumption of sequential circuits. In this paper, we propose a bottom-up approach for the automatic extraction and synthesis of dynamic power management circuitry starting from structural logic-level specifications. Our techniques leverage the compact BDD-based representation of Boolean and pseudo-Boolean functions to detect idle conditions where the clock can be stopped without compromising functional correctness. Moreover, symbolic techniques allow accurate probabilistic computations; in particular, they enable the use of non-equiprobable primary input distributions, a key step in the construction of models that match the behavior of real hardware devices with a high degree of fidelity. 
The results are encouraging, since power savings of up to 34\% have been obtained on standard benchmark circuits.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}", } @Article{Choi:1999:FDA, author = "Kyumyung Choi and Steven P. Levitan", title = "A flexible datapath allocation method for architectural synthesis", journal = j-TODAES, volume = "4", number = "4", pages = "376--404", month = oct, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p376-choi/p376-choi.pdf; http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p376-choi/", abstract = "We present a robust datapath allocation method that is flexible enough to handle constraints imposed by a variety of target architectures. Key features of this method are its ability to handle accurate modeling of datapath units and the simultaneous optimization of direct objective functions. The proposed method consists of a new binding model construction scheme and an optimization technique based on simulated annealing. 
To illustrate the flexibility of this method, two datapath allocation procedures have been developed for two problem environments: (1) a procedure that incorporates interconnection area and delay estimates, where floor-planning is tightly integrated into datapath allocation; and (2) a procedure that handles registers, register files, and multiport memories for data storage, as well as random and linear topologies for interconnection architectures. Results from these two applications show our method produces competitive designs for benchmark circuits, as well as being flexible enough to be used for a variety of different domains.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "allocation and binding; high-level synthesis", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Optimization}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Mathematics of Computing --- Numerical Analysis --- Optimization (G.1.6); Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}", } @Article{Hong:1999:POU, author = "Inki Hong and Miodrag Potkonjak and Ramesh Karri", title = "Power optimization using divide-and-conquer techniques for minimization of the number of operations", journal = j-TODAES, volume = "4", number = "4", pages = "405--429", month = oct, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p405-hong/p405-hong.pdf; 
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p405-hong/", abstract = "We introduce an approach for power optimization using a set of compilation and architectural techniques. The key technical innovation is a novel divide-and-conquer compilation technique to minimize the number of operations for general computations. Our technique optimizes not only a significantly wider set of computations than the previously published techniques, but also outperforms (or performs at least as well as other techniques) on all examples. Along the architectural dimension, we investigate coordinated impact of compilation techniques on the number of processors which provide optimal trade-off between cost and power. We demonstrate that proper compilation techniques can significantly reduce power with bounded hardware cost. The effectiveness of all techniques and algorithms is documented on numerous real-life designs.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code generation; transformations", subject = "Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}", } @Article{Potkonjak:1999:MAD, author = "Miodrag Potkonjak and Wayne Wolf", title = "A methodology and algorithms for the design of hard real-time multitasking {ASICs}", journal = j-TODAES, volume = "4", number = "4", pages = "430--459", month = oct, year = "1999", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p430-potkonjak/p430-potkonjak.pdf; 
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p430-potkonjak/", abstract = "Traditional high-level synthesis concentrates on the implementation of a single task (e.g., filter, linear controller, A/D converter). However, many applications---multifunctional embedded controllers, intelligent wireless end-points, and DSP and multimedia servers---are defined as sets of several computational tasks. This paper describes new techniques for the synthesis of ASIC implementations that realize multiple computational processes under hard real-time constraints. Our synthesis methodology establishes connections between two important computer engineering domains: operating systems and behavioral synthesis. Our hierarchical approach starts from an incompletely-specified preliminary solution and uses, interchangeably, operating system and behavioral synthesis techniques to derive increasingly more detailed and accurate design solutions. We have experimented with both optimal and heuristic algorithms to implement this methodology. The optimal algorithm uses several heuristics to speed up the average run time of an exhaustive branch-and-bound search. Force-directed optimization is the core of the heuristic synthesis method. Analysis of the proposed algorithms and the experiments shows that matching the number of bits and type of operations in tasks assigned to the same application-specific processor was the most important factor in obtaining area-efficient designs.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Algorithms implemented in hardware}", } @Article{DosSantos:2000:CMP, author = "Luiz C. V. {Dos Santos} and M. J. M. Heijligers and C. A. J. {Van Eijk} and J. {Van Eijndhoven} and J. A. G. 
Jess", title = "A code-motion pruning technique for global scheduling", journal = j-TODAES, volume = "5", number = "1", pages = "1--33", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 09:50:12 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p1-dos_santos/p1-dos_santos.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p1-dos_santos/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Fang:2000:MFP, author = "Wen-Jong Fang and Allen C.-H. Wu", title = "Multiway {FPGA} partitioning by fully exploiting design hierarchy", journal = j-TODAES, volume = "5", number = "1", pages = "34--50", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p34-fang/p34-fang.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p34-fang/", abstract = "In this paper, we present a new integrated synthesis and partitioning method for multiple-FPGA applications. Our approach bridges the gap between HDL synthesis and physical partitioning by fully exploiting the design hierarchy. We propose a novel multiple-FPGA synthesis and partitioning method which is performed in three phases: (1) fine-grained synthesis, (2) functional-based clustering, and (3) hierarchical set-covering partitioning. 
This method first synthesizes a design specification in a fine-grained way so that functional clusters can be preserved based on the structural nature of the design specification. Then, it applies a hierarchical set-covering partitioning method to form the final FPGA partitions. Experimental results on a number of benchmarks and industrial designs demonstrate that IO limits are the bottleneck for CLB utilization when applying a traditional multiple-FPGA synthesis method on flattened netlists. In contrast, by fully exploiting the design structural hierarchy during the multiple-FPGA partitioning, our proposed method produces fewer FPGA partitions with higher CLB and lower IO-pin utilizations.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "fine-grained synthesis; functional clustering; multi-way partitioning; multiple-FPGA synthesis", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Hardware --- Integrated Circuits --- Design Aids (B.7.2)", } @Article{Hsiung:2000:CCM, author = "Pao-Ann Hsiung", title = "{CMAPS}: a cosynthesis methodology for application-oriented parallel systems", journal = j-TODAES, volume = "5", number = "1", pages = "51--81", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p51-hsiung/p51-hsiung.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p51-hsiung/", abstract = "Currently, a lot of research is devoted to {\em system design}, and little work is done on {\em requirements analysis}. 
Besides going from specification to design, one of our main objectives is to show how an application problem can be transformed into specifications. Working from the hardware-software codesign perspective, a system is designed starting from an application problem itself, rather than the detailed behavioral specifications. Given an application problem specified as a directed acyclic graph of elementary problems, a hardware-software solution is derived such that the synthesized software, a parallel pseudoprogram, can be scheduled and executed on the synthesized hardware, a set of system-level parallel computer specifications, with heuristically optimal performance. This is known as system-level cosynthesis of application-oriented general-purpose parallel systems for which a novel methodology called {\em Cosynthesis Methodology for Application-Oriented Parallel Systems\/} (CMAPS), is presented. Since parallel programs and multiprocessor architectures are largely interdependent, CMAPS explores the relationship between hardware designs and software algorithms by interleaving the modeling phases and the synthesis phases of both hardware and software. High scalability in terms of problem complexity and easy upgradability to new technologies are achieved through modularization of the input problem specification, of the software algorithms, and of the hardware subsystem models. The work presented in this paper will be beneficial to designers of general-purpose parallel computer systems which must be oriented toward solving some user-specified problem such as the global controller of an industry automation process or a multiprocessor video server.
Some application examples are given to illustrate various codesign phases of CMAPS and its feasibility.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "application-oriented general-purpose multiprocessors; hardware-software modeling and cosynthesis; requirements analysis", subject = "Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Computer Systems Organization --- General (C.0): {\bf System architectures}; Computer Systems Organization --- General (C.0): {\bf Systems specification methodology}; Computer Systems Organization --- Processor Architectures --- Multiple Data Stream Architectures (Multiprocessors) (C.1.2); Computer Systems Organization --- Computer System Implementation --- General (C.5.0); Computer Systems Organization --- Processor Architectures --- Parallel Architectures (C.1.4)", } @Article{Mehta:2000:UFR, author = "Dinesh P. Mehta and Naveed Sherwani", title = "On the use of flexible, rectilinear blocks to obtain minimum-area floorplans in mixed block and cell designs", journal = j-TODAES, volume = "5", number = "1", pages = "82--97", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p82-mehta/p82-mehta.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p82-mehta/", abstract = "This paper presents three minimum-area floorplanning algorithms that use flexible arbitrary rectilinear shapes for the standard cell regions in MBC design. The first algorithm (pure HCST) introduces a grid traversal technique which guarantees a minimum-area floorplan. 
The second algorithm (Hybrid-BF) uses a combination of HCST and Breadth First (BF) traversals to give a practical solution that approximately places flexible blocks at specified locations called {\em seeds}. The third algorithm (Hybrid-MBF) improves on the shapes of the flexible blocks generated by Hybrid-BF by using a combination of HCST and a Modified Breadth First (MBF) traversal. All three algorithms are polynomial in the number of grid squares. Optimized implementations of Hybrid-BF and Hybrid-MBF required less than two seconds on a SUN SPARCstation 10.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "floorplanning; mixed block and cell designs; rectilinear polygons", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Routing and layout}; Mathematics of Computing --- Discrete Mathematics --- Graph Theory (G.2.2): {\bf Graph algorithms}", } @Article{Sapatnekar:2000:PDO, author = "Sachin S. Sapatnekar and Weitong Chuang", title = "Power-delay optimizations in gate sizing", journal = j-TODAES, volume = "5", number = "1", pages = "98--114", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p98-sapatnekar/p98-sapatnekar.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p98-sapatnekar/", abstract = "The problem of power-delay tradeoffs in transistor sizing is examined using a nonlinear optimization formulation.
Both the dynamic and the short-circuit power are considered, and a new modeling technique is used to calculate the short-circuit power. The notion of transition density is used, with an enhancement that considers the effect of gate delays on the transition density. When the short-circuit power is neglected, the minimum power circuit is identical to the minimum area circuit. However, under our more realistic models, our experimental results on several circuits show that the minimum power circuit is not necessarily the same as the minimum area circuit.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "optimization; power estimation; transistor sizing; VLSI layout", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}", } @Article{Benini:2000:SLPa, author = "Luca Benini and Giovanni {De Micheli}", title = "System-level power optimization: techniques and tools", journal = j-TODAES, volume = "5", number = "2", pages = "115--192", month = apr, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p115-benini/p115-benini.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p115-benini/", abstract = "This tutorial surveys design methods for energy-efficient system-level design. We consider electronic systems consisting of a hardware platform and software layers. We consider the three major constituents of hardware that consume energy, namely computation, communication, and storage units, and we review methods of reducing their energy consumption.
We also study models for analyzing the energy cost of software, and methods for energy-efficient software design and compilation. This survey is organized around three main phases of a system design: conceptualization and modeling, design and implementation, and runtime management. For each phase, we review recent techniques for energy-efficient design of both hardware and software.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Integrated Circuits --- Design Aids (B.7.2); Hardware --- Performance and Reliability --- Performance Analysis and Design Aids (B.8.2); Computer Systems Organization --- Processor Architectures --- General (C.1.0); Software --- Software Engineering --- Design Tools and Techniques (D.2.2)", } @Article{Cong:2000:SGD, author = "Jason Cong and Yean-Yow Hwang", title = "Structural gate decomposition for depth-optimal technology mapping in {LUT-based} {FPGA} designs", journal = j-TODAES, volume = "5", number = "2", pages = "193--225", month = apr, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p193-cong/p193-cong.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p193-cong/", abstract = "In this paper we study structural gate decomposition in general, simple gate networks for depth-optimal technology mapping using $K$-input Lookup-Tables ($K$-LUTs).
We show that (1) structural gate decomposition in any $K$-bounded network results in an optimal mapping depth smaller than or equal to that of the original network, regardless of the decomposition method used; and (2) the problem of structural gate decomposition for depth-optimal technology mapping is NP-hard for $K$-unbounded networks when $ K \geq 3$ and remains NP-hard for $K$-bounded networks when $ K \geq 5$. Based on these results, we propose two new structural gate decomposition algorithms, named {\tt DOGMA} and {\tt DOGMA-m}, which combine the level-driven node-packing technique (used in FlowMap) and the network flow-based labeling technique (used in {\tt Chortle-d}) for depth-optimal technology mapping. Experimental results show that (1) among five structural gate decomposition algorithms, {\tt DOGMA-m} results in the best mapping solutions; and (2) compared with {\tt speed\_up} (an algebraic algorithm) and {\tt TOS} (a Boolean approach), {\tt DOGMA-m} completes decomposition of all tested benchmarks in a short time while {\tt speed\_up} and {\tt TOS} fail in several cases. However, {\tt speed\_up} results in the smallest depth and area in the following technology mapping steps.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "computer-aided design of VLSI; decomposition; delay minimization; FPGA; logic optimization; programmable logic; simplification; synthesis; system design; technology mapping", subject = "Hardware --- Logic Design --- Design Styles (B.6.1); Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Automatic synthesis}; Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1)", } @Article{Hwang:2000:PSS, author = "Chi-Hong Hwang and Allen C.-H.
Wu", title = "A predictive system shutdown method for energy saving of event-driven computation", journal = j-TODAES, volume = "5", number = "2", pages = "226--241", month = apr, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p226-hwang/p226-hwang.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p226-hwang/", abstract = "This paper presents a system-level power management technique for energy savings of event-driven application. We present a new predictive system-shutdown method to exploit sleep mode operations for energy saving. We use an exponential-average approach to predict the upcoming idle period. We introduce two mechanisms, prediction-miss correction and prewake-up, to improve the hit ratio and to reduce the delay overhead.
Experiments on four different event-driven applications show that our proposed method achieves high hit ratios in a wide range of delay overheads, which results in a high degree of energy saving with low delay penalties.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "event-driven; power management; predictive; sleep mode; system shutdown", subject = "Computer Applications --- Computer-Aided Engineering (J.6)", } @Article{Sudarsanam:2000:SRA, author = "Ashok Sudarsanam and Sharad Malik", title = "Simultaneous reference allocation in code generation for dual data memory bank {ASIPs}", journal = j-TODAES, volume = "5", number = "2", pages = "242--264", month = apr, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p242-sudarsanam/p242-sudarsanam.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p242-sudarsanam/", abstract = "We address the problem of code generation for DSP systems on a chip. In such systems, the amount of silicon devoted to program ROM is limited, so application software must be sufficiently dense. Additionally, the software must be written so as to meet various high-performance constraints, which may include hard real-time constraints. Unfortunately, current compiler technology is unable to generate high-quality code for DSPs, whose architectures are highly irregular. Thus, designers often resort to programming application software in assembly--a time-consuming task. In this paper, we focus on providing support for an architectural feature of DSPs that makes code generation difficult, namely multiple data memory banks.
This feature increases memory bandwidth by permitting multiple data memory accesses to occur in parallel when the referenced variables belong to different data memory banks and the registers involved conform to a strict set of conditions. We present an algorithm that attempts to maximize the benefit of this architectural feature. While previous approaches have decoupled the phases of register allocation and memory bank assignment, thereby compromising code quality, our algorithm performs these two phases simultaneously. Experimental results demonstrate that our algorithm not only generates high-quality compiled code, but also improves the quality of completely-referenced code.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code generation; code optimization; graph labelling; memory bank assignment; register allocation", subject = "Software --- Programming Languages --- Processors (D.3.4); Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}; Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}; Software --- Programming Languages --- Processors (D.3.4): {\bf Optimization}", } @Article{Irwin:2000:E, author = "Mary Jane Irwin", title = "Editorial", journal = j-TODAES, volume = "5", number = "3", pages = "265--266", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p265-irwin/p265-irwin.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p265-irwin/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL =
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bahar:2000:POT, author = "R. Iris Bahar and Ernest T. Lampe and Enrico Macii", title = "Power optimization of technology-dependent circuits based on symbolic computation of logic implications", journal = j-TODAES, volume = "5", number = "3", pages = "267--293", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p267-bahar/p267-bahar.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p267-bahar/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "aids; automation; design synthesis; logic design", subject = "Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational logic}; Hardware --- Control Structures and Microprogramming --- Microprogram Design Aids (B.1.4): {\bf Optimization}; Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Optimization}; Hardware --- Performance and Reliability --- General (B.8.0); Computer Applications --- Physical Sciences and Engineering (J.2): {\bf Electronics}", } @Article{Balakrishnan:2000:AFS, author = "M.
Balakrishnan and Heman Khanna", title = "Allocation of {FIFO} structures in {RTL} data paths", journal = j-TODAES, volume = "5", number = "3", pages = "294--310", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p294-balakrishnan/p294-balakrishnan.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p294-balakrishnan/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "data path; FIFO; ILP; RTL; synthesis", subject = "Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1); Mathematics of Computing --- Probability and Statistics (G.3): {\bf Queueing theory}", } @Article{Benini:2000:SLPb, author = "L. Benini and G.
{De Micheli}", title = "Synthesis of low-power selectively-clocked systems from high-level specification", journal = j-TODAES, volume = "5", number = "3", pages = "311--321", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p311-benini/p311-benini.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p311-benini/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "algorithms; design; gated clock; high-level synthesis; low power", subject = "Hardware --- Control Structures and Microprogramming --- Control Structure Performance Analysis and Design Aids (B.1.2); Hardware --- Performance and Reliability --- General (B.8.0); Theory of Computation --- Computation by Abstract Devices --- Models of Computation (F.1.1): {\bf Unbounded-action devices}", } @Article{Blythe:2000:EOD, author = "Stephen A. Blythe and Robert A.
Walker", title = "Efficient optimal design space characterization methodologies", journal = j-TODAES, volume = "5", number = "3", pages = "322--336", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p322-blythe/p322-blythe.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p322-blythe/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bounding; clock-length determination; design space exploration; efficient searching; high-level synthesis; module selection; scheduling", subject = "Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Performance and Reliability --- General (B.8.0); Computing Methodologies --- Simulation and Modeling --- General (I.6.0); Computer Applications --- Physical Sciences and Engineering (J.2): {\bf Electronics}", } @Article{Bogliolo:2000:RBR, author = "Alessandro Bogliolo and Luca Benini and Giovanni {De Micheli}", title = "Regression-based {RTL} power modeling", journal = j-TODAES, volume = "5", number = "3", pages = "337--372", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p337-bogliolo/p337-bogliolo.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p337-bogliolo/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "adaptive characterization; functional macros; regression models; RTL design; RTL power modeling", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2); Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Performance and Reliability --- General (B.8.0); Computing Milieux --- Management of Computing and Information Systems --- Installation Management (K.6.2): {\bf Benchmarks}", } @Article{Bommu:2000:RBF, author = "Surendra Bommu and Niall O'Neill and Maciej Ciesielski", title = "Retiming-based factorization for sequential logic optimization", journal = j-TODAES, volume = "5", number = "3", pages = "373--398", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p373-bommu/p373-bommu.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p373-bommu/", abstract = "Current sequential optimization techniques apply a variety of logic transformations that mainly target the combinational logic component of the circuit. Retiming is typically applied as a postprocessing step to the gate-level implementation obtained after technology mapping. This paper introduces a new sequential logic transformation which integrates retiming with logic transformations at the technology-independent level. This transformation is based on implicit retiming across logic blocks and fanout stems during logic optimization. Its application to sequential network synthesis results in the optimization of logic across register boundaries. It can be used in conjunction with any measure of circuit quality for which a fast and reliable gain estimation method can be obtained.
We implemented our new technique within the SIS framework and demonstrated its effectiveness in terms of cycle-time minimization on a set of sequential benchmark circuits.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "finite state machines; retiming; sequential synthesis", subject = "Hardware --- General (B.0); Hardware --- Logic Design (B.6)", } @Article{Carchiolo:2000:HSS, author = "Vincenza Carchiolo and Michele Malgeri and Giuseppe Mangioni", title = "Hardware\slash software synthesis of formal specifications in codesign of embedded systems", journal = j-TODAES, volume = "5", number = "3", pages = "399--432", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p399-carchiolo/p399-carchiolo.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p399-carchiolo/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "codesign; embedded system; hardware and software synthesis", subject = "Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2); Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}; Computer Systems Organization --- General (C.0); Software --- Software Engineering --- Requirements/Specifications (D.2.1); Theory of Computation --- Mathematical Logic and Formal Languages --- Formal Languages (F.4.3)", } @Article{Chang:2000:TDR, author = "Yao-Wen Chang and Kai Zhu and D.
F. Wong", title = "Timing-driven routing for symmetrical array-based {FPGAs}", journal = j-TODAES, volume = "5", number = "3", pages = "433--450", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p433-chang/p433-chang.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p433-chang/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "computer-aided design of VLSI; field-programmable gate array; layout; synthesis", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Routing and layout}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Computer Applications --- Computer-Aided Engineering (J.6)", } @Article{Gelosh:2000:MLT, author = "Donald S. Gelosh and Dorothy E.
Setliff", title = "Modeling layout tools to derive forward estimates of area and delay at the {RTL} level", journal = j-TODAES, volume = "5", number = "3", pages = "451--491", month = jul, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p451-gelosh/p451-gelosh.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p451-gelosh/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "estimation; estimation techniques; layout; machine learning; VLSI CAD", subject = "Hardware --- Input/Output and Data Communications --- Performance Analysis and Design Aids** (B.4.4); Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}; Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}; Computing Methodologies --- Artificial Intelligence --- Learning (I.2.6): {\bf Concept learning}; Computing Methodologies --- Simulation and Modeling --- Simulation Output Analysis (I.6.6)", } @Article{Gogniat:2000:CBE, author = "G. Gogniat and M. Auguin and L. Bianco and A.
Pegatoquet", title = "A codesign back-end approach for embedded system design", journal = j-TODAES, volume = "5", number = "3", pages = "492--509", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p492-gogniat/p492-gogniat.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p492-gogniat/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "codesign; communications synthesis; HW/SW integration; template architecture", subject = "Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}; Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Advanced technologies}", } @Article{Gupta:2000:CIP, author = "Avaneendra Gupta and John P.
Hayes", title = "{CLIP}: integer-programming-based optimal layout synthesis of {$2$D CMOS} cells", journal = j-TODAES, volume = "5", number = "3", pages = "510--547", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p510-gupta/p510-gupta.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p510-gupta/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "circuit clustering; CMOS networks; diffusion sharing; integer linear programming; integer programming; layout optimization; leaf cell synthesis; module generation; transistor chains; two-dimensional layout", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Memory technologies}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Layout}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}; Mathematics of Computing --- Numerical Analysis --- Optimization (G.1.6): {\bf Integer programming}; Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Specialized application languages}; Computer Applications --- Computer-Aided Engineering (J.6)", } @Article{Hsiao:2000:DST, author = "Michael S. Hsiao and Elizabeth M. Rudnick and Janak H. 
Patel", title = "Dynamic state traversal for sequential circuit test generation", journal = j-TODAES, volume = "5", number = "3", pages = "548--565", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p548-hsiao/p548-hsiao.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p548-hsiao/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "automatic test pattern generation (ATPG); finite-state-machine traversal; genetic algorithms; sequential circuits; simulation-based; testing", subject = "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1); Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Computer Applications --- Computer-Aided Engineering (J.6); Computing Methodologies --- Artificial Intelligence --- Problem Solving, Control Methods, and Search (I.2.8): {\bf Heuristic methods}", } @Article{Jha:2000:HLL, author = "Pradip K. Jha and Nikil D. 
Dutt", title = "High-level library mapping for memories", journal = j-TODAES, volume = "5", number = "3", pages = "566--603", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p566-jha/p566-jha.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p566-jha/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "high-level synthesis; memory libraries; technology-mapping", subject = "Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Primary memory}; Hardware --- Register-Transfer-Level Implementation --- Design (B.5.1): {\bf Memory design}; Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf Automatic synthesis}", } @Article{Lalgudi:2000:OCE, author = "Kumar N. Lalgudi and Marios C. 
Papaefthymiou and Miodrag Potkonjak", title = "Optimizing computations for effective block-processing", journal = j-TODAES, volume = "5", number = "3", pages = "604--630", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p604-lalgudi/p604-lalgudi.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p604-lalgudi/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "combinatorial optimization; computation dataflow graphs; embedded systems; high-level synthesis; integer linear programming; retiming; scheduling; vectorization", subject = "Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Signal processing systems}; Computing Methodologies --- Pattern Recognition --- Applications (I.5.4): {\bf Signal processing}; Mathematics of Computing --- Numerical Analysis --- Optimization (G.1.6): {\bf Integer programming}; Mathematics of Computing --- Discrete Mathematics --- General (G.2.0); Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Sequencing and scheduling}; Computing Methodologies --- Artificial Intelligence --- Problem Solving, Control Methods, and Search (I.2.8): {\bf Scheduling}; Computer Applications --- Computer-Aided Engineering (J.6)", } @Article{Long:2000:FFA, author = "David E. Long and Mahesh A. 
Iyer and Miron Abramovici", title = "{FILL} and {FUNI}: algorithms to identify illegal states and sequentially untestable faults", journal = j-TODAES, volume = "5", number = "3", pages = "631--657", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p631-long/p631-long.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p631-long/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "automatic test generation; illegal states; sequential circuits; untestable faults", subject = "Hardware --- Performance and Reliability --- General (B.8.0); Hardware --- Control Structures and Microprogramming --- General (B.1.0); Hardware --- Arithmetic and Logic Structures --- General (B.2.0); Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Arithmetic and Logic Structures --- High-Speed Arithmetic (B.2.4): {\bf Algorithms}; Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}; Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1)", } @Article{Marculescu:2000:SSM, author = "Diana Marculescu and Radu Marculescu and Massoud Pedram", title = "Stochastic sequential machine synthesis with application to constrained sequence generation", journal = j-TODAES, volume = "5", number = "3", pages = "658--681", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p658-marculescu/p658-marculescu.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p658-marculescu/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "algorithms; design; performance; theory", subject = "Data --- Coding and Information Theory (E.4): {\bf Data compaction and compression}; Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Logic Design --- Design Aids (B.6.3); Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1): {\bf VLSI (very large scale integration)}; Hardware --- Performance and Reliability --- General (B.8.0); Theory of Computation --- Computation by Abstract Devices --- Models of Computation (F.1.1); Mathematics of Computing --- Probability and Statistics (G.3): {\bf Stochastic processes}", } @Article{Panda:2000:CVC, author = "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru Nicolau", title = "On-chip vs. off-chip memory: the data partitioning problem in embedded processor-based systems", journal = j-TODAES, volume = "5", number = "3", pages = "682--704", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p682-panda/p682-panda.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p682-panda/", abstract = "Efficient utilization of on-chip memory space is extremely important in modern embedded system applications based on processor cores. 
In addition to a data cache that interfaces with slower off-chip memory, a fast on-chip SRAM, called Scratch-Pad memory, is often used in several applications, so that critical data can be stored there with a guaranteed fast access time. We present a technique for efficiently exploiting on-chip Scratch-Pad memory by partitioning the application's scalar and arrayed variables into off-chip DRAM and on-chip Scratch-Pad SRAM, with the goal of minimizing the total execution time of embedded applications. We also present extensions of our proposed memory assignment strategy to handle context switching between multiple programs, as well as a generalized memory hierarchy. Our experiments on code kernels from typical applications show that our technique results in significant performance improvements.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Measurement; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "data cache; data partitioning; memory synthesis; on-chip memory; scratch-pad memory; system design; system synthesis", subject = "Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Cache memories}; Software --- Programming Languages --- Processors (D.3.4): {\bf Compilers}", } @Article{Raimi:2000:EML, author = "Richard Raimi and Ramin Hojati and Kedar S. 
Namjoshi", title = "Environment modeling and language universality", journal = j-TODAES, volume = "5", number = "3", pages = "705--725", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p705-raimi/p705-raimi.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p705-raimi/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "abstraction; environment modeling; language universality; model checking", subject = "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1); Computer Systems Organization --- Performance of Systems (C.4); Computer Applications --- Computer-Aided Engineering (J.6); Theory of Computation --- Computation by Abstract Devices --- Models of Computation (F.1.1): {\bf Automata}; Software --- Software Engineering --- Software/Program Verification (D.2.4): {\bf Model checking}; Theory of Computation --- Computation by Abstract Devices --- Models of Computation (F.1.1): {\bf Unbounded-action devices}", } @Article{Yan:2000:TLB, author = "Jin-Tai Yan", title = "Three-layer bubble-sorting-based {nonManhattan} channel routing", journal = j-TODAES, volume = "5", number = "3", pages = "726--734", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p726-yan/p726-yan.pdf; 
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p726-yan/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bubble-sorting algorithm; channel routing; three-layer nonManhattan routing model", subject = "Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Routing and layout}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Verification}; Hardware --- Performance and Reliability --- General (B.8.0); Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Input/Output and Data Communications --- Input/Output Devices (B.4.2): {\bf Channels and controllers}", } @Article{Yang:2000:ERC, author = "Cheng-Hsing Yang and Sao-Jie Chen and Jan-Ming Ho and Chia-Chun Tsai", title = "Efficient routability check algorithms for segmented channel routing", journal = j-TODAES, volume = "5", number = "3", pages = "735--747", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p735-yang/p735-yang.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p735-yang/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "field programmable gate arrays (FPGAs); routing; segmented channel", subject = "Hardware --- Input/Output and Data Communications --- Input/Output Devices (B.4.2): {\bf Channels and controllers}; Hardware --- 
Integrated Circuits --- Types and Design Styles (B.7.1): {\bf Gate arrays}; Computer Applications --- Computer-Aided Engineering (J.6); Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Placement and routing}; Theory of Computation --- Analysis of Algorithms and Problem Complexity --- Nonnumerical Algorithms and Problems (F.2.2): {\bf Routing and layout}", } @Article{Marwedel:2000:GE, author = "Peter Marwedel", title = "Guest {Editorial}", journal = j-TODAES, volume = "5", number = "4", pages = "749--751", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p749-marwedel/p749-marwedel.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p749-marwedel/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Computing Milieux --- Computers and Society --- Organizational Impacts (K.4.3)", } @Article{Aditya:2000:CSM, author = "Shail Aditya and Scott A. Mahlke and B. 
Ramakrishna Rau", title = "Code size minimization and retargetable assembly for custom {EPIC} and {VLIW} instruction formats", journal = j-TODAES, volume = "5", number = "4", pages = "752--773", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p752-aditya/p752-aditya.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p752-aditya/", abstract = "PICO is a fully automated system for designing the architecture and the microarchitecture of VLIW and EPIC processors. A serious concern with this class of processors, due to their very long instructions, is their code size. One focus of this paper is to describe a series of code size minimization techniques used within PICO, some of which are applied during the automatic design of the instruction format, while others are applied during program assembly. The design of a retargetable assembler to support these techniques also poses certain novel challenges, which constitute the second focus of this paper. 
Contrary to widely held perceptions, we demonstrate that it is entirely possible to design VLIW and EPIC processors that are capable of issuing large numbers of operations per cycle, but whose code size is only moderately larger than that for a sequential CISC processor.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Measurement", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code size minimization; custom templates; design automation; EPIC; instruction format design; noop compression; retargetable assembly; VLIW", subject = "Computer Systems Organization --- Processor Architectures --- Single Data Stream Architectures (C.1.1): {\bf RISC/CISC, VLIW architectures}; Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}; Software --- Programming Languages --- Processors (D.3.4): {\bf Retargetable compilers}; Hardware --- Control Structures and Microprogramming --- Control Structure Performance Analysis and Design Aids (B.1.2)", } @Article{VanEijk:2000:CAC, author = "Koen {Van Eijk} and Bart Mesman and Carlos A.
Alba Pinto and Qin Zhao and Marco Bekooij and Jef {Van Meerbergen} and Jochen Jess", title = "Constraint analysis for code generation: basic techniques and applications in {FACTS}", journal = j-TODAES, volume = "5", number = "4", pages = "774--793", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 09:50:12 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p774-van_eijk/p774-van_eijk.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p774-van_eijk/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Leupers:2000:GBC, author = "Rainer Leupers and Steven Bashford", title = "Graph-based code selection techniques for embedded processors", journal = j-TODAES, volume = "5", number = "4", pages = "794--814", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p794-leupers/p794-leupers.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p794-leupers/", abstract = "Code selection is an important task in code generation for programmable processors, where the goal is to find an efficient mapping of machine-independent intermediate code to processor-specific machine instructions. Traditional approaches to code selection are based on tree parsing which enables fast and optimal code selection for intermediate code given as a set of data-flow trees. 
While this approach is generally useful in compilers for general-purpose processors, it may lead to poor code quality in the case of embedded processors. The reason is that the special architectural features of embedded processors require performing code selection on data-flow graphs, which are a more general representation of intermediate code. In this paper, we present data-flow graph-based code selection techniques for two architectural families of embedded processors: media processors with support for SIMD instructions and fixed-point DSPs with irregular data paths. Both techniques exploit the fact that, in the area of embedded systems, high code quality is a much more important goal than high compilation speed. We demonstrate that certain architectural features can only be utilized by graph-based code selection, while in other cases this approach leads to a significant increase in code quality as compared to tree-based code selection.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Experimentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code selection; data-flow graphs; embedded processors; irregular data paths; SIMD instructions", subject = "Software --- Programming Languages --- Processors (D.3.4): {\bf Code generation}", } @Article{Pees:2000:RCS, author = "Stefan Pees and Andreas Hoffmann and Heinrich Meyr", title = "Retargetable compiled simulation of embedded processors using a machine description language", journal = j-TODAES, volume = "5", number = "4", pages = "815--834", month = jan, year = "2000", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = 
"http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p815-pees/p815-pees.pdf; http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p815-pees/", abstract = "Fast processor simulators are needed for the software development of embedded processors, for HW/SW cosimulation systems, and for profiling and design of application-specific processors. Such fast simulators can be generated based on the machine description language LISA. Using this language to model processor architectures enables the generation of compiled simulators on various abstraction levels, assemblers, and compiler back ends. The article discusses the requirements of software development tools on processor models and presents the approach based on the LISA language. Furthermore, the implementation of a retargetable environment consisting of compiled simulator, debugger, and assembler is presented. Measurements for a verified, cycle-based LISA model of the TI TMS320C62$ \times $ DSP show that this approach achieves between 37$ \times $ and 170$ \times $ higher simulation speed compared to a commercial simulator using a standard technique and the same accuracy level.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Languages; Performance; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compiled simulation; DSP processors; HW/SW cosimulation; instruction set simulators; machine description languages; processor modeling and simulation; system-on-chip", subject = "Computing Methodologies --- Simulation and Modeling --- Model Development (I.6.5): {\bf Modeling methodologies}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}; Hardware --- Control Structures and Microprogramming --- Control Structure Performance Analysis and Design Aids (B.1.2): {\bf Simulation**}", } @Article{Bakshi:2001:PCH, 
author = "Smita Bakshi and Daniel D. Gajski", title = "Performance-constrained hierarchical pipelining for behaviors, loops, and operations", journal = j-TODAES, volume = "6", number = "1", pages = "1--25", month = jan, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 09:50:12 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p1-bakshi/", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chakrabarty:2001:OTA, author = "Krishnendu Chakrabarty", title = "Optimal test access architectures for system-on-a-chip", journal = j-TODAES, volume = "6", number = "1", pages = "26--49", month = jan, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p26-chakrabarty/p26-chakrabarty.pdf; http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p26-chakrabarty/", abstract = "Test access is a major problem for core-based system-on-a-chip (SOC) designs. Since embedded cores in an SOC are not directly accessible via chip inputs and outputs, special access mechanisms are required to test them at the system level. An efficient test access architecture should also reduce test cost by minimizing test application time.
We address several issues related to the design of optimal test access architectures that minimize testing time, including the assignment of cores to test buses, distribution of test data width between multiple test buses, and analysis of test data width required to satisfy an upper bound on the testing time. Even though the decision versions of all these problems are shown to be NP-complete, they can be solved exactly for practical instances using integer linear programming (ILP). As a case study, the ILP models for two hypothetical but nontrivial systems are solved using a public-domain ILP software package.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Reliability", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Integrated Circuits --- Types and Design Styles (B.7.1); Hardware --- Integrated Circuits --- Design Aids (B.7.2); Hardware --- Integrated Circuits --- Reliability and Testing** (B.7.3)", } @Article{Chen:2001:ALP, author = "Rita Yu Chen and Mary Jane Irwin and Raminder S. Bajwa", title = "Architecture-level power estimation and design experiments", journal = j-TODAES, volume = "6", number = "1", pages = "50--66", month = jan, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p50-chen/p50-chen.pdf; http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p50-chen/", abstract = "Architecture-level power estimation has received more attention recently because of its efficiency. This article presents a technique used to do power analysis of processors at the architecture level.
It provides cycle-by-cycle power consumption data of the architecture on the basis of the instruction/data flow stream. To characterize the power dissipation of control units, a novel hierarchical method has been developed. Using this technique, a power estimator is implemented for a commercial processor. The accuracy of the estimator is validated by comparing the power values it produces against measurements made by a gate-level power simulator for the same benchmark set. Our estimation approach is shown to provide very efficient and accurate power analysis at the architecture level. The energy models built for first-pass estimation (such as ALU, MAC unit, register files) are reusable for future architecture design modification. In this article, we demonstrate the application of the technique. Furthermore, this technique can evaluate various kinds of software to achieve hardware/software codesign for low power.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Design; Experimentation; Performance", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "architecture tradeoff; architecture-level power estimation; computer-aided design of VLSI; control unit; energy model; energy table; functional unit; hardware/software codesign; instruction format transition; low power design; output signal transition; power analysis and estimation; switch capacitance", subject = "Computer Applications --- Computer-Aided Engineering (J.6)", } @Article{Hsiung:2001:PPO, author = "Pao-Ann Hsiung", title = "{POSE}: a parallel object-oriented synthesis environment", journal = j-TODAES, volume = "6", number = "1", pages = "67--92", month = jan, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p67-hsiung/p67-hsiung.pdf; http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p67-hsiung/", abstract = "Design automation tools and methodologies always encounter a problem of how systems may be designed efficiently, including issues such as static modeling and dynamic manipulation of system parts. With the rapid progress of design technology, the continuously increasing number of different choices per system part and the growing complexity of today's systems, the efficiency of the design environment is not only a major concern now, but will also be a demanding problem in the near future. In contrast to heuristic methods, a novel environment called POSE is proposed that increases efficiency during design without losing optimality in the final design results. System parts are modeled using the popular object-oriented modeling technique and are dynamically manipulated using the parallel design technique. A complete integration of object-oriented and parallel techniques is one of the major features of POSE. Common problems related to parallel design such as {\em emptiness\/} and {\em deadlock\/} are also elegantly solved within POSE. Experimental results and formal analysis based on POSE all show its practical and theoretical usefulness. POSE can be used at any level of synthesis as long as off-the-shelf building-blocks manipulation is required. POSE can be applied especially to {\em system-level\/} synthesis, whose targets can be parallel computer architectures, systems-on-chip, or embedded systems. We will show how POSE has been applied to ICOS, a recently proposed synthesis methodology.
Furthermore, POSE can be easily integrated with other heuristic
                 design methodologies to allow increased design
                 efficiency.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-completion check; hardware synthesis;
                 object-oriented technology; parallel design; synthesis
                 rollback",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Hardware ---
                 Miscellaneous (B.m): {\bf Design management}",
}

%%% Keyword list corrected in the next entry: a backslash-semicolon (a
%%% LaTeX spacing command) stood where the plain semicolon keyword
%%% separator belongs, fusing two keywords into one.
@Article{Huang:2001:CSP,
  author =       "Ing-Jer Huang",
  title =        "Co-synthesis of pipelined structures and instruction
                 reordering constraints for instruction set processors",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "93--121",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p93-huang/p93-huang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p93-huang/",
  abstract =     "This paper presents a hardware/software co-synthesis
                 approach to pipelined ISP (instruction set processor)
                 design. The approach synthesizes the pipeline structure
                 from a given instruction set architecture (behavioral)
                 specification. In addition, it generates a set of
                 reordering constraints that guides the compiler
                 back-end (reorderer) to properly schedule instructions
                 so that possible pipeline hazards are avoided and
                 throughput is improved. \par Co-synthesis takes place
                 while resolving pipeline hazards, which can be
                 attributed to interim-instruction dependencies (IIDs).
                 An extended taxonomy of IIDs have been proposed for the
                 systematic analysis of pipeline hazards.
                 Hardware/software methods are developed to resolve
                 IIDs. Algorithms based on taxonomy and resolutions are
                 constructed and integrated into the pipeline synthesis
                 process to explore hardware and software design space.
                 Application benchmarks are used to evaluate possible
                 designs and guide the design decision. The power of the
                 co-synthesis tool PIPER is demonstrated through
                 pipeline synthesis of one illustrative example and two
                 ISPs, including an industrial one (TDY-43). In
                 comparison with other related approaches, our approach
                 achieves higher throughput and provides a systematic
                 way to explore the hardware/software trade-off.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler instruction optimization; instruction set
                 processor; pipeline hazards; pipeline taxonomy;
                 synthesis",
  subject =      "Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2): {\bf Automatic synthesis**}",
}

@Article{Mariatos:2001:MAC,
  author =       "E. P. Mariatos and A. N. Birbas and M. K.
Birbas",
  title =        "A mapping algorithm for computer-assisted exploration
                 in the design of embedded systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "122--147",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  note =         "See note \cite{Chen:2007:NMA}.",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p122-mariatos/p122-mariatos.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p122-mariatos/",
  abstract =     "We present a technique for automatic exploration of
                 architectural alternatives in the design of complex
                 electronic embedded systems and systems-on-a-chip. The
                 technique transforms the problem into a set of simple
                 model-to-model operations and a mapping algorithm that
                 becomes the core of the entire design process. The
                 mapping algorithm is formulated as an assignment-type
                 problem (ATP), which is, in turn, solved by a
                 straightforward optimization method. The result is a
                 design assistance tool, which is demonstrated through a
                 telecommunication systems example.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; embedded system design space exploration;
                 specification mapping",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3); Software --- Software
                 Engineering --- Design Tools and Techniques (D.2.2):
                 {\bf Computer-aided software engineering (CASE)}",
}

%%% Month corrected from jan to apr in the next entry: it belongs to
%%% volume 6, number 2 (see its URL path 2001-6-2), and the other
%%% number 2 entries of this volume are dated April 2001.
@Article{Panda:2001:DMO,
  author =       "P. R. Panda and F. Catthoor and N. D. Dutt and K.
                 Danckaert and E. Brockmeyer and C. Kulkarni and A.
                 Vandercappelle and P. G. Kjeldsberg",
  title =        "Data and memory optimization techniques for embedded
                 systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "149--206",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p149-panda/p149-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p149-panda/",
  abstract =     "We present a survey of the state-of-the-art techniques
                 used in performing data and memory-related
                 optimizations in embedded systems. The optimizations
                 are targeted directly or indirectly at the memory
                 subsystem, and impact one or more out of three
                 important cost metrics: area, performance, and power
                 dissipation of the resulting implementation. \par We
                 first examine architecture-independent optimizations in
                 the form of code transformations. We next cover a broad
                 spectrum of optimization techniques that address memory
                 architectures at varying levels of granularity, ranging
                 from register files to on-chip memory, data caches, and
                 dynamic memory (DRAM). We end with memory addressing
                 related issues.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "address generation; allocation; architecture
                 exploration; code transformation; data cache; data
                 optimization; DRAM; high-level synthesis; memory
                 architecture customization; memory power dissipation;
                 register file; size estimation; SRAM; survey",
  subject =      "Hardware --- Memory Structures --- General (B.3.0);
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Memory design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}; Hardware --- Integrated
                 Circuits --- Types and Design Styles (B.7.1): {\bf
                 Memory technologies}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization}",
}

@Article{Shenoy:2001:ASL,
  author =       "Nagaraj Shenoy and Alok Choudhary and Prithviraj
                 Banerjee",
  title =        "An algorithm for synthesis of large time-constrained
                 heterogeneous adaptive systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "207--225",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p207-shenoy/p207-shenoy.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p207-shenoy/",
  abstract =     "Large time-constrained applications are highly
                 computer-intensive and are often implemented as a
                 complex organization of pipelined data parallel tasks
                 on a pool of embedded processors, DSP processors, and
                 FPGAs.
The large number of design alternatives available at each task level, the application as a whole, and the special needs of the reconfigurable devices (such as the FPGA) make the manual synthesis of such systems very tedious. \par The automatic synthesis algorithm in this paper combines exact (MILP-based) and heuristic techniques to solve this problem, which basically involves (1) propagation of timing constraints; (2) pipelining the loops to meet throughput requirements; (3) resource selection and scheduling, keeping the processing requirements and the timing constraints in view; (4) scheduling the resources across the tasks to ensure maximum utilization; and (5) hiding the reconfiguration delays of the FPGAs. \par While the use of MILP techniques helps in getting high-quality results, combining them with heuristics ensures acceptable synthesis times, striking a good balance between quality of results and synthesis time. Our experimental evaluation of the algorithm shows an average 40\% in resource cost reduction (compared to manual synthesis) with synthesis times from minutes to as low as a few seconds in some cases.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Design; Experimentation", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "delay/cost table; hierarchical control data-flow graph; list scheduling; mixed integer linear programming; pipelining; reconfigurable computing; time-constrained synthesis", subject = "Computer Applications --- Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}; Computer Systems Organization --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time and embedded systems}", } @Article{Su:2001:IRA, author = "Chauchin Su and Yue-Tsang Chen and Shyh-Jye Jou", title = "Intrinsic response for analog module testing using an analog testability bus", journal = j-TODAES, volume = "6", number = 
"2", pages = "226--243", month = apr, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p226-su/p226-su.pdf; http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p226-su/", abstract = "A parasitic effect removal methodology is proposed to handle the large parasitic effects in analog testability buses. The removal is done by an on-chip test generation technique and an intrinsic response extraction algorithm. On-chip test generation creates test signals on-chip to avoid the parasitic effects of the test application bus. The intrinsic response extraction cross-checks and cancels the parasitic effects of both test application and response observation paths. The tests using both SPICE simulation and MNABST-1 P1149.4 test chip reveal that the proposed algorithm can not only remove the parasitic effects of the test buses but also tolerate test signal variations. 
Furthermore, it is robust enough to handle loud environmental noise and the nonlinearity of the switching devices.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Experimentation; Theory", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "analog testability bus; analog testing; boundary scan; design for testability; intrinsic response", subject = "Hardware --- Performance and Reliability --- Reliability, Testing, and Fault-Tolerance (B.8.1)", } @Article{Huang:2001:VSE, author = "Shi-Yu Huang and Kwang-Ting Cheng and Kuang-Chien Chen", title = "Verifying sequential equivalence using {ATPG} techniques", journal = j-TODAES, volume = "6", number = "2", pages = "244--275", month = apr, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p244-huang/p244-huang.pdf; http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p244-huang/", abstract = "In this paper we address the problem of verifying the equivalence of two sequential circuits. State-of-the-art sequential optimization techniques such as retiming and sequential redundancy removal can handle designs with up to hundreds or even thousands of flip-flops. However, the BDD-based approaches for verifying sequential equivalence can easily run into memory explosion for such designs. In an attempt to handle larger circuits, we modify test pattern-generation techniques for verification. The suggested approach utilizes the popular efficient backward-justification technique used in most sequential ATPG programs. 
We present several techniques to enhance the efficiency of this approach by (1) identifying equivalent flip-flop pairs using an induction-based algorithm, and (2) generalizing the idea of exploring the structural similarity between circuits to perform verification in stages. This ATPG-based framework is suitable for verifying circuits either with or without a reset state. In order to extend this approach to verify retimed circuits, we introduce a delay-compensation-based algorithm for preprocessing the circuits. The experimental results of verifying the correctness of circuits after sequential redundancy removal and retiming with up to several hundred flip-flops are presented.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", subject = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Verification}; Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Sequential circuits}", } @Article{VanPraet:2001:PMC, author = "J. {Van Praet} and D. Lanneer and W. Geurts and G. Goossens", title = "Processor modeling and code selection for retargetable compilation", journal = j-TODAES, volume = "6", number = "3", pages = "277--307", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kagaris:2001:NHC, author = "D. Kagaris and S. 
Tragoudas", title = "{Von Neumann} hybrid cellular automata for generating deterministic test sequences", journal = j-TODAES, volume = "6", number = "3", pages = "308--321", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liao:2001:CPT, author = "Swanwa Liao and Mario A. Lopez and Dinesh Mehta", title = "Constrained polygon transformations for incremental floorplanning", journal = j-TODAES, volume = "6", number = "3", pages = "322--342", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chu:2001:CFS, author = "Chris Chu and D. F. 
Wong", title = "Closed form solutions to simultaneous buffer insertion\slash sizing and wire sizing", journal = j-TODAES, volume = "6", number = "3", pages = "343--371", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hu:2001:ELA, author = "Xiaobo Sharon Hu and Danny Z. Chen and Rajeshkumar Sambandam", title = "Efficient list-approximation techniques for floorplan area minimization", journal = j-TODAES, volume = "6", number = "3", pages = "372--400", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nourani:2001:ITI, author = "Mehrdad Nourani and Joan Carletta and Christos Papachristou", title = "Integrated test of interacting controllers and datapaths", journal = j-TODAES, volume = "6", number = "3", pages = "401--422", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Parulkar:2001:IRC, author = "Ishwar Parulkar and Sandeep K. Gupta and Melvin A. Breuer", title = "Introducing redundant computations in {RTL} data paths for reducing {BIST} resources", journal = j-TODAES, volume = "6", number = "3", pages = "423--445", month = jul, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:45 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dasgupta:2001:SRG, author = "Parthasarathi Dasgupta and Susmita Sur-Kolay", title = "Slicible rectangular graphs and their optimal floorplans", journal = j-TODAES, volume = "6", number = "4", pages = "447--470", month = oct, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:44 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hartanto:2001:DSS, author = "Ismed Hartanto and Srikanth Venkataraman and W. Kent Fuchs and Elizabeth M. Rudnick and Janak H. 
Patel and Sreejit Chakravarty",
  title =        "Diagnostic simulation of stuck-at faults in sequential
                 circuits using compact lists",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "471--489",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Narasimhan:2001:FAC,
  author =       "M. Narasimhan and J. Ramanujam",
  title =        "A fast approach to computing exact solutions to the
                 resource-constrained scheduling problem",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "490--500",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% Title typo corrected in the next entry: cocurrent changed to
%%% concurrent (concurrent error detection); citation key unchanged.
@Article{Karri:2001:IRT,
  author =       "Ramesh Karri and Balakrishnan Iyer",
  title =        "Introspection: a register transfer level technique for
                 concurrent error detection and diagnosis in data
                 dominated designs",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "501--515",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Boyer:2001:ODS,
  author =       "Fran{\c{c}}ois R. Boyer and El Mostapha Aboulhamid and
                 Yvon Savaria and Michel Boyer",
  title =        "Optimal design of synchronous circuits using software
                 pipelining techniques",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "516--532",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Voeten:2001:FLT,
  author =       "Jeroen Voeten",
  title =        "On the fundamental limitations of transformational
                 design",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "533--552",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shiue:2001:DMD,
  author =       "Wen-Tsong Shiue and Sathishkumar Udayanarayanan and
                 Chaitali Chakrabarti",
  title =        "Data memory design and exploration for low-power
                 embedded systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "553--568",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement =
ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ashar:2001:UCD, author = "Pranav Ashar and Aarti Gupta and Sharad Malik", title = "Using complete-$1$-distinguishability for {FSM} equivalence checking", journal = j-TODAES, volume = "6", number = "4", pages = "569--590", month = oct, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:44 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2001:ODC, author = "Tai-Hung Liu and Adnan Aziz and Vigyan Singhal", title = "Optimizing designs containing black boxes", journal = j-TODAES, volume = "6", number = "4", pages = "591--601", month = oct, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:44 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Roop:2001:FST, author = "Partha S. Roop and A. Sowmya and S. 
Ramesh", title = "Forced simulation: a technique for automating component reuse in embedded systems", journal = j-TODAES, volume = "6", number = "4", pages = "602--628", month = oct, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:44 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Flores:2001:ESM, author = "Paulo F. Flores and Hor{\'a}cio C. Neto and Jo{\~a}o P. Marques-Silva", title = "An exact solution to the minimum size test pattern problem", journal = j-TODAES, volume = "6", number = "4", pages = "629--644", month = oct, year = "2001", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Feb 19 14:35:44 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chowdhary:2002:GTM, author = "Amit Chowdhary and John P. 
Hayes", title = "General technology mapping for field-programmable gate arrays based on lookup tables", journal = j-TODAES, volume = "7", number = "1", pages = "1--32", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Michael:2002:ATD, author = "M. Michael and S. Tragoudas", title = "{ATPG} tools for delay faults at the functional level", journal = j-TODAES, volume = "7", number = "1", pages = "33--57", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lysecky:2002:PIB, author = "Roman Lysecky and Frank Vahid", title = "Prefetching for improved bus wrapper performance in cores", journal = j-TODAES, volume = "7", number = "1", pages = "58--90", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dutt:2002:CAI, author = "Shantanu Dutt and Wenyong Deng", title = 
"Cluster-aware iterative improvement techniques for partitioning large {VLSI} circuits", journal = j-TODAES, volume = "7", number = "1", pages = "91--121", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Goodby:2002:MSP, author = "Laurence Goodby and Alex Orailo{\u{g}}lu and Paul M. Chau", title = "Microarchitectural synthesis of performance-constrained, low-power {VLSI} designs", journal = j-TODAES, volume = "7", number = "1", pages = "122--136", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{GuerraeSilva:2002:SMA, author = "Lu{\'\i}s {Guerra e Silva} and Jo{\~a}o Marques-Silva and L. Miguel Silveira and Karem A. 
Sakallah", title = "Satisfiability models and algorithms for circuit delay computation", journal = j-TODAES, volume = "7", number = "1", pages = "137--158", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:28:44 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Darte:2002:CEL, author = "Alain Darte and Robert Schreiber and B. Ramakrishna Rau and Fr{\'e}d{\'e}ric Vivien", title = "Constructing and exploiting linear schedules with prescribed parallelism", journal = j-TODAES, volume = "7", number = "1", pages = "159--172", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jagannathan:2002:FAC, author = "Ashok Jagannathan and Sung-Woo Hur and John Lillis", title = "A fast algorithm for context-aware buffer insertion", journal = j-TODAES, volume = "7", number = "1", pages = "173--188", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vemuri:2002:ERO, author = "Ranga Vemuri and Srinivas Katkoori and Meenakshi Kaul and Jay Roy", title = "An efficient register optimization algorithm for high-level synthesis from hierarchical behavioral specifications", journal = j-TODAES, volume = "7", number = "1", pages = "189--216", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2002:OTB, author = "Shi-Zheng Eric Lin and Chieh Changfan and Yu-Chin Hsu and Fur-Shing Tsai", title = "Optimal time borrowing analysis and timing budgeting optimization for latch-based designs", journal = j-TODAES, volume = "7", number = "1", pages = "217--230", month = jan, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:03 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dasgupta:2002:MBP, author = "Parthasarathi Dasgupta and Peichen Pan and Subhas C. Nandy and Bhargab B. 
Bhattacharya", title = "Monotone bipartitioning problem in a planar point set with applications to {VLSI}", journal = j-TODAES, volume = "7", number = "2", pages = "231--248", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Corno:2002:IAS, author = "F. Corno and P. Prinetto and M. Rebaudengo and M. {Sonza Reorda} and G. Squillero", title = "Initializability analysis of synchronous sequential circuits", journal = j-TODAES, volume = "7", number = "2", pages = "249--264", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2002:LTL, author = "Ki-Wook Kim and Taewhan Kim and Ting-Ting Hwang and Sung-Mo Kang and C. L.
Liu", title = "Logic transformation for low-power synthesis", journal = j-TODAES, volume = "7", number = "2", pages = "265--283", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tessier:2002:FPA, author = "Russell Tessier", title = "Fast placement approaches for {FPGAs}", journal = j-TODAES, volume = "7", number = "2", pages = "284--305", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2002:TMA, author = "Min Zhao and Sachin S. 
Sapatnekar", title = "Technology mapping algorithms for domino logic", journal = j-TODAES, volume = "7", number = "2", pages = "306--335", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Araujo:2002:GAR, author = "Guido Araujo and Guilherme Ottoni and Marcelo Cintra", title = "Global array reference allocation", journal = j-TODAES, volume = "7", number = "2", pages = "336--357", month = apr, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tsao:2002:UDC, author = "Chung-wen Albert Tsao and Cheng-kok Koh", title = "{UST\slash DME}: a clock tree router for general skew constraints", journal = j-TODAES, volume = "7", number = "3", pages = "359--379", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kountouris:2002:ESC, author = "Apostolos A. 
Kountouris and Christophe Wolinski", title = "Efficient scheduling of conditional behaviors for high-level synthesis", journal = j-TODAES, volume = "7", number = "3", pages = "380--412", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vahid:2002:PSP, author = "Frank Vahid", title = "Partitioning sequential programs for {CAD} using a three-step approach", journal = j-TODAES, volume = "7", number = "3", pages = "413--429", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lapinskii:2002:CAH, author = "Viktor S. Lapinskii and Margarida F. Jacome and Gustavo A. 
{De Veciana}", title = "Cluster assignment for high-performance embedded {VLIW} processors", journal = j-TODAES, volume = "7", number = "3", pages = "430--454", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Saxena:2002:ESL, author = "Vikram Saxena and Farid N. Najm and Ibrahim N. Hajj", title = "Estimation of state line statistics in sequential circuits", journal = j-TODAES, volume = "7", number = "3", pages = "455--473", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Glebov:2002:FNA, author = "A. Glebov and S. Gavrilov and D. Blaauw and V.
Zolotov", title = "False-noise analysis using logic implications", journal = j-TODAES, volume = "7", number = "3", pages = "474--498", month = jul, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:04 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sarrafzadeh:2002:GE, author = "Majid Sarrafzadeh and Rajeev Jayaraman", title = "Guest editorial", journal = j-TODAES, volume = "7", number = "4", pages = "499--500", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vemuri:2002:BBL, author = "Navin Vemuri and Priyank Kalla and Russell Tessier", title = "{BDD}-based logic synthesis for {LUT}-based {FPGAs}", journal = j-TODAES, volume = "7", number = "4", pages = "501--525", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Fan:2002:RDG, author = "Hongbing Fan and Jiping Liu and Yu-Liang Wu and C. K. 
Wong", title = "Reduction design for generic universal switch blocks", journal = j-TODAES, volume = "7", number = "4", pages = "526--546", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dandalis:2002:RTP, author = "Andreas Dandalis and Viktor K. Prasanna", title = "Run-time performance optimization of an {FPGA}-based deduction engine for {SAT} solvers", journal = j-TODAES, volume = "7", number = "4", pages = "547--562", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2002:BSF, author = "Haibo Wang and Sarma B. K. Vrudhula", title = "Behavioral synthesis of field programmable analog array circuits", journal = j-TODAES, volume = "7", number = "4", pages = "563--604", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kastner:2002:IGH, author = "R. 
Kastner and A. Kaplan and S. Ogrenci Memik and E. Bozorgzadeh", title = "Instruction generation for hybrid reconfigurable systems", journal = j-TODAES, volume = "7", number = "4", pages = "605--627", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2002:PDP, author = "Guang-Ming Wu and Jai-Ming Lin and Yao-Wen Chang", title = "Performance-driven placement for dynamically reconfigurable {FPGAs}", journal = j-TODAES, volume = "7", number = "4", pages = "628--642", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singh:2002:ECC, author = "Amit Singh and Ganapathy Parthasarathy and Ma{\l}gorzata Marek-Sadowska", title = "Efficient circuit clustering for area and power reduction in {FPGAs}", journal = j-TODAES, volume = "7", number = "4", pages = "643--663", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dutt:2002:SBB, author = "Shantanu Dutt and Vinay Verma and Hasan Arslan", title = "A search-based bump-and-refit approach to incremental routing for {ECO} applications in {FPGAs}", journal = j-TODAES, volume = "7", number = "4", pages = "664--693", month = oct, year = "2002", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tragoudas:2003:PDF, author = "S. Tragoudas and N. Denny", title = "Path delay fault testing using test points", journal = j-TODAES, volume = "8", number = "1", pages = "1--10", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2003:AFF, author = "Yao-Wen Chang and Kai Zhu and Guang-Ming Wu and D. F. Wong and C. K. 
Wong", title = "Analysis of {FPGA\slash FPIC} switch modules", journal = j-TODAES, volume = "8", number = "1", pages = "11--37", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jone:2003:DTI, author = "W.-B. Jone and J. S. Wang and Hsueh-I Lu and I. P. Hsu and J.-Y. Chen", title = "Design theory and implementation for low-power segmented bus systems", journal = j-TODAES, volume = "8", number = "1", pages = "38--54", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yao:2003:FRC, author = "Bo Yao and Hongyu Chen and Chung-Kuan Cheng and Ronald Graham", title = "Floorplan representations: {Complexity} and connections", journal = j-TODAES, volume = "8", number = "1", pages = "55--80", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Riepe:2003:TPN, author = 
"Michael A. Riepe and Karem A. Sakallah", title = "Transistor placement for noncomplementary digital {VLSI} cell synthesis", journal = j-TODAES, volume = "8", number = "1", pages = "81--107", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Blanton:2003:PIP, author = "R. D. (Shawn) Blanton and John P. Hayes", title = "On the properties of the input pattern fault model", journal = j-TODAES, volume = "8", number = "1", pages = "108--124", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{VanAchteren:2003:SSD, author = "Tanja {Van Achteren} and Francky Catthoor and Rudy Lauwereins and Geert Deconinck", title = "Search space definition and exploration for nonuniform data reuse opportunities in data-dominant applications", journal = j-TODAES, volume = "8", number = "1", pages = "125--139", month = jan, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:05 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems 
(TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Edwards:2003:TCC, author = "Stephen A. Edwards", title = "Tutorial: {Compiling} concurrent languages for sequential processors", journal = j-TODAES, volume = "8", number = "2", pages = "141--187", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2003:RBP, author = "Guang-Ming Wu and Yun-Chih Chang and Yao-Wen Chang", title = "Rectilinear block placement using {B*}-trees", journal = j-TODAES, volume = "8", number = "2", pages = "188--202", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2003:MDO, author = "Ki-Wook Kim and Seong-Ook Jung and Taewhan Kim and Sung-Mo Kang", title = "Minimum delay optimization for domino logic circuits---a coupling-aware approach", journal = j-TODAES, volume = "8", number = "2", pages = "203--213", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design 
Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pinar:2003:CSI, author = "Ali Pinar and C. L. Liu", title = "Compacting sequences with invariant transition frequencies", journal = j-TODAES, volume = "8", number = "2", pages = "214--221", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singhal:2003:SOA, author = "Vigyan Singhal and Carl Pixley and Adnan Aziz and Shaz Qadeer and Robert Brayton", title = "Sequential optimization in the absence of global reset", journal = j-TODAES, volume = "8", number = "2", pages = "222--251", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2003:COV, author = "Chingren Lee and Jenq Kuen Lee and Tingting Hwang and Shi-Chun Tsai", title = "Compiler optimization on {VLIW} instruction scheduling for low power", journal = j-TODAES, volume = "8", number = "2", pages = "252--268", month = apr, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", 
acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lopez-Vallejo:2003:HSP, author = "Marisa L{\'o}pez-Vallejo and Juan Carlos L{\'o}pez", title = "On the hardware-software partitioning problem: {System} modeling and partitioning techniques", journal = j-TODAES, volume = "8", number = "3", pages = "269--297", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Obenaus:2003:GFP, author = "Stefan Thomas Obenaus and Ted H. Szymanski", title = "{Gravity}: {Fast} placement for {$3$-D} {VLSI}", journal = j-TODAES, volume = "8", number = "3", pages = "298--315", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2003:CRD, author = "X. Yang and M. Wang and R. Kastner and S. Ghiasi and M. 
Sarrafzadeh", title = "Congestion reduction during placement with provably good approximation bound", journal = j-TODAES, volume = "8", number = "3", pages = "316--333", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Constantinides:2003:SSA, author = "G. A. Constantinides and P. Y. K. Cheung and W. Luk", title = "Synthesis of saturation arithmetic architectures", journal = j-TODAES, volume = "8", number = "3", pages = "334--354", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kuchcinski:2003:CDS, author = "Krzysztof Kuchcinski", title = "Constraints-driven scheduling and resource assignment", journal = j-TODAES, volume = "8", number = "3", pages = "355--383", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2003:ACG, author = "J.-Y. Lee and I.-C. 
Park", title = "Address code generation for {DSP} instruction-set architectures", journal = j-TODAES, volume = "8", number = "3", pages = "384--395", month = jul, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 7 11:12:06 MDT 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Rawat:2003:I, author = "Shishpal Rawat and Hans-Joachim Wunderlich", title = "Introduction", journal = j-TODAES, volume = "8", number = "4", pages = "397--398", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Goel:2003:STA, author = "Sandeep Kumar Goel and Erik Jan Marinissen", title = "{SOC} test architecture design for efficient utilization of test bandwidth", journal = j-TODAES, volume = "8", number = "4", pages = "399--429", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{El-Maleh:2003:TVD, author = "Aiman H. El-Maleh and Yahya E. 
Osais", title = "Test vector decomposition-based static compaction algorithms for combinational circuits", journal = j-TODAES, volume = "8", number = "4", pages = "430--459", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Reddy:2003:TDV, author = "Sudhakar M. Reddy and Kohei Miyase and Seiji Kajihara and Irith Pomeranz", title = "On test data volume reduction for multiple scan chain designs", journal = j-TODAES, volume = "8", number = "4", pages = "460--469", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2003:TDC, author = "Lei Li and Krishnendu Chakrabarty and Nur A. 
Touba", title = "Test data compression using dictionaries with selective entries and fixed-length indices", journal = j-TODAES, volume = "8", number = "4", pages = "470--490", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singh:2003:MST, author = "Adit D. Singh and Markus Seuring and Michael G{\"o}ssel and Egor S. Sogomonyan", title = "Multimode scan: {Test} per clock {BIST} for {IP} cores", journal = j-TODAES, volume = "8", number = "4", pages = "491--505", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nummer:2003:THP, author = "Muhammad Nummer and Manoj Sachdev", title = "Testing high-performance pipelined circuits with slow-speed testers", journal = j-TODAES, volume = "8", number = "4", pages = "506--521", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", 
} @Article{Parthasarathy:2003:PTA, author = "Kumar Parthasarathy and Turker Kuyel and Dana Price and Le Jin and Degang Chen and Randall Geiger", title = "{BIST} and production testing of {ADCs} using imprecise stimulus", journal = j-TODAES, volume = "8", number = "4", pages = "522--545", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2003:CLF, author = "Zhuo Li and Xiang Lu and Wangqi Qiu and Weiping Shi and D. M. H. Walker", title = "A circuit level fault model for resistive bridges", journal = j-TODAES, volume = "8", number = "4", pages = "546--559", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Niggemeyer:2003:DAM, author = "Dirk Niggemeyer and Elizabeth M. 
Rudnick", title = "A data acquisition methodology for on-chip repair of embedded memories", journal = j-TODAES, volume = "8", number = "4", pages = "560--576", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Neuberger:2003:MBU, author = "Gustavo Neuberger and Fernanda de Lima and Luigi Carro and Ricardo Reis", title = "A multiple bit upset tolerant {SRAM} memory", journal = j-TODAES, volume = "8", number = "4", pages = "577--590", month = oct, year = "2003", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Oct 31 06:04:08 MST 2003", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bunker:2004:FHS, author = "Annette Bunker and Ganesh Gopalakrishnan and Sally A. 
McKee", title = "Formal hardware specification languages for protocol compliance verification", journal = j-TODAES, volume = "9", number = "1", pages = "1--32", month = jan, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jan 28 17:18:10 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2004:PMA, author = "Hao Li and Srinivas Katkoori and Wai-Kei Mak", title = "Power minimization algorithms for {LUT}-based {FPGA} technology mapping", journal = j-TODAES, volume = "9", number = "1", pages = "33--51", month = jan, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jan 28 17:18:10 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cho:2004:FMB, author = "Jeonghun Cho and Yunheung Paek and David Whalley", title = "Fast memory bank assignment for fixed-point digital signal processors", journal = j-TODAES, volume = "9", number = "1", pages = "52--74", month = jan, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jan 28 17:18:10 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Das:2004:MDR, 
author = "Sandip Das and Susmita Sur-Kolay and Bhargab B. Bhattacharya", title = "{Manhattan}-diagonal routing in channels and switchboxes", journal = j-TODAES, volume = "9", number = "1", pages = "75--104", month = jan, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jan 28 17:18:10 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2004:BBA, author = "Lieh-Ming Wu and Kuochen Wang and Chuang-Yi Chiu", title = "A {BNF}-based automatic test program generator for compatible microprocessor verification", journal = j-TODAES, volume = "9", number = "1", pages = "105--132", month = jan, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jan 28 17:18:10 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kjeldsberg:2004:SRE, author = "P. G. Kjeldsberg and F. Catthoor and E. J. 
Aas", title = "Storage requirement estimation for optimized design of data intensive applications", journal = j-TODAES, volume = "9", number = "2", pages = "133--158", month = apr, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sabade:2004:BTM, author = "Sagar S. Sabade and Duncan M. Walker", title = "{I$_{\mbox {DDX}}$}-based test methods: a survey", journal = j-TODAES, volume = "9", number = "2", pages = "159--198", month = apr, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ma:2004:SCU, author = "Yuchun Ma and Xianlong Hong and Sheqin Dong and Yici Cai and Chung-Kuan Cheng and Jun Gu", title = "Stairway compaction using corner block list and its applications with rectilinear blocks", journal = j-TODAES, volume = "9", number = "2", pages = "199--211", month = apr, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Murthy:2004:BMP, author = "Praveen K. Murthy and Shuvra S. Bhattacharyya", title = "Buffer merging---a powerful technique for reducing memory requirements of synchronous dataflow specifications", journal = j-TODAES, volume = "9", number = "2", pages = "212--237", month = apr, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Doboli:2004:TLL, author = "Alex Doboli and Nagu Dhanwada and Adrian Nunez-Aldana and Ranga Vemuri", title = "A two-layer library-based approach to synthesis of analog systems from {VHDL-AMS} specifications", journal = j-TODAES, volume = "9", number = "2", pages = "238--271", month = apr, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sundararajan:2004:NAI, author = "Vijay Sundararajan and Sachin S. Sapatnekar and Keshab K. 
Parhi", title = "A new approach for integration of min-area retiming and min-delay padding for simultaneously addressing short-path and long-path constraints", journal = j-TODAES, volume = "9", number = "3", pages = "273--289", month = jul, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lepak:2004:SSI, author = "Kevin M. Lepak and Min Xu and Jun Chen and Lei He", title = "Simultaneous shield insertion and net ordering for capacitive and inductive coupling minimization", journal = j-TODAES, volume = "9", number = "3", pages = "290--309", month = jul, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vicente:2004:APT, author = "Juan de 
Vicente and Juan Lanchares and Rom{\'a}n Hermida", title = "Annealing placement by thermodynamic combinatorial optimization", journal = j-TODAES, volume = "9", number = "3", pages = "310--332", month = jul, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dandalis:2004:ACE, author = "Andreas Dandalis and Viktor K. Prasanna", title = "An adaptive cryptographic engine for {Internet} protocol security architectures", journal = j-TODAES, volume = "9", number = "3", pages = "333--353", month = jul, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2004:FVE, author = "Jun Yang and Rajiv Gupta and Chuanjun Zhang", title = "Frequent value encoding for low power data buses", journal = j-TODAES, volume = "9", number = "3", pages = "354--384", month = jul, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } 
@Article{Dasdan:2004:EAF, author = "Ali Dasdan", title = "Experimental analysis of the fastest optimum cycle ratio and mean algorithms", journal = j-TODAES, volume = "9", number = "4", pages = "385--418", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ghosh:2004:COE, author = "Arijit Ghosh and Tony Givargis", title = "Cache optimization for embedded processor cores: an analytical approach", journal = j-TODAES, volume = "9", number = "4", pages = "419--440", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gupta:2004:CPC, author = "Sumit Gupta and Rajesh Kumar Gupta and Nikil D. 
Dutt and Alexandru Nicolau", title = "Coordinated parallelizing compiler optimizations and high-level synthesis", journal = j-TODAES, volume = "9", number = "4", pages = "441--470", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cota:2004:RCN, author = "{\'E}rika Cota and Luigi Carro and Marcelo Lubaszewski", title = "Reusing an on-chip network for the test of core-based systems", journal = j-TODAES, volume = "9", number = "4", pages = "471--499", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Krishna:2004:AHE, author = "C. V. Krishna and Abhijit Jas and Nur A. 
Touba", title = "Achieving high encoding efficiency with partial dynamic {LFSR} reseeding", journal = j-TODAES, volume = "9", number = "4", pages = "500--516", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hung:2004:SCR, author = "William N. N. Hung and Xiaoyu Song and El Mostapha Aboulhamid and Andrew Kennings and Alan Coppola", title = "Segmented channel routability via satisfiability", journal = j-TODAES, volume = "9", number = "4", pages = "517--528", month = oct, year = "2004", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Nov 4 08:12:30 MST 2004", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dutt:2005:E, author = "Nikil Dutt", title = "Editorial", journal = j-TODAES, volume = "10", number = "1", pages = "1--2", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cong:2005:TMA, author = "Jason Cong and Hui Huang and Xin Yuan", title = 
"Technology mapping and architecture evaluation for $ k / m$-macrocell-based {FPGAs}", journal = j-TODAES, volume = "10", number = "1", pages = "3--23", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ruan:2005:BEL, author = "Shanq-Jang Ruan and Kun-Lin Tsai and Edwin Naroska and Feipei Lai", title = "Bipartitioning and encoding in low-power pipelined circuits", journal = j-TODAES, volume = "10", number = "1", pages = "24--32", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Memik:2005:SAO, author = "Seda Ogrenci Memik and Ryan Kastner and Elaheh Bozorgzadeh and Majid Sarrafzadeh", title = "A scheduling algorithm for optimization and early planning in high-level synthesis", journal = j-TODAES, volume = "10", number = "1", pages = "33--57", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Adya:2005:CTM, author = "S. N. Adya and I. L. Markov", title = "Combinatorial techniques for mixed-size placement", journal = j-TODAES, volume = "10", number = "1", pages = "58--90", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nourani:2005:RHE, author = "Mehrdad Nourani and Mohammad H. Tehranipour", title = "{RL-Huffman} encoding for test compression and power reduction in scan applications", journal = j-TODAES, volume = "10", number = "1", pages = "91--115", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jan:2005:GMR, author = "Gene Eu Jan and Ki-Yin Chang and Su Gao and Ian Parberry", title = "A $4$-geometry maze router and its application on multiterminal nets", journal = j-TODAES, volume = "10", number = "1", pages = "116--135", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of 
Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Arato:2005:AAH, author = "P{\'e}ter Arat{\'o} and Zolt{\'a}n {\'A}d{\'a}m Mann and Andr{\'a}s Orb{\'a}n", title = "Algorithmic aspects of hardware\slash software partitioning", journal = j-TODAES, volume = "10", number = "1", pages = "136--156", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kagaris:2005:UMP, author = "Dimitri Kagaris", title = "A unified method for phase shifter computation", journal = j-TODAES, volume = "10", number = "1", pages = "157--167", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kao:2005:EAF, author = "Chi-Chou Kao and Yen-Tai Lai", title = "An efficient algorithm for finding the minimal-area {FPGA} technology mapping", journal = j-TODAES, volume = "10", number = "1", pages = "168--186", month = jan, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 14 10:34:36 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on 
Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chabini:2005:SOR, author = "Noureddine Chabini and El Mostapha Aboulhamid and Isma{\"\i}l Chabini and Yvon Savaria", title = "Scheduling and optimal register placement for synchronous circuits derived using software pipelining techniques", journal = j-TODAES, volume = "10", number = "2", pages = "187--204", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cao:2005:SSL, author = "Aiqun Cao and Naran Sirisantana and Cheng-Kok Koh and Kaushik Roy", title = "Synthesis of skewed logic circuits", journal = j-TODAES, volume = "10", number = "2", pages = "205--228", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kadayif:2005:OIT, author = "I. Kadayif and A. Sivasubramaniam and M. Kandemir and G. Kandiraju and G. 
Chen", title = "Optimizing instruction {TLB} energy using software and hardware techniques", journal = j-TODAES, volume = "10", number = "2", pages = "229--257", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2005:ETT, author = "Xiao Liu and Michael S. Hsiao and Sreejit Chakravarty and Paul J. Thadikaran", title = "Efficient techniques for transition testing", journal = j-TODAES, volume = "10", number = "2", pages = "258--278", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Poon:2005:DPM, author = "Kara K. W. Poon and Steven J. E. 
Wilton and Andy Yan", title = "A detailed power model for field-programmable gate arrays", journal = j-TODAES, volume = "10", number = "2", pages = "279--302", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bhattacharya:2005:OWP, author = "Soumendu Bhattacharya and Abhijit Chatterjee", title = "Optimized wafer-probe and assembled package test design for analog circuits", journal = j-TODAES, volume = "10", number = "2", pages = "303--329", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mohanty:2005:EED, author = "Saraju P. Mohanty and N. 
Ranganathan", title = "Energy-efficient datapath scheduling using multiple voltages and dynamic clocking", journal = j-TODAES, volume = "10", number = "2", pages = "330--353", month = apr, year = "2005", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 26 10:39:39 MDT 2005", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", }

@Article{Davoodi:2005:VSU,
  author = "Azadeh Davoodi and Ankur Srivastava",
  title = "Voltage scheduling under unpredictabilities: a risk management paradigm",
  journal = j-TODAES,
  volume = "10",
  number = "2",
  pages = "354--368",
  month = apr,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Tue Apr 26 10:39:39 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2005:EAV,
  author = "Zhong Wang and Xiaobo Sharon Hu",
  title = "Energy-aware variable partitioning and instruction scheduling for multibank memory architectures",
  journal = j-TODAES,
  volume = "10",
  number = "2",
  pages = "369--388",
  month = apr,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Tue Apr 26 10:39:39 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cong:2005:LSC,
  author = "Jason Cong and Joseph R. Shinnerl and Min Xie and Tim Kong and Xin Yuan",
  title = "Large-scale circuit placement",
  journal = j-TODAES,
  volume = "10",
  number = "2",
  pages = "389--430",
  month = apr,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Tue Apr 26 10:39:39 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% --------------------------------------------------------------------
%%% ACM TODAES volume 10, number 3, July 2005

@Article{Paul:2005:HLM,
  author = "Joann M. Paul and Donald E. Thomas and Andrew S. Cassidy",
  title = "High-level modeling and simulation of single-chip programmable heterogeneous multiprocessors",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "431--461",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Roy:2005:FSV,
  author = "Arnab Roy and S. K. Panda and Rajeev Kumar and P. P. Chakrabarti",
  title = "A framework for systematic validation and debugging of pipeline simulators",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "462--491",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Banerjee:2005:OFT,
  author = "Ansuman Banerjee and Pallab Dasgupta",
  title = "The open family of temporal logics: {Annotating} temporal operators with input constraints",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "492--522",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Koushanfar:2005:BST,
  author = "Farinaz Koushanfar and Inki Hong and Miodrag Potkonjak",
  title = "Behavioral synthesis techniques for intellectual property protection",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "523--545",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gupta:2005:RAS,
  author = "Puneet Gupta and Andrew B. Kahng and Stefanus Mantik",
  title = "Routing-aware scan chain ordering",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "546--560",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xiang:2005:AIP,
  author = "Hua Xiang and Xiaoping Tang and Martin D. F. Wong",
  title = "An algorithm for integrated pin assignment and buffer planning",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "561--572",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% The complexity bound in the next title is big-O; the math is braced
%%% so that bibliography styles cannot change its letter case.
@Article{Lee:2005:PDD,
  author = "Jaehwan John Lee and Vincent John {Mooney III}",
  title = "An {$ O(\min (m, n)) $} parallel deadlock detection algorithm",
  journal = j-TODAES,
  volume = "10",
  number = "3",
  pages = "573--586",
  month = jul,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Thu Sep 22 11:16:52 MDT 2005",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% --------------------------------------------------------------------
%%% ACM TODAES volume 10, number 4, October 2005

@Article{Harris:2005:I,
  author = "Ian G. Harris",
  title = "Introduction",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "587--588",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Suhaib:2005:XIM,
  author = "Syed M. Suhaib and Deepak A. Mathaikutty and Sandeep K. Shukla and David Berner",
  title = "{XFM}: an incremental methodology for developing formal models",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "589--609",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Fujita:2005:ECB,
  author = "Masahiro Fujita",
  title = "Equivalence checking between behavioral and {RTL} descriptions with virtual controllers and datapaths",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "610--626",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Feng:2005:UDP,
  author = "Tao Feng and Li-C. Wang and Kwang-Ting (Tim) Cheng and Chih-Chang (Andy) Lin",
  title = "Using $2$-domain partitioned {OBDD} data structure in an enhanced symbolic simulator",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "627--650",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Higgins:2005:SDA,
  author = "Jason T. Higgins and Mark D. Aagaard",
  title = "Simplifying the design and automating the verification of pipelines with structural hazards",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "651--672",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shamshiri:2005:ILT,
  author = "Saeed Shamshiri and Hadi Esmaeilzadeh and Zainalabedin Navabi",
  title = "Instruction-level test methodology for {CPU} core self-testing",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "673--689",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Al-Yamani:2005:TCE,
  author = "Ahmad A. Al-Yamani and Edward J. McCluskey",
  title = "Test chip experimental results on high-level structural test",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "690--701",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ciordas:2005:EBM,
  author = "Calin Ciordas and Twan Basten and Andrei R{\u{a}}dulescu and Kees Goossens and Jef {Van Meerbergen}",
  title = "An event-based monitoring service for networks on chip",
  journal = j-TODAES,
  volume = "10",
  number = "4",
  pages = "702--723",
  month = oct,
  year = "2005",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Fri Jan 13 07:41:02 MST 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% --------------------------------------------------------------------
%%% ACM TODAES volume 11, number 1, January 2006

@Article{Dutt:2006:E,
  author = "Nikil Dutt",
  title = "Editorial",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "1--2",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Givargis:2006:ZCI,
  author = "Tony Givargis",
  title = "Zero cost indexing for improved processor cache performance",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "3--25",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Constantinides:2006:WLO,
  author = "George A. Constantinides",
  title = "Word-length optimization for differentiable nonlinear systems",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "26--43",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Su:2006:AMS,
  author = "Qing Su and Jamil Kawa and Charles Chiang and Yehia Massoud",
  title = "Accurate modeling of substrate resistive coupling for floating substrates",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "44--51",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Davoodi:2006:ETG,
  author = "Azadeh Davoodi and Ankur Srivastava",
  title = "Effective techniques for the generalized low-power binding problem",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "52--69",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Schaumont:2006:ICE,
  author = "Patrick Schaumont and Doris Ching and Ingrid Verbauwhede",
  title = "An interactive codesign environment for domain-specific coprocessors",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "70--87",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jiang:2006:RCD,
  author = "Iris Hui-Ru Jiang and Song-Ra Pan and Yao-Wen Chang and Jing-Yang Jou",
  title = "Reliable crosstalk-driven interconnect optimization",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "88--103",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kulkarni:2006:CTA,
  author = "Dhananjay Kulkarni and Walid A. Najjar and Robert Rinker and Fadi J. Kurdahi",
  title = "Compile-time area estimation for {LUT}-based {FPGAs}",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "104--122",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shrivastava:2006:CFC,
  author = "Aviral Shrivastava and Partha Biswas and Ashok Halambi and Nikil Dutt and Alex Nicolau",
  title = "Compilation framework for code size reduction using reduced bit-width {ISAs (rISAs)}",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "123--146",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{You:2006:CLP,
  author = "Yi-Ping You and Chingren Lee and Jenq Kuen Lee",
  title = "Compilers for leakage power reduction",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "147--164",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shao:2006:LST,
  author = "Zili Shao and Bin Xiao and Chun Xue and Qingfeng Zhuge and Edwin H.-M. Sha",
  title = "Loop scheduling with timing and switching-activity minimization for {VLIW DSP}",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "165--185",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mohanty:2006:IMS,
  author = "Saraju P. Mohanty and N. Ranganathan and Sunil K. Chappidi",
  title = "{ILP} models for simultaneous energy and transient power minimization during behavioral synthesis",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "186--212",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ozdal:2006:TLB,
  author = "Muhammet Mustafa Ozdal and Martin D. F. Wong",
  title = "Two-layer bus routing for high-speed printed circuit boards",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "213--227",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kandemir:2006:IEB,
  author = "M. Kandemir and J. Ramanujam and U. Sezer",
  title = "Improving the energy behavior of block buffering using compiler optimizations",
  journal = j-TODAES,
  volume = "11",
  number = "1",
  pages = "228--250",
  month = jan,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Apr 12 07:15:39 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% --------------------------------------------------------------------
%%% ACM TODAES volume 11, number 2, April 2006

@Article{Ayala-Rincon:2006:PTS,
  author = "M. Ayala-Rinc{\'o}n and C. H. Llanos and R. P. Jacobi and R. W. Hartenstein",
  title = "Prototyping time- and space-efficient computations of algebraic operations over dynamically reconfigurable systems modeled by rewriting-logic",
  journal = j-TODAES,
  volume = "11",
  number = "2",
  pages = "251--281",
  month = apr,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Aug 23 10:13:18 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Absar:2006:RAI,
  author = "Javed Absar and Francky Catthoor",
  title = "Reuse analysis of indirectly indexed arrays",
  journal = j-TODAES,
  volume = "11",
  number = "2",
  pages = "282--305",
  month = apr,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Aug 23 10:13:18 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dasdan:2006:HIT,
  author = "Ali Dasdan and Ivan Hom",
  title = "Handling inverted temperature dependence in static timing analysis",
  journal = j-TODAES,
  volume = "11",
  number = "2",
  pages = "306--324",
  month = apr,
  year = "2006",
  CODEN = "ATASFO",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Wed Aug 23 10:13:18 MDT 2006",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2006:ETO, author = "Zuoyuan Li and Xianlong Hong and Qiang Zhou and Jinian Bian and Hannah H. Yang and Vijay Pitchumani", title = "Efficient thermal-oriented {$3$D} floorplanning and thermal via planning for two-stacked-die integration", journal = j-TODAES, volume = "11", number = "2", pages = "325--345", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Padmanaban:2006:IGM, author = "Saravanan Padmanaban and Spyros Tragoudas", title = "Implicit grading of multiple path delay faults", journal = j-TODAES, volume = "11", number = "2", pages = "346--361", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2006:OSM, author = "Deming Chen and Jason Cong and Junjuan Xu", title = "Optimal simultaneous module and multivoltage assignment for low power", journal = j-TODAES, volume = "11", number = "2", pages = "362--386", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on 
Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhu:2006:CZD, author = "Haikun Zhu and Chung-Kuan Cheng and Ronald Graham", title = "On the construction of zero-deficiency parallel prefix circuits with minimum depth", journal = j-TODAES, volume = "11", number = "2", pages = "387--409", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kandemir:2006:REC, author = "Mahmut Taylan Kandemir", title = "Reducing energy consumption of multiprocessor {SoC} architectures by exploiting memory bank locality", journal = j-TODAES, volume = "11", number = "2", pages = "410--441", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Su:2006:CTD, author = "Fei Su and Sule Ozev and Krishnendu Chakrabarty", title = "Concurrent testing of digital microfluidics-based biochips", journal = j-TODAES, volume = "11", number = "2", pages = "442--464", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Atienza:2006:SDM, author = "David Atienza and Jose M. Mendias and Stylianos Mamagkakis and Dimitrios Soudris and Francky Catthoor", title = "Systematic dynamic memory management design methodology for reduced memory footprint", journal = j-TODAES, volume = "11", number = "2", pages = "465--489", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2006:LVA, author = "Wei Li and Daniel Blakely and Scott {Van Sooy} and Keven Dunn and David Kidd and Robert Rogenmoser and Dian Zhou", title = "{LVS} verification across multiple power domains for a quad-core microprocessor", journal = j-TODAES, volume = "11", number = "2", pages = "490--500", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cheatham:2006:SFT, author = "Jason A. Cheatham and John M. 
Emmert and Stan Baumgart", title = "A survey of fault tolerant methodologies for {FPGAs}", journal = j-TODAES, volume = "11", number = "2", pages = "501--533", month = apr, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:18 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pedram:2006:ISI, author = "Massoud Pedram", title = "Introduction to special issue: {Novel} paradigms in system-level design", journal = j-TODAES, volume = "11", number = "3", pages = "535--536", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pinto:2006:SLD, author = "Alessandro Pinto and Alvise Bonivento and Allberto L. 
Sangiovanni-Vincentelli and Roberto Passerone and Marco Sgroi", title = "System level design paradigms: {Platform-based} design and communication synthesis", journal = j-TODAES, volume = "11", number = "3", pages = "537--563", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Marculescu:2006:CCR, author = "Radu Marculescu and Umit Y. Ogras and Nicholas H. Zamora", title = "Computation and communication refinement for multiprocessor {SoC} design: a system-level perspective", journal = j-TODAES, volume = "11", number = "3", pages = "564--592", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pop:2006:AOD, author = "Paul Pop and Petru Eles and Zebo Peng and Traian Pop", title = "Analysis and optimization of distributed real-time embedded systems", journal = j-TODAES, volume = "11", number = "3", pages = "593--625", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design 
Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mishra:2006:ADL, author = "Prabhat Mishra and Aviral Shrivastava and Nikil Dutt", title = "Architecture description language {(ADL)-driven} software toolkit generation for architectural exploration of programmable {SOCs}", journal = j-TODAES, volume = "11", number = "3", pages = "626--658", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lysecky:2006:WP, author = "Roman Lysecky and Greg Stitt and Frank Vahid", title = "{Warp Processors}", journal = j-TODAES, volume = "11", number = "3", pages = "659--681", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Su:2006:MPF, author = "Fei Su and Krishnendu Chakrabarty", title = "Module placement for fault-tolerant microfluidics-based biochips", journal = j-TODAES, volume = "11", number = "3", pages = "682--710", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement 
= ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hanchate:2006:GTF, author = "Narender Hanchate and Nagarajan Ranganathan", title = "A game-theoretic framework for multimetric optimization of interconnect delay, power, and crosstalk noise during wire sizing", journal = j-TODAES, volume = "11", number = "3", pages = "711--739", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2006:SPC, author = "Gang Chen and Jason Cong", title = "Simultaneous placement with clustering and duplication", journal = j-TODAES, volume = "11", number = "3", pages = "740--772", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", }
%%% NOTE(review): in Bhanja:2006:SFG the author previously recorded as "N. Ranganathan" is spelled out as "Nagarajan Ranganathan" for consistency with Hanchate:2006:GTF; confirm against the article byline.
@Article{Bhanja:2006:SFG, author = "Sanjukta Bhanja and Karthikeyan Lingasubramanian and Nagarajan Ranganathan", title = "A stimulus-free graphical probabilistic switching model for sequential circuits using dynamic {Bayesian} networks", journal = j-TODAES, volume = "11", number = "3", pages = "773--796", month = jul, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Aug 23 10:13:19 MDT 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cao:2006:POS, author = "Aiqun Cao and Ruibing Lu and Chen Li and Cheng-Kok Koh", title = "Postlayout optimization for synthesis of {Domino} circuits", journal = j-TODAES, volume = "11", number = "4", pages = "797--821", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nacul:2006:STC, author = "Andr{\'e} C.
N{\'a}cul and Tony Givargis", title = "Synthesis of time-constrained multitasking embedded software", journal = j-TODAES, volume = "11", number = "4", pages = "822--847", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kang:2006:STA, author = "Kunhyuk Kang and Bipul C. Paul and Kaushik Roy", title = "Statistical timing analysis using levelized covariance propagation considering systematic and random variations of process parameters", journal = j-TODAES, volume = "11", number = "4", pages = "848--879", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kuo:2006:DID, author = "Wu-An Kuo and Tingting Hwang and Allen C.-H. 
Wu", title = "Decomposition of instruction decoders for low-power designs", journal = j-TODAES, volume = "11", number = "4", pages = "880--889", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2006:CML, author = "Yi-Yu Liu and Kuo-Hua Wang and Tingting Hwang", title = "Crosstalk minimization in logic synthesis for {PLAs}", journal = j-TODAES, volume = "11", number = "4", pages = "890--915", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Goren:2006:TSG, author = "Sezer G{\"o}ren and F. 
Joel Ferguson", title = "Test sequence generation for controller verification and test with high coverage", journal = j-TODAES, volume = "11", number = "4", pages = "916--938", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2006:MWR, author = "Zhong-Zhen Wu and Shih-Chieh Chang", title = "Multiple wire reconnections based on implication flow graph", journal = j-TODAES, volume = "11", number = "4", pages = "939--952", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2006:PDT, author = "Chi-Shong Wang and Chingwei Yeh", title = "Performance-driven technology mapping with {MSG} partition and selective gate duplication", journal = j-TODAES, volume = "11", number = "4", pages = "953--973", month = oct, year = "2006", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 15 06:47:05 MST 2006", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } 
@Article{Gangwar:2007:IIC, author = "Anup Gangwar and M. Balakrishnan and Anshul Kumar", title = "Impact of intercluster communication mechanisms on {ILP} in clustered {VLIW} architectures", journal = j-TODAES, volume = "12", number = "1", pages = "1:1--1:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "VLIW processors have started gaining acceptance in the embedded systems domain. However, monolithic register file VLIW processors with a large number of functional units are not viable. This is because of the need for a large number of ports to support FU requirements, which makes them expensive and extremely slow. A simple solution is to break the register file into a number of smaller register files with a subset of FUs connected to it. These architectures are termed clustered VLIW processors. In this article, we first build a case for clustered VLIW processors with four or more clusters by showing that the achievable ILP in most of the media applications for a 16 ALU and 8 LD/ST VLIW processor is around 20. We then provide a classification of the intercluster interconnection design space, and show that a large part of this design space is currently unexplored. Next, using our performance evaluation methodology, we evaluate a subset of this design space and show that the most commonly used type of interconnection, RF-to-RF, fails to meet achievable performance by a large factor, while certain other types of interconnections can lower this gap considerably. We also establish that this behavior is heavily application dependent, emphasizing the importance of application-specific architecture exploration. 
We also present results about the statistical behavior of these different architectures by varying the number of clusters in our framework from 4 to 16. These results clearly show the advantages of one specific architecture over others. Finally, based on our results, we propose a new interconnection network, which should lower this performance gap.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "ASIP; clustered VLIW processors; performance evaluation; VLIW", } @Article{Zamora:2007:SLP, author = "Nicholas H. Zamora and Xiaoping Hu and Radu Marculescu", title = "System-level performance\slash power analysis for platform-based design of multimedia applications", journal = j-TODAES, volume = "12", number = "1", pages = "2:1--2:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The objective of this article is to introduce the use of Stochastic Automata Networks (SANs) as an effective formalism for application-architecture modeling in system-level average-case analysis for platform-based design. By platform, we mean a family of heterogeneous architectures that satisfy a set of architectural constraints imposed to allow re-use of hardware and software components. More precisely, we show how SANs can be used early in the design cycle to identify the best performance/power trade-offs among several application-architecture combinations. Having this information available not only helps avoid lengthy simulations for predicting power and performance figures, but also enables efficient mapping of different applications onto a chosen platform. 
We illustrate the benefits of our methodology by using the ``Picture-in-Picture'' video decoder as a driver application.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "average-case analysis; design space exploration; hardware/software codesign; Markov chains; performance models; platform-based design; stochastic automata networks (SANs)", } @Article{Sham:2007:ARD, author = "Chiu-Wing Sham and Evangeline F. Y. Young", title = "Area reduction by deadspace utilization on interconnect optimized floorplan", journal = j-TODAES, volume = "12", number = "1", pages = "3:1--3:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Interconnect optimization has become the major concern in floorplanning. Many approaches would use simulated annealing (SA) with a cost function composed of a weighted sum of area, wirelength, and interconnect cost. These approaches can reduce the interconnect cost efficiently but the area penalty of the interconnect optimized floorplan is usually quite large. In this article, we propose an approach called deadspace utilization (DSU) to reclaim the unused area of an interconnect optimized floorplan by linear programming. Since modules are not necessarily rectangular in shape in floorplanning, some deadspace can be redistributed to the modules to increase the area occupied by each module. If the area of each module can be expanded by the same ratio, the whole floorplan can be compacted by that ratio to give a smaller floorplan. However, we will limit the compaction ratio to prevent overcongestion. 
Experiments show that we can apply this deadspace utilization technique to reduce the area and total wirelength of an interconnect optimized floorplan further while the routability can be maintained at the same time.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "area reduction; Floorplanning", } @Article{Li:2007:SBC, author = "Lei Li and Zhanglei Wang and Krishnendu Chakrabarty", title = "Scan-{BIST} based on cluster analysis and the encoding of repeating sequences", journal = j-TODAES, volume = "12", number = "1", pages = "4:1--4:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a built-in self-test (BIST) approach for full-scan designs that extracts the most frequently occurring sequences from deterministic test patterns. The extracted sequences are stored on-chip, and are used during test application. Three sets of test patterns are applied to the circuit under test during a BIST test session; these include pseudorandom patterns, semirandom patterns, and deterministic patterns. The semirandom patterns are generated based on the stored sequences and they are more likely to detect hard-to-detect faults than pseudorandom patterns. The deterministic patterns are encoded using either the stored sequences or the LFSR reseeding technique to reduce test data volume. We use the cluster analysis technique for sequence extraction to reduce the amount of data to be stored. 
Experimental results for the ISCAS-89 benchmark circuits show that the proposed approach often requires less on-chip storage and test data volume than other recent BIST methods.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Built-in self-test (BIST); clustering test data volume; test compression", } @Article{Cai:2007:WAD, author = "Yuan Cai and Marcus T. Schmitz and Bashir M. Al-Hashimi and Sudhakar M. Reddy", title = "Workload-ahead-driven online energy minimization techniques for battery-powered embedded systems with time-constraints", journal = j-TODAES, volume = "12", number = "1", pages = "5:1--5:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a new online voltage scaling (VS) technique for battery-powered embedded systems with real-time constraints. The VS technique takes into account the execution times and discharge currents of tasks to further reduce the battery charge consumption when compared to the recently reported slack forwarding technique [Ahmed and Chakrabarti 2004], while maintaining low online complexity of $O(1)$. Furthermore, we investigate the impact of online rescheduling and remapping on the battery charge consumption for tasks with data dependency which has not been explicitly addressed in the literature and propose a novel rescheduling/remapping technique. Finally, we take leakage power into consideration and extend the proposed online techniques to include adaptive body biasing (ABB) which is used to reduce the leakage power.
We demonstrate and compare the efficiency of the presented techniques using seven real-life benchmarks and numerous automatically generated examples.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "adaptive body biasing; battery; Dynamic voltage scaling; embedded systems", } @Article{Zhu:2007:HMF, author = "Xinping Zhu and Sharad Malik", title = "A hierarchical modeling framework for on-chip communication architectures of multiprocessing {SoCs}", journal = j-TODAES, volume = "12", number = "1", pages = "6:1--6:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In multiprocessor-based SoCs, optimizing the communication architecture is often as important, if not more important, than optimizing the computation architecture. While there are mature platforms and techniques for the modeling and evaluation of architectures of processing elements, the same is not true for the communication architectures. This article presents an application-driven retargetable prototyping platform that fills this gap. This environment aims to facilitate the design exploration of the communication subsystem through application-level execution-driven simulations and quantitative analysis. Based on an analysis of a wide range of on-chip communication architectures, we describe how a specific hierarchical class library can be used to develop new on-chip communication architectures, or variants of existing ones with relatively little incremental effort. We demonstrate this through three case studies including two commercial on-chip bus systems and an on-chip packet switching network. 
Here we show that, through careful analysis and construction, it is possible for the modeling environment to support the common features of these architectures as part of the library and permit instantiation of the individual architectures as variants of the library design. Consequently, system-level design choices regarding the communication architecture can be made with high confidence in the early stages of design. In addition to improving design quality, this methodology also results in significantly shortening design-time.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bus; design exploration; multiprocessor system; network-on-chip; object-oriented modeling; on-chip communication architecture; packet-switching network; Retargetable simulation", } @Article{Majumder:2007:HPV, author = "Subhashis Majumder and Susmita Sur-Kolay and Bhargab B. Bhattacharya and Swarup Kumar Das", title = "Hierarchical partitioning of {VLSI} floorplans by staircases", journal = j-TODAES, volume = "12", number = "1", pages = "7:1--7:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article addresses the problem of recursively bipartitioning a given floorplan F using monotone staircases. At each level of the hierarchy, a monotone staircase from one corner of F to its opposite corner is identified, such that (i) the two parts of the bipartition are nearly equal in area (or in the number of blocks), and (ii) the number of nets crossing the staircase is minimal. The problem of area-balanced bipartitioning is shown to be NP-hard, and a maxflow-based heuristic is proposed. 
Such a hierarchy may be useful to repeater placement in deep-submicron physical design, and also to global routing.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "balanced bipartitioning; Floorplanning; global routing; network flow; NP-completeness", } @Article{Lee:2007:ISS, author = "Jong-Eun Lee and Kiyoung Choi and Nikil D. Dutt", title = "Instruction set synthesis with efficient instruction encoding for configurable processors", journal = j-TODAES, volume = "12", number = "1", pages = "8:1--8:??", month = jan, year = "2007", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:29 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Application-specific instructions can significantly improve the performance, energy-efficiency, and code size of configurable processors. While generating new instructions from application-specific operation patterns has been a common way to improve the instruction set (IS) of a configurable processor, automating the design of ISs for given applications poses new challenges---how to create as well as utilize new instructions in a systematic manner, and how to choose the best set of application-specific instructions considering the various effects the new instructions may have on the data path and the compilation? To address these problems, we present a novel IS synthesis framework that optimizes the IS through an efficient instruction encoding for the given application as well as for the given data path architecture. 
We first build a library of new instructions created with various encoding alternatives taking into account the data path architecture constraints, and then select the best set of instructions while satisfying the instruction bitwidth constraint. We formulate the problem using integer linear programming and also present an effective heuristic algorithm. Experimental results using our technique generate ISs that show improvements of up to about 40\% over the native IS for several application benchmarks running on typical embedded RISC processors.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Application-specific instruction set processor (ASIP); bitwidth-economical; configurable processor; instruction encoding; ISA customization and specialization", } @Article{Dutt:2007:E, author = "Nikil Dutt", title = "Editorial", journal = j-TODAES, volume = "12", number = "2", pages = "9:1--9:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230801", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2007:DIC, author = "Chao Wang and Zijiang Yang and Franjo Ivan{\v{c}}i{\'c} and Aarti Gupta", title = "Disjunctive image computation for software verification", journal = j-TODAES, volume = "12", number = "2", pages = "10:1--10:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230802", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = 
"Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Existing BDD-based symbolic algorithms designed for hardware designs do not perform well on software programs. We propose novel techniques based on unique characteristics of software programs. Our algorithm divides an image computation step into a disjunctive set of easier ones that can be performed in isolation. We use hypergraph partitioning to minimize the number of live variables in each disjunctive component, and variable scopes to simplify transition relations and reachable state subsets. Our experiments on nontrivial C programs show that BDD-based symbolic algorithms can directly handle software models with a much larger number of state variables than for hardware designs.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "binary decision diagram; formal verification; image computation; Model checking; reachability analysis", } @Article{Mochocki:2007:TOA, author = "Bren Mochocki and Xiaobo Sharon Hu and Gang Quan", title = "Transition-overhead-aware voltage scheduling for fixed-priority real-time systems", journal = j-TODAES, volume = "12", number = "2", pages = "11:1--11:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230803", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Time transition overhead is a critical problem for hard real-time systems that employ dynamic voltage scaling (DVS) for power and energy management. 
While it is a common practice of much previous work to ignore transition overhead, these algorithms cannot guarantee deadlines and/or are less effective in saving energy when transition overhead is significant and not appropriately dealt with. In this article we introduce two techniques, one offline and one online, to correctly account for transition overhead in preemptive fixed-priority real-time systems. We present several DVS scheduling algorithms that implement these methods that can guarantee task deadlines under arbitrarily large transition time overheads and reduce energy consumption by as much as 40\% when compared to previous methods.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Dynamic voltage scaling; fixed priority; low power; scheduling; transition overhead", } @Article{Chang:2007:PLP, author = "Hongliang Chang and Sachin S. Sapatnekar", title = "Prediction of leakage power under process uncertainties", journal = j-TODAES, volume = "12", number = "2", pages = "12:1--12:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230804", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present a method to analyze the total leakage current of a circuit under process variations, considering interdie and intradie variations as well as the effect of the spatial correlations of intradie variations. The approach considers both the subthreshold and gate tunneling leakage power, as well as their interactions. 
With process variations, each leakage component is approximated by a lognormal distribution, and the total chip leakage is computed as a sum of the correlated lognormals. Since the lognormals to be summed are large in number and have complicated correlation structures due to both spatial correlations and the correlation among different leakage mechanisms, we propose an efficient method to reduce the number of correlated lognormals for summation to a manageable quantity. We do so by identifying dominant states of leakage currents and taking advantage of the spatial correlation model and input states at the gates. An improved approach utilizing the principal components computed from spatially correlated process parameters is also proposed to further improve runtime efficiency. We show that the proposed methods are effective in predicting the probability distribution of total chip leakage, and that ignoring spatial correlations can underestimate the standard deviation of full-chip leakage power.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Circuit; leakage; process variation; yield", } @Article{Mohanty:2007:MBE, author = "Sumit Mohanty and Viktor K. 
Prasanna", title = "A model-based extensible framework for efficient application design using {FPGA}", journal = j-TODAES, volume = "12", number = "2", pages = "13:1--13:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230805", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For an FPGA designer, several choices are available in terms of target FPGA devices, IP-cores, algorithms, synthesis options, runtime reconfiguration, degrees of parallelism, among others, while implementing a design. Evaluation of design alternatives in the early stages of the design cycle is important because the choices made can have a critical impact on the performance of the final design. However, a large number of alternatives not only results in a large number of designs, but also makes it a hard problem to efficiently manage, simulate, and evaluate them. In this article, we present a framework for FPGA-based application design that addresses the aforementioned issues. This framework supports a hierarchical modeling approach that integrates application and device modeling techniques and allows development of a library of models for design reuse. The framework integrates a high-level performance estimator for rapid estimation of the latency, area, and energy of the designs. In addition, a design space exploration tool allows efficient evaluation of candidate designs against the given performance requirements. The framework also supports extension through integration of widely used tools for FPGA-based design while presenting a unified environment for different target FPGAs. 
We demonstrate our framework through the modeling and performance estimation of a signal processing kernel and the design of end-to-end applications.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design tool; extensible; Modeling; reuse", } @Article{Tang:2007:PDF, author = "Weiyu Tang and Arun Kejariwal and Alexander V. Veidenbaum and Alexandru Nicolau", title = "A predictive decode filter cache for reducing power consumption in embedded processors", journal = j-TODAES, volume = "12", number = "2", pages = "14:1--14:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230806", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With advances in semiconductor technology, power management has increasingly become a very important design constraint in processor design. In embedded processors, instruction fetch and decode consume more than 40\% of processor power. This calls for development of power minimization techniques for the fetch and decode stages of the processor pipeline. For this, filter cache has been proposed as an architectural extension for reducing the power consumption. A filter cache is placed between the CPU and the instruction cache (I-cache) to provide the instruction stream. A filter cache has the advantages of shorter access time and lower power consumption. However, the downside of a filter cache is a possible performance loss in case of cache misses. \par In this article, we present a novel technique---decode filter cache (DFC)---for minimizing power consumption with minimal performance impact. The DFC stores decoded instructions. 
Thus, a hit in the DFC eliminates instruction fetch and its subsequent decoding. The bypassing of both instruction fetch and decode reduces processor power. We present a runtime approach for predicting whether the next fetch source is present in the DFC. In case a miss is predicted, we reduce the miss penalty by accessing the I-cache directly. We propose to classify instructions as cacheable or noncacheable, depending on the decode width. For efficient use of the cache space, a sectored cache design is used for the DFC so that both cacheable and noncacheable instructions can coexist in the DFC sector. Experimental results show that the DFC reduces processor power by 34\% on an average and our next fetch prediction mechanism reduces miss penalty by more than 91\%.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Cache; embedded processors; power optimization", } @Article{Issenin:2007:DDR, author = "Ilya Issenin and Erik Brockmeyer and Miguel Miranda and Nikil Dutt", title = "{DRDU}: a data reuse analysis technique for efficient scratch-pad memory management", journal = j-TODAES, volume = "12", number = "2", pages = "15:1--15:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230807", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In multimedia and other streaming applications, a significant portion of energy is spent on data transfers. 
Exploiting data reuse opportunities in the application, we can reduce this energy by making copies of frequently used data in a small local memory and replacing speed- and power-inefficient transfers from main off-chip memory by more efficient local data transfers. In this article we present an automated approach for analyzing these opportunities in a program that allows modification of the program to use custom scratch-pad memory configurations comprising a hierarchical set of buffers for local storage of frequently reused data. Using our approach we are able to both reduce energy consumption of the memory subsystem when using a scratch-pad memory by about a factor of two, on average, and improve memory system performance compared to a cache of the same size.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compiler analysis; data reuse analysis; memory hierarchy; Scratch-pad memory management", } @Article{Hosseinabady:2007:LTA, author = "Mohammad Hosseinabady and Pejman Lotfi-Kamran and Zainalabedin Navabi", title = "Low test application time resource binding for behavioral synthesis", journal = j-TODAES, volume = "12", number = "2", pages = "16:1--16:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230808", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent advances in process technology have led to a rapid increase in the density of integrated circuits (ICs). Increased density and the need to test for new types of defects in nanometer technologies have resulted in a tremendous increase in test application time (TAT). 
This article presents a test synthesis method to reduce test application time for testing the datapath of a design. The test application time is reduced by applying a test-time-aware resource sharing algorithm on a scheduled control data flow graph (CDFG) of a design.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "CDFG; high-level synthesis; test synthesis; Testability", } @Article{Elshoukry:2007:CPA, author = "Mohammed Elshoukry and Mohammad Tehranipoor and C. P. Ravikumar", title = "A critical-path-aware partial gating approach for test power reduction", journal = j-TODAES, volume = "12", number = "2", pages = "17:1--17:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230809", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power reduction during test application is important from the viewpoint of chip reliability and for obtaining correct test results. One of the ways to reduce scan test power is to block transitions propagating from the outputs of scan cells through combinational logic. In order to accomplish this, some researchers have proposed setting primary inputs to appropriate values or adding extra gates at the outputs of scan cells. In this article, we point out the limitations of such full gating techniques in terms of area overhead and performance degradation. We propose an alternate solution where a partial set of scan cells is gated. A subset of scan cells is selected to give maximum reduction in test power within a given area constraint. 
An alternate formulation of the problem is to treat maximum permitted test power as a constraint and achieve a test power that is within this limit using the fewest number of gated scan cells, thereby leading to the least impact in area overhead. Our problem formulation also comprehends performance constraints and prevents the inclusion of gating points on critical paths. The area overhead is predictable and closely corresponds to the average power reduction.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Low-power testing; partial gating; scan cell gating; scan testing", } @Article{Pomeranz:2007:FDT, author = "Irith Pomeranz and Sudhakar M. Reddy", title = "Forming {N}-detection test sets without test generation", journal = j-TODAES, volume = "12", number = "2", pages = "18:1--18:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230810", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We describe a procedure for forming $n$-detection test sets for $ n > 1$ without applying a test generation procedure to target faults. The proposed procedure accepts a one-detection test set. It extracts test cubes for target faults from the one-detection test set, and merges the test cubes to obtain new test vectors. By extracting and merging different test cubes in different iterations of this process, an $n$-detection test set is obtained. Merging of test cubes does not require test generation or fault simulation. Fault simulation is required for extracting test cubes for target faults. 
We demonstrate that the resulting test set is as effective in detecting untargeted faults as an $n$-detection test set generated by a deterministic test generation procedure. We also discuss the application of the proposed procedure starting from a random test set (instead of a one-detection test set).", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "$n$-detection test sets; Bridging faults; stuck-at faults; test generation", } @Article{Fan:2007:ECD, author = "Hongbing Fan and Jiping Liu and Yu-Liang Wu and Chak-Chung Cheung", title = "The exact channel density and compound design for generic universal switch blocks", journal = j-TODAES, volume = "12", number = "2", pages = "19:1--19:??", month = apr, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1230800.1230811", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:08:48 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A switch block of $k$ sides and $W$ terminals on each side is said to be universal (a $ (k, W)$-USB) if it is routable for every set of 2-pin nets of channel density at most $W$. The generic optimum universal switch block design problem is to design a $ (k, W)$-USB with the minimum number of switches for every pair of $ (k, W)$. This problem was first proposed and solved for $ k = 4$ in Chang et al. [1996], and then solved for even $W$ or for $ k \leq 6$ in Shyu et al. [2000] and Fan et al. [2002b]. No optimum $ (k, W)$-USB is known for $ k \geq 7$ and odd $ W \geq 3$.
But it is already known that when $W$ is a large odd number, a near-optimum $ (k, W)$-USB can be obtained by a disjoint union of $ (W - f_2 (k)) / 2$ copies of the optimum $ (k, 2)$-USB and a noncompound $ (k, f_2 (k))$-USB, where the value of $ f_2 (k)$ is unknown for $ k \geq 8$. In this article, we show that $ f_2 (k) = (k + 3 - i) / 3$, where $ 1 \leq i \leq 6$ and $ i \equiv k \pmod 6$, and present an explicit design for the noncompound $ (k, f_2 (k))$-USB. Combining these two results we obtain the exact designs of $ (k, W)$-USBs for all $ k \geq 7$ and odd $ W \geq 3$. The new $ (k, W)$-USB designs also yield an efficient detailed routing algorithm.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGA architecture; routing algorithm; universal switch block", } @Article{Lim:2007:ISI, author = "Sung Kyu Lim and Massoud Pedram", title = "Introduction to special issue on demonstrable software systems and hardware platforms", journal = j-TODAES, volume = "12", number = "3", pages = "20:1--20:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255457", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hsu:2007:ESC, author = "Chia-Jui Hsu and Ming-Yung Ko and Shuvra S.
Bhattacharyya and Suren Ramasubbu and Jos{\'e} Luis Pino", title = "Efficient simulation of critical synchronous dataflow graphs", journal = j-TODAES, volume = "12", number = "3", pages = "21:1--21:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255458", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "System-level modeling, simulation, and synthesis using electronic design automation (EDA) tools are key steps in the design process for communication and signal processing systems, and the synchronous dataflow (SDF) model of computation is widely used in EDA tools for these purposes. Behavioral representations of modern wireless communication systems typically result in critical SDF graphs: These consist of hundreds of components (or more) and involve complex intercomponent connections with highly multirate relationships (i.e., with large variations in average rates of data transfer or component execution across different subsystems). Simulating such systems using conventional SDF scheduling techniques generally leads to unacceptable simulation time and memory requirements on modern workstations and high-end PCs. In this article, we present a novel simulation-oriented scheduler (SOS) that strategically integrates several techniques for graph decomposition and SDF scheduling to provide effective, joint minimization of time and memory requirements for simulating critical SDF graphs. We have implemented SOS in the advanced design system (ADS) from Agilent Technologies. Our results from this implementation demonstrate large improvements in simulating real-world, large-scale, and highly multirate wireless communication systems (e.g., 3GPP, Bluetooth,
802.16e, CDMA 2000, XM radio, EDGE, and Digital TV).", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Scheduling; simulation; synchronous dataflow", } @Article{Herrera:2007:FHS, author = "Fernando Herrera and Eugenio Villar", title = "A framework for heterogeneous specification and design of electronic embedded systems in {SystemC}", journal = j-TODAES, volume = "12", number = "3", pages = "22:1--22:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255459", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work proposes a methodology which enables heterogeneous specification of complex, electronic systems in SystemC supporting the integration of components under different models of computation (MoCs). This feature is necessary in order to deal with the growing complexity, concurrency, and heterogeneity of electronic embedded systems. The specification methodology is based on the SystemC standard language. Nevertheless, the use of SystemC for heterogeneous system specification is not straightforward. The first problem to be addressed is the efficient and predictable mapping of untimed events required by abstract MoCs over the discrete-event MoC on which the SystemC simulation kernel is based. This mapping is essential in order to understand the simulation results provided by the SystemC model of those MoCs. The specification methodology proposes the set of rules and guidelines required by each specific MoC. Moreover, the methodology supports a smooth integration of several MoCs in the same system specification.
A set of facilities is provided covering the deficiencies of the language. These facilities constitute the methodology-specific library called HetSC. The methodology and associated library have been demonstrated to be useful for the specification of complex, heterogeneous embedded systems supporting essential design tasks such as performance analysis and SW generation.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Heterogeneous specification; models of computation; SystemC", } @Article{Lee:2007:CCA, author = "Hyung Gyu Lee and Naehyuck Chang and Umit Y. Ogras and Radu Marculescu", title = "On-chip communication architecture exploration: a quantitative evaluation of point-to-point, bus, and network-on-chip approaches", journal = j-TODAES, volume = "12", number = "3", pages = "23:1--23:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255460", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditionally, design-space exploration for systems-on-chip (SoCs) has focused on the computational aspects of the problem at hand. However, as the number of components on a single chip and their performance continue to increase, a shift from computation-based to communication-based design becomes mandatory. As a result, the communication architecture plays a major role in the area, performance, and energy consumption of the overall system. This article presents a comprehensive evaluation of three on-chip communication architectures targeting multimedia applications. 
Specifically, we compare and contrast the network-on-chip (NoC) with point-to-point (P2P) and bus-based communication architectures in terms of area, performance, and energy consumption. As the main contribution, we present complete P2P, bus-, and NoC-based implementations of a real multimedia application (i.e. the MPEG-2 encoder), and provide direct measurements using an FPGA prototype and actual video clips, rather than simulation and synthetic workloads. We also support the experimental findings through a theoretical analysis. Both experimental and analysis results show that the NoC architecture scales very well in terms of area, performance, energy, and design effort, while the P2P and bus-based architectures scale poorly on all accounts except for performance and area, respectively.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGA prototype; MPEG-2 encoder; Networks-on-chip; point-to-point; system-on-chip", } @Article{Ha:2007:PHS, author = "Soonhoi Ha and Sungchan Kim and Choonseung Lee and Youngmin Yi and Seongnam Kwon and Young-Pyo Joo", title = "{PeaCE}: a hardware-software codesign environment for multimedia embedded systems", journal = j-TODAES, volume = "12", number = "3", pages = "24:1--24:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255461", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Existent hardware-software (HW-SW) codesign tools mainly focus on HW-SW cosimulation to build a virtual prototyping environment that enables software design and system verification without need of making a hardware prototype. 
Not only HW-SW cosimulation, but also HW-SW codesign methodology involves system specification, functional simulation, design-space exploration, and hardware-software cosynthesis. The PeaCE codesign environment is the first full-fledged HW-SW codesign environment that provides seamless codesign flow from functional simulation to system synthesis. Targeting for multimedia applications with real-time constraints, PeaCE specifies the system behavior with a heterogeneous composition of three models of computation and utilizes features of the formal models maximally during the whole design process. It is also a reconfigurable framework in the sense that third-party design tools can be integrated to build a customized tool chain. Experiments with industry-strength examples prove the viability of the proposed technique.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design-space exploration; embedded systems; Hardware-software codesign; hardware-software cosimulation; model-based design", } @Article{Atienza:2007:HSE, author = "David Atienza and Pablo G. {Del Valle} and Giacomo Paci and Francesco Poletti and Luca Benini and Giovanni {De Micheli} and Jose M. Mendias and Roman Hermida", title = "{HW-SW} emulation framework for temperature-aware design in {MPSoCs}", journal = j-TODAES, volume = "12", number = "3", pages = "26:1--26:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255463", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "New tendencies envisage multiprocessor systems-on-chips (MPSoCs) as a promising solution for the consumer electronics market. 
MPSoCs are complex to design, as they must execute multiple applications (games, video) while meeting additional design constraints (energy consumption, time-to-market). Moreover, the rise of temperature in the die for MPSoCs can seriously affect their final performance and reliability. In this article, we present a new hardware-software emulation framework that allows designers a complete exploration of the thermal behavior of final MPSoC designs early in the design flow. The proposed framework uses FPGA emulation as the key element to model hardware components of the considered MPSoC platform at multimegahertz speeds. It automatically extracts detailed system statistics that are used as input to our software thermal library running in a host computer. This library calculates at runtime the temperature of on-chip components, based on the collected statistics from the emulated system and final floorplan of the MPSoC. This enables fast testing of various thermal management techniques. Our results show speedups of three orders of magnitude compared to cycle-accurate MPSoC simulators.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "emulation; FPGA; MPSoC; temperature; Thermal-aware design", } @Article{Wu:2007:EPM, author = "Wei Wu and Lingling Jin and Jun Yang and Pu Liu and Sheldon X.-D. 
Tan", title = "Efficient power modeling and software thermal sensing for runtime temperature monitoring", journal = j-TODAES, volume = "12", number = "3", pages = "25:1--25:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255462", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The evolution of microprocessors has been hindered by increasing power consumption and heat dissipation on die. An excessive amount of heat creates reliability problems, reduces the lifetime of a processor, and elevates the cost of cooling and packaging considerably. It is therefore imperative to be able to monitor the temperature variations across the die in a timely and accurate manner. \par Most current techniques rely on on-chip thermal sensors to report the temperature of the processor. Unfortunately, significant variation in chip temperature both spatially and temporally exposes the limitation of the sensors. We present a compensating approach to tracking chip temperature through an OS resident software module that generates live power and thermal profiles of the processor. We developed such a software thermal sensor (STS) in a Linux system with a Pentium 4 Northwood core. We employed highly efficient numerical methods in our model to minimize the overhead of temperature calculation. We also developed an efficient algorithm for functional unit power modeling. Our power and thermal models are calibrated and validated against on-chip sensor readings, thermal images of the Northwood heat spreader, and the thermometer measurements on the package.
The resulting STS offers detailed power and temperature breakdowns of each functional unit at runtime, enabling more efficient online power and thermal monitoring and management at a higher level, such as the operating system.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Power; thermal", } @Article{Huang:2007:ESC, author = "Po-Kuan Huang and Soheil Ghiasi", title = "Efficient and scalable compiler-directed energy optimization for realtime applications", journal = j-TODAES, volume = "12", number = "3", pages = "27:1--27:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255464", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With continuing shrinkage of technology feature sizes, the share of leakage in total energy consumption of digital systems continues to grow. Coordinated supply voltage and body bias throttling enables the compiler to better optimize the total energy consumption of the system in future technology nodes. We present a compilation technique that targets realtime applications running on embedded processors with combined dynamic voltage scaling (DVS) and adaptive body biasing (ABB) capabilities. Considering the delay and energy penalty of switching between operating modes of the processor, our compiler judiciously inserts mode-switch instructions in selected locations of the code and generates executable binary that is guaranteed to meet the deadline constraint. More importantly, our algorithm runs very fast and comes reasonably close to the theoretical limit of energy optimization using DVS + ABB.
At 65nm technology, we improve the energy dissipation of the generated code by an average of 33.20\% under deadline constraints. While our technique's improvement in energy dissipation over conventional DVS is marginal (6.91\%) at 130nm, the average improvement continues to grow to 13.19\%, 22.97\%, and 33.21\% for 90nm, 65nm, and 45nm technology nodes, respectively. Compared to a recent ILP-based competitor, we improve the runtime by more than three orders of magnitude, while producing improved results.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "energy-aware compiler; Leakage; technology scaling", } @Article{Shi:2007:CSO, author = "Yiyu Shi and Paul Mesa and Hao Yu and Lei He", title = "Circuit-simulated obstacle-aware {Steiner} routing", journal = j-TODAES, volume = "12", number = "3", pages = "28:1--28:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255465", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article develops circuit-simulated routing algorithms. We model the routing graph by an RC network with terminals as inputs, and show that the faster an output reaches its peak, the higher the possibility for the corresponding Hanan or escape node to become a Steiner point. This enables us to select Steiner points and then apply any minimum spanning tree algorithm to obtain obstacle-free or obstacle-aware Steiner routing.
Compared with existing algorithms, our algorithms have significant gain on either wirelength or runtime for obstacle-free routing, and on both wirelength and runtime for obstacle-aware routing.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "OARSMT; Routing; RSMT; simulation", } @Article{Chakrapani:2007:PSC, author = "Lakshmi N. Chakrapani and Pinar Korkmaz and Bilge E. S. Akgul and Krishna V. Palem", title = "Probabilistic system-on-a-chip architectures", journal = j-TODAES, volume = "12", number = "3", pages = "29:1--29:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255466", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Parameter variations, noise susceptibility, and increasing energy dissipation of CMOS devices have been recognized as major challenges in circuit and microarchitecture design in the nanometer regime. Among these, parameter variations and noise susceptibility are increasingly causing CMOS devices to behave in an ``unreliable'' or ``probabilistic'' manner. To address these challenges, a shift in design paradigm from current-day deterministic designs to ``statistical'' or ``probabilistic'' designs is deemed inevitable. To respond to this need, in this article, we introduce and study an entirely novel family of probabilistic architectures: the probabilistic system-on-a-chip (PSOC). PSOC architectures are based on CMOS devices rendered probabilistic due to noise, referred to as probabilistic CMOS or PCMOS devices.
We demonstrate that in addition to harnessing the probabilistic behavior of PCMOS devices, PSOC architectures yield significant improvements, both in energy consumed as well as performance in the context of probabilistic or randomized applications with broad utility. All of our application and architectural savings are quantified using the product of the energy and performance, denoted (energy $ \times $ performance): The PCMOS-based gains are as high as a substantial multiplicative factor of over 560 when compared to a competing energy-efficient CMOS-based realization. Our architectural design is application specific and involves navigating design space spanning the algorithm (application), its architecture (PSOC), and the probabilistic technology (PCMOS).", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded systems; probabilistic computing", } @Article{Hsieh:2007:FDC, author = "Ang-Chih Hsieh and Tzu-Teng Lin and Tsuang-Wei Chang and Tingting Hwang", title = "A functionality-directed clustering technique for low-power {MTCMOS} design---computation of simultaneously discharging current", journal = j-TODAES, volume = "12", number = "3", pages = "30:1--30:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255467", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multithreshold CMOS (MTCMOS) is a circuit style that can effectively reduce leakage power consumption. Sleep transistor sizing is the key issue when a MTCMOS circuit is designed.
If the size of sleep transistor is large enough, the circuit performance can surely be maintained but the area and dynamic power consumption of the sleep transistor may increase. On the other hand, if the sleep transistor size is too small, there will be significant performance degradation because of the increased resistance to ground. Previous approaches [Kao et al. 1998; Anis et al. 2002] to designing sleep transistor size are based mainly on mutually-exclusive discharge patterns. However, these approaches considered only the topology of a circuit (i.e., interconnections of nodes in the circuit-graph saving the functionality of node). We observed that any two possible simultaneously switching gates may not discharge at the same time in terms of functionality. Thus, we propose an algorithm to determine how to cluster cells to share sleep transistors, while taking both topology and functionality into consideration. Moreover, one placement refinement algorithm that takes clustering information into account will be presented. At the logic level, the results show that the proposed clustering method can achieve an average of 22\% reduction in terms of the number of unit-size sleep transistors as compared to a method that does not consider functionality. At the physical level, two placement results are discussed. The first is produced by a traditional placement tool plus topology check (functionality check) for insertion of sleep transistors. It shows that the functionality check algorithm produces 9\% less chip area as compared with the topology check algorithm. The second result is produced by a placement refinement algorithm where the initial placement is done in the first placement experiment. It shows that the placement refinement algorithm achieves 5\% more reduction in area at the expense of 4\% increase in wire length. 
Totally, around 14\% reduction is achieved by utilizing the clustering information.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "DSTN; low power; MTCMOS; sleep transistor", } @Article{Dastidar:2007:VST, author = "Tathagato Rai Dastidar and P. P. Chakrabarti", title = "A verification system for transient response of analog circuits", journal = j-TODAES, volume = "12", number = "3", pages = "31:1--31:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255468", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a method for application of formal techniques like model checking and equivalence checking for validation of the transient response of nonlinear analog circuits. We propose a temporal logic called Ana CTL (computational tree logic for analog circuit verification) which is suitable for specifying properties specific to analog circuits. The application of Ana CTL for validation of transient behavior of arbitrarily nonlinear analog circuits is presented. The transient response of a circuit under all possible input waveforms is represented as a finite state machine (FSM), by bounding and discretizing the continuous state space of an analog circuit. We have developed algorithms to run Ana CTL queries on this discretized model using search-based methods which reduce the runtime considerably by avoiding creation of the whole FSM. The application of these methods on several real-life analog circuits is presented and we show that this system is a useful aid for detecting and debugging early design errors. 
\par We also present methods for checking the equivalence of transient response of two analog circuits. The behavior of two different analog circuits can rarely be exactly similar. Hence, we introduce a notion of approximate equivalence. A query language for checking different notions of user-definable approximate equivalence is presented which extends the syntax of the Ana CTL model checking language. In its extended form, Ana CTL can be used combining model checking with equivalence checking.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Ana CTL; Analog circuits; equivalence checking; model checking; query language; transient response", } @Article{Chang:2007:PRE, author = "Kai-Hui Chang and Igor L. Markov and Valeria Bertacco", title = "Postplacement rewiring by exhaustive search for functional symmetries", journal = j-TODAES, volume = "12", number = "3", pages = "32:1--32:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255469", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose two new algorithms for rewiring: a postplacement optimization that reconnects pins of a given netlist without changing the logic function and gate locations. In the first algorithm, we extract small subcircuits consisting of several gates from the design and reconnect pins according to the symmetries of the subcircuits. To enhance the power of symmetry detection, we also propose a graph-based symmetry detector that can identify permutational and phase-shift symmetries on multiple input and output wires, as well as hybrid symmetries, creating abundant opportunities for rewiring. 
Our second algorithm, called long-range rewiring, is based on reconnecting equivalent pins and can augment the first approach for further optimization. We apply our techniques for wirelength optimization and observe that they provide wirelength reduction comparable to that achieved by detailed placement.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "placement; rewiring; VLSI", } @Article{Mathaikutty:2007:EMD, author = "Deepak Mathaikutty and Hiren Patel and Sandeep Shukla and Axel Jantsch", title = "{EWD}: a metamodeling driven customizable multi-{MoC} system modeling framework", journal = j-TODAES, volume = "12", number = "3", pages = "33:1--33:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255470", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present the EWD design environment and methodology, a modeling and simulation framework suited for complex and heterogeneous embedded systems with varying degrees of expressibility and modeling fidelity. This environment promotes the use of multiple models of computation (MoCs) to support heterogeneity and metamodeling for conformance tests of syntactic and static semantics during the process of modeling. Therefore, EWD is a multiple MoC modeling and simulation framework that ensures conformance of the MoC formalisms during model construction using a metamodeling approach. In addition, EWD provides a suite of translation tools that generate executable models for two simulation frameworks to demonstrate its language-independent modeling framework. 
The EWD methodology uses the Generic Modeling Environment for customization of the MoC-specific modeling syntax into a visual representation. To embed the execution semantics of the MoCs into the models, we have built parsing and translation tools that leverage an XML-based interoperability language. This interoperability language is then translated into executable Standard ML or Haskell models that can also be analyzed by existing simulation frameworks such as SML-Sys or ForSyDe. In summary, EWD is a metamodeling driven multitarget design environment with multi-MoC modeling capability.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "denotational semantics; ForSyDe; functional language; heterogeneous system design; interoperable modeling language; metamodel; Metamodeling; MoC; Ptolemy II; SystemC", } @Article{Stitt:2007:BS, author = "Greg Stitt and Frank Vahid", title = "Binary synthesis", journal = j-TODAES, volume = "12", number = "3", pages = "34:1--34:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255471", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent high-level synthesis approaches and C-based hardware description languages attempt to improve the hardware design process by allowing developers to capture desired hardware functionality in a well-known high-level source language. However, these approaches have yet to achieve wide commercial success due in part to the difficulty of incorporating such approaches into software tool flows. 
The requirement of using a specific language, compiler, or development environment may cause many software developers to resist such approaches due to the difficulty and possible instability of changing well-established robust tool flows. Thus, in the past several years, synthesis from binaries has been introduced, both in research and in commercial tools, as a means of better integrating with tool flows by supporting all high-level languages and software compilers. Binary synthesis can be more easily integrated into a software development tool-flow by only requiring an additional backend tool, and it even enables completely transparent dynamic translation of executing binaries to configurable hardware circuits. In this article, we survey the key technologies underlying the important emerging field of binary synthesis. We compare binary synthesis to several related areas of research, and we then describe the key technologies required for effective binary synthesis: decompilation techniques necessary for binary synthesis to achieve results competitive with source-level synthesis, hardware/software partitioning methods necessary to find critical binary regions suitable for synthesis, synthesis methods for converting regions to custom circuits, and binary update methods that enable replacement of critical binary regions by circuits.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Binary synthesis; configurable logic; FPGA; hardware/software codesign; hardware/software partitioning; synthesis from software binaries; warp processors", } @Article{Galanis:2007:SES, author = "Michalis D. Galanis and Gregory Dimitroulakos and Spyros Tragoudas and Costas E. 
Goutis", title = "Speedups in embedded systems with a high-performance coprocessor datapath", journal = j-TODAES, volume = "12", number = "3", pages = "35:1--35:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255472", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents the speedups achieved in a generic single-chip microprocessor system by employing a high-performance datapath. The datapath acts as a coprocessor that accelerates computational-intensive kernel sections thereby increasing the overall performance. We have previously introduced the datapath which is composed of Flexible Computational Components (FCCs). These components can realize any two-level template of primitive operations. The automated coprocessor synthesis method from high-level software description and its integration to a design flow for executing applications on the system is presented. For evaluating the effectiveness of our coprocessor approach, analytical study in respect to the type of the custom datapath and to the microprocessor architecture is performed. The overall application speedups of several real-life applications relative to the software execution on the microprocessor are estimated using the design flow. These speedups range from 1.75 to 5.84, with an average value of 3.04, while the overhead in circuit area is small. The design flow achieved the acceleration of the applications near to theoretical speedup bounds. A comparison with another high-performance datapath showed that the proposed coprocessor achieves smaller area-time products by an average of 23\% for the generated datapaths. 
Additionally, the FCC coprocessor achieves better performance in accelerating kernels relative to software-programmable DSP cores.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "chaining; coprocessor datapath; design flow; kernels; Performance improvements; synthesis", } @Article{Roy:2007:EPA, author = "Suchismita Roy and P. P. Chakrabarti and Pallab Dasgupta", title = "Event propagation for accurate circuit delay calculation using {SAT}", journal = j-TODAES, volume = "12", number = "3", pages = "36:1--36:??", month = aug, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1255456.1255473", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:12 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A SAT-based modeling for event propagation in gate-level digital circuits, which is used for accurate calculation of critical delay in combinational and sequential circuits, is presented in this article. The accuracy of the critical delay estimation process depends on the accuracy with which the circuit in operation is modeled. A high level of precision in the modeling of the internal events in a circuit for the sake of greater accuracy causes a combinatorial blowup in the size of the problem, resulting in a scalability bottleneck for which most existing techniques effect a trade-off by restricting themselves to less precise models. SAT based techniques have a good track record in efficiency and scalability when the problem sizes become too large for most other methods. 
This article proposes a SAT-based technique for symbolic event propagation within a circuit which facilitates the estimation of the critical delay of circuits with a greater degree of accuracy, while at the same time scaling efficiently to large circuits. We report very encouraging results on the ISCAS85 and ISCAS89 benchmark circuits using the proposed technique.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Critical delay; event propagation; SAT", } @Article{Yuh:2007:TFU, author = "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang", title = "Temporal floorplanning using the three-dimensional transitive closure {subGraph}", journal = j-TODAES, volume = "12", number = "4", pages = "37:1--37:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278350", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Improving logic capacity by time-sharing, dynamically reconfigurable Field Programmable Gate Arrays (FPGAs) are employed to handle designs of high complexity and functionality. In this paper, we use a novel graph-based topological floorplan representation, named 3D-subTCG (3-Dimensional Transitive Closure subGraph), to deal with the 3-dimensional (temporal) floorplanning/placement problem, arising from dynamically reconfigurable FPGAs. The 3D-subTCG uses three transitive closure graphs to model the temporal and spatial relations between modules. We derive the feasibility conditions for the precedence constraints induced by the execution of the dynamically reconfigurable FPGAs. 
Because the geometric relationship is transparent to the 3D-subTCG and its induced operations (i.e., we can directly detect the relationship between any two tasks from the representation), we can easily detect any violation of the temporal precedence constraints on 3D-subTCG. We also derive important properties of the 3D-subTCG to reduce the solution space and shorten the running time for 3D (temporal) floorplanning/placement. Experimental results show that our 3D-subTCG-based algorithm is very effective and efficient.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "partially dynamical reconfiguration; Reconfigurable computing; temporal floorplanning", } @Article{Liu:2007:IEM, author = "Jinfeng Liu and Pai H. Chou", title = "Idle energy minimization by mode sequence optimization", journal = j-TODAES, volume = "12", number = "4", pages = "38:1--38:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278351", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents techniques for reducing idle energy by mode-sequence optimization (MSO) under timing constraints. Our component-level CoMSO algorithm computes energy-optimal mode-transition sequences for different lengths of idle intervals. Our system-level SyMSO algorithm shifts tasks within slack intervals while satisfying all timing and resource constraints in the given schedule. 
Experimental results on a commercial software-defined radio show that these new techniques can reduce idle energy by 50--70\%, or 30--50\% of total system energy over previous offline-optimal but unsequenced techniques based on localized break-even-time analysis, thanks to rich options offered by mode sequencing.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "communication speed selection; communication/computation trade-offs; embedded multi-processor; Functional partitioning; low-power design", } @Article{Gorjiara:2007:UFE, author = "Bita Gorjiara and Nader Bagherzadeh and Pai H. Chou", title = "Ultra-fast and efficient algorithm for energy optimization by gradient-based stochastic voltage and task scheduling", journal = j-TODAES, volume = "12", number = "4", pages = "39:1--39:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278352", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This paper presents a new technique, called Adaptive Stochastic Gradient Voltage-and-Task Scheduling (ASG-VTS), for power optimization of multicore hard realtime systems. ASG-VTS combines stochastic and energy-gradient techniques to simultaneously solve the slack distribution and task reordering problem. It produces very efficient results with few mode transitions. Our experiments show that ASG-VTS reduces number of mode transitions by 4.8 times compared to traditional energy-gradient-based approaches. Also, our heuristic algorithm can quickly find a solution that is as good as the optimal for a real-life GSM encoder/decoder benchmark. 
The runtime of ASG-VTS is 150 times and 1034 times faster than energy-gradient based and optimal ILP algorithms, respectively. Since the runtime of ASG-VTS is very low, it is ideal for design space exploration in system-level design tools. We have also developed a web-based interface for ASG-VTS algorithm.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Power management; slack distribution; voltage and task scheduling", } @Article{Vanbroekhoven:2007:PDS, author = "Peter Vanbroekhoven and Gerda Janssens and Maurice Bruynooghe and Francky Catthoor", title = "A practical dynamic single assignment transformation", journal = j-TODAES, volume = "12", number = "4", pages = "40:1--40:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278353", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This paper presents a novel method to construct a dynamic single assignment (DSA) form of array intensive, pointer free C programs. A program in DSA form does not perform any destructive update of scalars and array elements; that is, each element is written at most once. As DSA makes the dependencies between variable references explicit, it facilitates complex analyses and optimizations of programs. Existing transformations into DSA perform a complex data flow analysis with exponential analysis time, and they work only for a limited class of input programs. Our method removes irregularities from the data flow by adding copy assignments to the program, so that it can use simple data flow analyses. 
The presented DSA transformation scales very well with growing program sizes and overcomes a number of important limitations of existing methods. We have implemented the method and it is being used in the context of memory optimization and verification of those optimizations. Experiments show that in practice, the method scales well indeed, and that added copy operations can be removed in case they are unwanted.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "arrays; Data flow analysis; parallelization; reaching definitions; single assignment", } @Article{Kobayashi:2007:MOS, author = "Yuki Kobayashi and Murali Jayapala and Praveen Raghavan and Francky Catthoor and Masaharu Imai", title = "Methodology for operation shuffling and {L0} cluster generation for low energy heterogeneous {VLIW} processors", journal = j-TODAES, volume = "12", number = "4", pages = "41:1--41:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278354", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Clustering L0 buffers is effective for energy reduction in the instruction memory hierarchy of embedded VLIW processors. However, the efficiency of the clustering depends on the schedule of the target application. Especially in heterogeneous or data clustered VLIW processors, determining energy efficient scheduling is more constraining. \par This article proposes a realistic technique supported by a tool flow to explore operation shuffling for improving generation of L0 clusters. The tool flow explores assignment of operations for each cycle and generates various schedules. 
This approach makes it possible to reduce energy consumption for various processor architectures. However, the computational complexity is large because of the huge exploration space. Therefore, some heuristics are also developed, which reduce the size of the exploration space while the solution quality remains reasonable. Furthermore, we also propose a technique to support VLIW processors with multiple data clusters, which is essential to apply the methodology to real world processors. \par The experimental results indicate potential gains of up to 27.6\% in energy in L0 buffers, through operation shuffling for heterogeneous processor architectures as well as a homogeneous architecture. Furthermore, the proposed heuristics drastically reduce the exploration search space by about 90\%, while the results are comparable to full search, with average differences of less than 1\%. The experimental results indicate that energy efficiency can be improved in most of the media benchmarks by the proposed methodology, where the average gain is around 10\% in comparison with generating clusters without operation shuffling.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Compilers for low energy; loop buffers; VLIW processors", } @Article{Maslov:2007:TSR, author = "D. Maslov and G. W. Dueck and D. M. 
Miller", title = "Techniques for the synthesis of reversible {Toffoli} networks", journal = j-TODAES, volume = "12", number = "4", pages = "42:1--42:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278355", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present certain new techniques for the synthesis of reversible networks of Toffoli gates, as well as improvements to previous methods. Gate count and technology oriented cost metrics are used. Two new synthesis procedures employing Reed--Muller spectra are introduced and shown to complement earlier synthesis approaches. The previously proposed template simplification method is enhanced through the introduction of a faster and more efficient template application algorithm, an updated classification of the templates, and the addition of new templates of sizes 7 and 9. A resynthesis approach is introduced wherein a sequence of gates is chosen from a network, and the reversible specification it realizes is resynthesized as an independent problem in hopes of reducing the network cost. 
Empirical results are presented to show that the methods are efficient in terms of the realization of reversible benchmark specifications.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "circuit optimization; quantum computing; reversible logic synthesis", } @Article{Bouchebaba:2007:MMO, author = "Youcef Bouchebaba and Bruno Girodias and Gabriela Nicolescu and El Mostapha Aboulhamid and Bruno Lavigueur and Pierre Paulin", title = "{MPSoC} memory optimization using program transformation", journal = j-TODAES, volume = "12", number = "4", pages = "43:1--43:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278356", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multiprocessor system-on-a-chip (MPSoC) architectures have received a lot of attention in the past years, but few advances in compilation techniques target these architectures. This is particularly true for the exploitation of data locality. Most of the compilation techniques for parallel architectures discussed in the literature are based on a single loop nest. This article presents new techniques that consist in applying loop fusion and tiling to several loop nests and to parallelize the resulting code across different processors. These two techniques reduce the number of memory accesses. However, they increase dependencies and thereby reduce the exploitable parallelism in the code. This article tries to address this contradiction. To optimize the memory space used by temporary arrays, smaller buffers are used as a replacement. Different strategies are studied to optimize the processing time spent accessing these buffers. 
The experiments show that these techniques yield a significant reduction in the number of data cache misses (30\%) and in processing time (50\%).", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compiler transformations; data cache; Data locality; embedded systems", } @Article{Das:2007:FVT, author = "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar", title = "Functional verification of task partitioning for multiprocessor embedded systems", journal = j-TODAES, volume = "12", number = "4", pages = "44:1--44:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278357", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the advent of multiprocessor embedded platforms, application partitioning and mapping have gained primacy as a design step. The output of this design step is a multithreaded partitioned application where each thread is mapped to a processing element (processor or ASIC) in the multiprocessor platform. This partitioned application must be verified to be consistent with the native unpartitioned application. This verification task is called application (or task) partitioning verification. \par This work proposes a code-block-level containment-checking-based methodology for application partitioning verification. We use a UML-based code-block-level modeling language which is rich enough to model most designs. We formulate the application partitioning verification problem as a special case of the containment checking problem, which we call the complete containment checking problem. 
We propose a state space reduction technique specific to the containment checking, reachability analysis, and deadlock detection problems. We propose novel data structures and token propagation methodologies which enhance the efficiency of containment checking. We present an efficient containment checking algorithm for the application partitioning verification problem. We develop a containment checking tool called TraceMatch and present experimental results. We present a comparison of the state space reduction achieved by TraceMatch with that achieved by formal analysis and verification tools like Spin, PEP, PROD, and LoLA.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Containment checking; multiprocessor embedded systems; state space reduction; UML activity diagrams", } @Article{Huang:2007:CSS, author = "Shih-Hsu Huang and Yow-Tyng Nieh", title = "Clock skew scheduling with race conditions considered", journal = j-TODAES, volume = "12", number = "4", pages = "45:1--45:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278358", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we provide a fresh viewpoint to the interactions between clock skew scheduling and delay insertion. A race-condition-aware (RCA) clock skew scheduling is proposed to determine the clock skew schedule by taking race conditions (i.e., hold violations) into account. Our objective is not only to optimize the clock period, but also to minimize heuristically the required inserted delay. Compared with previous work, our major contribution includes the following two aspects. 
First, our approach achieves exactly the same results, but has significant improvement in time complexity. Second, our viewpoint can be generalized to other sequential timing optimization techniques.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "logic synthesis; performance optimization; Sequential circuits; timing optimization", } @Article{Wang:2007:ETR, author = "Gang Wang and Wenrui Gong and Brian Derenzi and Ryan Kastner", title = "Exploring time\slash resource trade-offs by solving dual scheduling problems with the ant colony optimization", journal = j-TODAES, volume = "12", number = "4", pages = "46:1--46:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278359", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Design space exploration during high-level synthesis is often conducted through ad hoc probing of the solution space using some scheduling algorithm. This is not only time consuming but also very dependent on designer's experience. We propose a novel design exploration method that exploits the duality of time- and resource-constrained scheduling problems. Our exploration automatically constructs a time/area tradeoff curve in a fast, effective manner. It is a general approach and can be combined with any high-quality scheduling algorithm. In our work, we use the max-min ant colony optimization technique to solve both time- and resource-constrained scheduling problems. Our algorithm provides significant solution-quality savings (average 17.3\%
reduction of resource counts) with similar runtime compared to using force-directed scheduling exhaustively at every time step. It also scales well across a comprehensive benchmark suite constructed with classic and real-life samples.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "ant colony optimization; Design space exploration; instruction scheduling; max-min ant system", } @Article{Ghosh:2007:LPT, author = "Swaroop Ghosh and Swarup Bhunia and Kaushik Roy", title = "Low-Power and Testable Circuit Synthesis Using {Shannon} Decomposition", journal = j-TODAES, volume = "12", number = "4", pages = "47:1--47:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278360", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:34:15 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/bibnet/authors/s/shannon-claude-elwood.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Structural transformation of a design to enhance its testability while satisfying design constraints on power and performance can result in improved test cost and test confidence. In this article, we analyze the testability in a new style of logic design based on Shannon's decomposition and supply gating. We observe that the tree structure of a logic circuit due to Shannon's decomposition makes it intrinsically more testable than a conventionally synthesized circuit, while at the same time providing an improvement in active power. We have analyzed four different aspects of the testability of a circuit: (a) IDDQ test sensitivity, (b) test power during scan-based testing, (c) test length (for both ATPG-generated deterministic and random patterns), and (d) noise immunity. 
Simulation results on a set of MCNC benchmarks show promising results on all these aspects (an average improvement of 94\% in IDDQ sensitivity, 50\% in test power, 19\% (21\%) in test length for deterministic (random) patterns, and 50\% in coupling noise immunity). We have also demonstrated that the new logic structure can improve parametric yield (6\% on average) of a circuit under process variations when considering a bound on circuit leakage.", acknowledgement = ack-nhfb, acmid = "1278360", articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Design-for-test; dynamic supply gating; IDDQ; noise immunity; Shannon expansion; test coverage; test power", subject-dates = "Claude Elwood Shannon (1916--2001)", } @Article{Ostler:2007:IHT, author = "Chris Ostler and Karam S. Chatha and Vijay Ramamurthi and Krishnan Srinivasan", title = "{ILP} and heuristic techniques for system-level design on network processor architectures", journal = j-TODAES, volume = "12", number = "4", pages = "48:1--48:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278361", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Network processors incorporate several architectural features, including symmetric multiprocessing (SMP), block multithreading, and multiple memory elements, to support the high-performance requirements of current day applications. This article presents automated system-level design techniques for application development on such architectures. We propose integer linear programming formulations and heuristic techniques for process allocation and data mapping on SMP and block-multithreading-based network processors. 
The techniques incorporate process transformations and multithreading-aware data mapping to maximize the throughput of the application. The article presents experimental results that evaluate the techniques by implementing network processing applications on the Intel IXP 2400 architecture.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "block multithreading; multiprocessor", } @Article{Gopalakrishnan:2007:OPD, author = "Sivaram Gopalakrishnan and Priyank Kalla", title = "Optimization of polynomial datapaths using finite ring algebra", journal = j-TODAES, volume = "12", number = "4", pages = "49:1--49:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278362", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents an approach to area optimization of arithmetic datapaths at register-transfer level (RTL). The focus is on those designs that perform polynomial computations (add, mult) over finite word-length operands (bit-vectors). We model such polynomial computations over $m$-bit vectors as algebra over finite integer rings of residue classes $ Z_{2^m} $. Subsequently, we use the number-theoretic and algebraic properties of such rings to transform a given datapath computation into another, bit-true equivalent computation. We also derive a cost model to estimate, at RTL, the area cost of the computation. Using the transformation procedure along with the cost model, we devise algorithmic procedures to search for a lower-cost implementation. We show how these theoretical concepts can be applied to RTL optimization of arithmetic datapaths within practical CAD settings.
Experiments conducted over a variety of benchmarks demonstrate substantial optimizations using our approach.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "arithmetic datapaths; finite ring algebra; High-level synthesis; modulo arithmetic; polynomial datapaths", } @Article{Hu:2007:IHM, author = "Q. Hu and P. G. Kjeldsberg and A. Vandecappelle and M. Palkovic and F. Catthoor", title = "Incremental hierarchical memory size estimation for steering of loop transformations", journal = j-TODAES, volume = "12", number = "4", pages = "50:1--50:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278363", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern embedded multimedia and telecommunications systems need to store and access huge amounts of data. This becomes a critical factor for the overall energy consumption, area, and performance of the systems. Loop transformations are essential to improve the data access locality and regularity in order to optimally design or utilize a memory hierarchy. However, due to abstract high-level cost functions, current loop transformation steering techniques do not take the memory platform sufficiently into account. They usually also result in only one final transformation solution. On the other hand, the loop transformation search space for real-life applications is huge, especially if the memory platform is still not fully fixed. Use of existing loop transformation techniques will therefore typically lead to suboptimal end-products. It is critical to find all interesting loop transformation instances. 
This can only be achieved by performing an evaluation of the effect of later design stages at the early loop transformation stage. \par This article presents a fast incremental hierarchical memory-size requirement estimation technique. It estimates the influence of any given sequence of loop transformation instances on the mapping of application data onto a hierarchical memory platform. As the exact memory platform instantiation is often not yet defined at this high-level design stage, a platform-independent estimation is introduced with a Pareto curve output for each loop transformation instance. Comparison among the Pareto curves helps the designer, or a steering tool, to find all interesting loop transformation instances that might later lead to low-power data mapping for any of the many possible memory hierarchy instances. Initially, the source code is used as input for estimation. However, performing the estimation repeatedly from the source code is too slow for large search space exploration. An incremental approach, based on local updating of the previous result, is therefore used to handle sequences of different loop transformations. Experiments show that the initial approach takes a few seconds, which is two orders of magnitude faster than state-of-the-art solutions but still too costly to be performed interactively many times. The incremental approach typically takes just a few milliseconds, which is another two orders of magnitude faster than the initial approach. 
This huge speedup allows us for the first time to handle real-life industrial-size applications and get realistic feedback during loop transformation exploration.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code transformation; Data optimization; high-level synthesis; memory architecture exploration; memory size estimation", } @Article{You:2007:CCP, author = "Yi-Ping You and Chung-Wen Huang and Jenq Kuen Lee", title = "Compilation for compact power-gating controls", journal = j-TODAES, volume = "12", number = "4", pages = "51:1--51:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278364", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power leakage constitutes an increasing fraction of the total power consumption in modern semiconductor technologies due to the continuing size reductions and increasing speeds of transistors. Recent studies have attempted to reduce leakage power using integrated architecture and compiler power-gating mechanisms. This approach involves compilers inserting instructions into programs to shut down and wake up components, as appropriate. While early studies showed this approach to be effective, there are concerns about the large amount of power-control instructions being added to programs due to the increasing amount of components equipped with power-gating controls in SoC design platforms. In this article we present a sink-n-hoist framework for a compiler to generate balanced scheduling of power-gating instructions. 
Our solution attempts to merge several power-gating instructions into a single compound instruction, thereby reducing the amount of power-gating instructions issued. We performed experiments by incorporating our compiler analysis and scheduling policies into SUIF compiler tools and by simulating the energy consumption using Wattch toolkits. The experimental results demonstrate that our mechanisms are effective in reducing the amount of power-gating instructions while further reducing leakage power compared to previous methods.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "balanced scheduling; Compilers for low power; data-flow analysis; leakage-power reduction; power-gating mechanisms", } @Article{Chen:2007:NMA, author = "Gang Chen and Xiaoyu Song and Feng Liu and Qingping Tan and Fei He", title = "A note on {``A mapping algorithm for computer-assisted exploration in the design of embedded systems''}", journal = j-TODAES, volume = "12", number = "4", pages = "52:1--52:??", month = sep, year = "2007", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1278349.1278365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:09:35 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", note = "See \cite{Mariatos:2001:MAC}.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dutt:2008:Ea, author = "Nikil Dutt", title = "Editorial", journal = j-TODAES, volume = "13", number = "1", pages = "1:1--1:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297667", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = 
"1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hsiao:2008:ISS, author = "Michael S. Hsiao and Robert B. Jones", title = "Introduction to special section on high-level design, validation, and test", journal = j-TODAES, volume = "13", number = "1", pages = "2:1--2:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297668", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cabodi:2008:BID, author = "Gianpiero Cabodi and Marco Murciano and Sergio Nocco and Stefano Quer", title = "Boosting interpolation with dynamic localized abstraction and redundancy removal", journal = j-TODAES, volume = "13", number = "1", pages = "3:1--3:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297669", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "SAT--based Unbounded Model Checking based on Craig Interpolants is often able to overcome BDDs and other SAT--based techniques on large verification instances. 
Based on refutation proofs generated by SAT solvers, interpolants provide compact circuit representations of state sets, as they abstract away several nonrelevant details of the proofs. We propose three main contributions, aimed at controlling interpolant size and traversal depth. First of all, we introduce interpolant--based dynamic abstraction to reduce the support of computed interpolants. Subsequently, we propose new advances in interpolant compaction by redundancy removal. Finally, we introduce interpolant computation exploiting circuit quantification, instead of SAT refutation proofs. These techniques heavily rely on an effective application of the incremental SAT paradigm. The experimental results proposed in this paper are specifically oriented to prove properties, rather than disproving them, i.e., they target complete verification instead of simply hunting bugs. They show how this methodology is able to stretch the applicability of interpolant--based Model Checking to larger and deeper verification instances.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "abstraction; Interpolant; redundancy removal", } @Article{Boule:2008:ABA, author = "Marc Boul{\'e} and Zeljko Zilic", title = "Automata-based assertion-checker synthesis of {PSL} properties", journal = j-TODAES, volume = "13", number = "1", pages = "4:1--4:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297670", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Assertion-based verification with languages such as PSL is gaining in importance. 
From assertions, one can generate hardware assertion checkers for use in emulation, simulation acceleration and silicon debug. We present techniques for checker generation of the complete set of PSL properties, including all variants of operators, both strong and weak. A full automata-based approach allows an entire assertion to be represented by a single automaton, hence allowing optimizations that can not be done in a modular approach where subcircuits are created only for individual operators. For this purpose, automata algorithms are developed for the base cases, and a complete set of rewrite rules is derived for other operators. Automata splitting is introduced for an efficient implementation of the eventually! operator.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "assertion checkers; Assertion-Based Verification; automata; emulation; hardware; PSL", } @Article{Rahaman:2008:CTB, author = "H. Rahaman and J. Mathew and D. K. Pradhan and A. M. Jabir", title = "{C}-testable bit parallel multipliers over {$ {\rm GF}(2^m) $}", journal = j-TODAES, volume = "13", number = "1", pages = "5:1--5:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297671", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a C-testable design of polynomial basis (PB) bit-parallel (BP) multipliers over $ {\rm GF}(2^m) $ for 100\% coverage of stuck-at faults. Our design method also includes the method for test vector generation, which is simple and efficient. C-testability is achieved with three control inputs and approximately 6\% additional hardware.
Only 8 constant vectors are required irrespective of the sizes of the fields and primitive polynomial. We also present a Built-In Self-Test (BIST) architecture for generating the test vectors efficiently, which eliminates the need for the extra control inputs. Since these circuits have critical applications as parts of cryptography (e.g., Elliptic Curve Crypto (ECC) systems) hardware, the BIST architecture may provide with added level of security, as the tests would be done internally and without the requirement of probing by external testing equipment. Finally we present experimental results comprising the area, delay and power of the testable multipliers of various sizes with the help of the Synopsys{\reg} tools using UMC 0.18 micron CMOS technology library.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "built-in self-test; C-testable; cryptography; digital signal processing; error control code; fault; Galois field; multiplier; polynomials; stuck-at fault; testing; TPG; VLSI design", } @Article{Taktak:2008:TAD, author = "Sami Taktak and Jean-Lou Desbarbieux and Emmanuelle Encrenaz", title = "A tool for automatic detection of deadlock in wormhole networks on chip", journal = j-TODAES, volume = "13", number = "1", pages = "6:1--6:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297672", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present an extension of Duato's necessary and sufficient condition a routing function must satisfy in order to be deadlock-free, to support environment constraints inducing extra-dependencies between messages.
We also present an original algorithm to automatically check the deadlock-freeness of a network with a given routing function. A prototype tool has been developed and automatic deadlock checking of large scale networks with various routing functions have been successfully achieved. We provide comparative results with standard approach, highlighting the benefits of our method.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Deadlock; interconnection networks; networks on chip; wormhole routing", } @Article{Zhou:2008:NER, author = "Hai Zhou", title = "A new efficient retiming algorithm derived by formal manipulation", journal = j-TODAES, volume = "13", number = "1", pages = "7:1--7:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297673", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A new efficient algorithm is derived for the minimal period retiming by formal manipulation. Contrary to all previous algorithms, which used fixed period feasibility checking to binary-search a candidate range, the derived algorithm checks the optimality of a feasible period directly. It is much simpler and more efficient than previous algorithms. Experimental results showed that it is even faster than ASTRA, an efficient heuristic algorithm. 
Since the derived algorithm is incremental by nature, it also opens the opportunity to be combined with other optimization techniques.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "algorithm derivation; Clock period minimization; retiming", } @Article{Krishnaswamy:2008:PTM, author = "Smita Krishnaswamy and George F. Viamontes and Igor L. Markov and John P. Hayes", title = "Probabilistic transfer matrices in symbolic reliability analysis of logic circuits", journal = j-TODAES, volume = "13", number = "1", pages = "8:1--8:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297674", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose the probabilistic transfer matrix (PTM) framework to capture nondeterministic behavior in logic circuits. PTMs provide a concise description of both normal and faulty behavior, and are well-suited to reliability and error susceptibility calculations. A few simple composition rules based on connectivity can be used to recursively build larger PTMs (representing entire logic circuits) from smaller gate PTMs. PTMs for gates in series are combined using matrix multiplication, and PTMs for gates in parallel are combined using the tensor product operation. PTMs can accurately calculate joint output probabilities in the presence of reconvergent fanout and inseparable joint input distributions. To improve computational efficiency, we encode PTMs as algebraic decision diagrams (ADDs).
We also develop equivalent ADD algorithms for newly defined matrix operations such as {\tt eliminate\_variables} and {\tt eliminate\_redundant\_variables}, which aid in the numerical computation of circuit PTMs. We use PTMs to evaluate circuit reliability and derive polynomial approximations for circuit error probabilities in terms of gate error probabilities. PTMs can also analyze the effects of logic and electrical masking on error mitigation. We show that ignoring logic masking can overestimate errors by an order of magnitude. We incorporate electrical masking by computing error attenuation probabilities, based on analytical models, into an extended PTM framework for reliability computation. We further define a susceptibility measure to identify gates whose errors are not well masked. We show that hardening a few gates can significantly improve circuit reliability.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "fault tolerance; Symbolic analysis", } @Article{Tzeng:2008:VPS, author = "Chao-Wen Tzeng and Jheng-Syun Yang and Shi-Yu Huang", title = "A versatile paradigm for scan chain diagnosis of complex faults using signal processing techniques", journal = j-TODAES, volume = "13", number = "1", pages = "9:1--9:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297675", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scan chains are popularly used as the channels for silicon testing and debugging. However, they have also been identified as one of the culprits of silicon failure more recently. 
To cope with this problem, several scan chain diagnosis approaches have been proposed in the past. The existing methods, however, suffer from one common drawback---that is, they rely on fault models and matching heuristics to locate the faults. Such a paradigm may run into difficulty when the fault under diagnosis does not match the fault model exactly, for example, when there is a bridging between a flip-flop and a logic cell, or the fault is temporal and only manifests itself intermittently. In light of this, we propose in this article a more versatile model-free paradigm for locating the faulty flip-flops in a scan chain, incorporating a number of signal processing techniques, such as filtering and edge detection. These techniques performed on the test responses of the failing chip under diagnosis directly can effectively reveal the fault location(s) in a scan chain. As compared to the previous works, our approach is better capable of handling intermittent faults and bridging faults, even under nonideal conditions, for example, when the core logic is also faulty. Experimental results on several real designs indicate that this approach can indeed catch some nasty faults that previous methods could not catch.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design for testability; Diagnosis; fault; profiling; scan chain", } @Article{Johnson:2008:IME, author = "F. Ryan Johnson and Joann M. 
Paul", title = "Interrupt modeling for efficient high-level scheduler design space exploration", journal = j-TODAES, volume = "13", number = "1", pages = "10:1--10:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297676", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Single Chip Heterogeneous Multiprocessors executing a wide variety of software are increasingly common in consumer electronics. Because of the mix of real-time and best effort software across the entire chip, a key design element of these systems is the choice of scheduling strategy. Without task migration, the benefits of single chip processing cannot be fully realized. Previously, high-level modeling environments have not been capable of modeling asynchronous events such as interrupts and preemptive scheduling while preserving the performance benefits of high level simulation. This paper shows how extensions to Modeling Environment for Software and Hardware (MESH) enable precise modeling of these asynchronous events while running more than 1000 times faster than cycle-accurate simulation. We discuss how we achieved this and illustrate its use in modeling preemptive scheduling. We evaluate the potential of migrating running tasks between processors to improve performance in a multimedia cell phone example. We show that by allowing schedulers to rebalance processor loads as new tasks arrive significant performance gains can be achieved over statically partitioned and dynamic scheduling approaches. In our example, we show that system response time can be improved by as much as 1.96 times when a preemptive migratory scheduler is used, despite the overhead incurred by scheduling tasks across multiple processors and transferring state during the migration of running tasks.
The contribution of this work is to provide a framework for evaluating preemptive scheduling policies and task migration in a high level simulator, by combining the new ability to model interrupts with dramatically increased efficiency in the high-level modeling of scheduling and communication MESH already provides.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Heterogeneous chip multiprocessors; MESH; scenario oriented design", } @Article{Ogras:2008:AOP, author = "Umit Y. Ogras and Radu Marculescu", title = "Analysis and optimization of prediction-based flow control in networks-on-chip", journal = j-TODAES, volume = "13", number = "1", pages = "11:1--11:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297677", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Networks-on-Chip (NoC) communication architectures have emerged recently as a scalable solution to on-chip communication problems. While the NoC architectures may offer higher bandwidth compared to traditional bus-based communication, their performance can degrade significantly in the absence of effective flow control algorithms. Unfortunately, flow control algorithms developed for macronetworks, either rely on local information, or suffer from large communication overhead and unpredictable delays. Hence, using them in the NoC context is problematic at best. For this reason, we propose a predictive closed-loop flow control mechanism and make the following contributions: First, we develop traffic source and router models specifically targeted to NoCs. 
Then, we utilize these models to predict the possible congestion in the network. Based on this information, the proposed scheme controls the packet injection rate at traffic sources in order to regulate the total number of packets in the network. We also illustrate the proposed traffic source model and the applicability of the proposed flow controller to actual designs using real NoC implementations. Finally, simulations and experimental study using our FPGA prototype show that the proposed controller delivers a better performance compared to the traditional switch-to-switch flow control algorithms under various real and synthetic traffic patterns.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "congestion control; flow control; Multi-processor systems; networks-on-chip", } @Article{Chang:2008:TCS, author = "Kuei-Chung Chang and Jih-Sheng Shen and Tien-Fu Chen", title = "Tailoring circuit-switched network-on-chip to application-specific system-on-chip by two optimization schemes", journal = j-TODAES, volume = "13", number = "1", pages = "12:1--12:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297678", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the number of cores on a chip increases, power consumed by the communication structures takes a significant portion of the overall power budget. In this article, we first propose a circuit-switched interconnection architecture which uses crossroad switches to construct dedicated channels dynamically between any pairs of cores for nonhuge application-specific SoCs. 
The structure of the crossroad switch is simple, which can be regarded as a NoC-lite router, and we can easily construct a low-power on-chip network with these switches by a system-level design methodology. We also present the design methodology to tailor the proposed interconnection architecture to low-power structures by two proposed optimization schemes with profiled communication characteristics. The first scheme is power-aware topology construction, which can build low-power application-specific interconnection topologies. To further reduce the power consumption, we propose the second optimization scheme to predetermine the operating mode of dual-mode switches in the NoC at runtime. We evaluate several interconnection techniques, and the results show that the proposed architecture is more low-power and high-performance than others under some constraints and scale boundaries. We take multimedia applications as case studies, and experimental results show the power savings of power-aware topology approximate to 49\% of the interconnection architecture. The power consumption can be further reduced approximately 25\% by applying partially dedicated path mechanism.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Application specific; interconnection; low power; networks on chip; systems on chips", } @Article{Abbasian:2008:WBD, author = "A. Abbasian and S. Hatami and A. Afzali-Kusha and M. 
Pedram", title = "Wavelet-based dynamic power management for nonstationary service requests", journal = j-TODAES, volume = "13", number = "1", pages = "13:1--13:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297679", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, a wavelet-based dynamic power management policy (WBDPM) is proposed. In this approach, the workload source (service requester) is modeled by a nonstationary time series which, in turn, is represented by a nondecimated Haar wavelet as its basis. The proposed approach is robust and has the ability to minimize energy dissipation under different performance constraints. To assess the accuracy of the model, the algorithm was implemented for data extracted from the hard disks of computers. 
Prediction results of this approach for the case of a nonstationary service requester exhibit accuracies of more than 95\%.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Dynamic power management; low-power system design; nonstationary service request; wavelet-based prediction", } @Article{Su:2008:SNT, author = "Yu-Shih Su and Po-Hsien Chang and Shih-Chieh Chang and Tingting Hwang", title = "Synthesis of a novel timing-error detection architecture", journal = j-TODAES, volume = "13", number = "1", pages = "14:1--14:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297680", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Delay variation can cause a design to fail its timing specification. Ernst et al. [2003] observe that the worst delay of a design is least probable to occur. They propose a mechanism to detect and correct occasional errors while the design can be optimized for the common cases. Their experimental results show significant performance (or power) gain as compared with the worst-case design. However, the architecture in Ernst et al. [2003] suffers the short path problem, which is difficult to resolve. In this article, we propose a novel error-detecting architecture to solve the short path problem. 
Our experimental results show considerable performance gain can be achieved with reasonable area overhead.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "fault tolerance; Logic synthesis", } @Article{Raabe:2008:RDS, author = "Andreas Raabe and Philipp A. Hartmann and Joachim K. Anlauf", title = "{ReChannel}: {Describing} and simulating reconfigurable hardware in {SystemC}", journal = j-TODAES, volume = "13", number = "1", pages = "15:1--15:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297681", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the ongoing integration of (dynamic) reconfiguration into current system models, new methodologies and tools are needed to help the designer during the development process. This article introduces a language extension for SystemC along with a design methodology for describing and simulating dynamically reconfigurable systems at all levels of abstraction. The presented library provides maximum freedom of description of reconfiguration behavior and its control, while featuring simulation of runtime configuration, removal, and exchange of custom modules as well as third-party IP-cores during the complete architecture refinement process. 
When designing at RT-level, the resulting hardware description can easily be synthesized by standard synthesis tools.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "dynamic reconfiguration; hardware description; Reconfigurable hardware; refinement; simulation; SystemC", } @Article{Zhou:2008:AAS, author = "Xiangrong Zhou and Chenjie Yu and Alokika Dash and Peter Petrov", title = "Application-aware snoop filtering for low-power cache coherence in embedded multiprocessors", journal = j-TODAES, volume = "13", number = "1", pages = "16:1--16:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297682", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Maintaining local caches coherently in shared-memory multiprocessors results in significant power consumption. The customization methodology we propose exploits the fact that in embedded systems, important knowledge is available to the system designers regarding memory sharing between tasks. We demonstrate how the snoop-induced cache probings can be significantly reduced by identifying and exploiting in a deterministic way the shared memory regions between the processors. Snoop activity is enabled only for the accesses referring to known shared regions. 
The hardware support is not only cost efficient, but also software programmable, which allows for reprogrammability and customization across different tasks and applications.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Cache coherence; embedded multiprocessors; low-power embedded systems; snoop filtering", } @Article{Ahn:2008:SSC, author = "Yongjin Ahn and Keesung Han and Ganghee Lee and Hyunjik Song and Junhee Yoo and Kiyoung Choi and Xingguang Feng", title = "{SoCDAL}: {System-on-chip design AcceLerator}", journal = j-TODAES, volume = "13", number = "1", pages = "17:1--17:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297683", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Time-to-market pressure and the ever-growing design complexity of multiprocessor system-on-chips have demanded an efficient design environment that enables fast exploration of large design space. In this article, we introduce a new design environment, called SoCDAL, for accelerating multiprocessor system-on-chip design through fast design-space exploration targeting real-time multimedia systems. SoCDAL is a set of mostly automated tools covering system specification, hardware/software estimation, application-to-architecture mapping, simulation model generation, and system verification through simulation. For system specification, the process network model has been widely used for system specification because of its modeling capability. However, it is hard to use for real-time systems design, since its behavior cannot be estimated statically. 
We introduce a new approach which enables analyzing a process network model statically with some restrictions. For the hardware/software estimation, we analyze codes statically. Application-to-architecture mapping process implements a novel algorithm to support an arbitrary number of processors, with performance evaluation by static scheduling considering communication behavior. Mapping results are used to generate simulation models automatically at several transaction levels to be pipelined to a commercial tool. We show the effectiveness of our approaches by some experimental results with multimedia applications such as JPEG, H.263, and H.264 encoders, as well as an H.264 decoder.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "application-to-architecture mapping; Codesign; design-space exploration; multiprocessor system-on-chip; process networks; scheduling; simulation; specification; static hardware/software estimation; synchronous dataflow; transaction-level model; worst-case execution time", } @Article{Zamora:2008:EMU, author = "Nicholas H. Zamora and Xiaoping Hu and Umit Y. 
Ogras and Radu Marculescu", title = "Enabling multimedia using resource-constrained video processing techniques: a node-centric perspective", journal = j-TODAES, volume = "13", number = "1", pages = "18:1--18:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297684", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Successful proliferation of multimedia-enabled devices and advances in very large-scale integration (VLSI) technology has spawned new research efforts in migrating video processing applications onto ever smaller and more inexpensive devices. This article focuses on the technical challenges associated with that migration. \par Due to limitations in size, battery lifetime, and, ultimately, cost, mapping complex video applications onto resource-constrained systems is a very challenging proposition. To this end, we first consider a technique, region-of-interest (ROI) processing, of defining a window within a video frame and only operating on the data inside that window, ignoring the rest of the frame. By using this lossy technique, the processing requirements can be reduced by roughly 80\% while the error introduced in the quality of the results is roughly 10\%. The other technique is adaptive data partitioning (ADP) combined with a content-based power management algorithm. By distributing video processing among multiple processors and shutting them down when they are not needed, the energy consumed per processor can be reduced by 60\% without sacrificing the performance of the underlying video-based application. 
\par Taken together, these novel techniques enable ambient multimedia systems and maintain the needed overall efficiency in video processing.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "data partitioning; lossy and lossless video processing; real-time video processing; Region-of-interest (ROI)", } @Article{Lee:2008:FCB, author = "Kyungsoo Lee and Naehyuck Chang and Jianli Zhuo and Chaitali Chakrabarti and Sudheendra Kadri and Sarma Vrudhula", title = "A fuel-cell-battery hybrid for portable embedded systems", journal = j-TODAES, volume = "13", number = "1", pages = "19:1--19:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297685", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents our work on the development of a fuel cell (FC) and battery hybrid (FC-Bh) system for use in portable microelectronic systems. We describe the design and control of the hybrid system, as well as a dynamic power management (DPM)-based energy management policy that extends its operational lifetime. The FC is of the proton exchange membrane (PEM) type, operates at room temperature, and has an energy density which is 4--6 times that of a Li-ion battery. The FC cannot respond to sudden changes in the load, and so a system powered solely by the FC is not economical. An FC-Bh power source, on the other hand, can provide the high energy density of the FC and the high power density of a battery. \par In this work we first describe the prototype FC-Bh system that we have built. 
Such a prototype helps to characterize the performance of a hybrid power source, and also helps explore new energy management strategies for embedded systems powered by hybrid sources. Next we describe a Matlab/Simulink-based FC-Bh system simulator which serves as an alternate experimental platform and that enables quick evaluation of system-level control policies. Finally, we present an optimization framework that explicitly considers the characteristics of the FC-Bh system and is aimed at minimizing the fuel consumption. This optimization framework is applied on top of a prediction-based DPM policy and is used to derive a new fuel-efficient DPM scheme. The proposed scheme demonstrates up to 32\% system lifetime extension compared to a competing scheme when run on a real trace-based MPEG encoding example.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "battery; DPM; fuel cell; hybrid systems; Simulation; simulator", } @Article{Chao:2008:LPG, author = "Wei-Chung Chao and Wai-Kei Mak", title = "Low-power gated and buffered clock network construction", journal = j-TODAES, volume = "13", number = "1", pages = "20:1--20:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297686", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose an efficient algorithm to construct a low-power zero-skew gated clock network, given the module locations and activity information. Unlike previous works, we consider masking logic insertion and buffer insertion simultaneously, and guarantee to yield a zero-skew clock tree. 
Both the logical and physical information of the modules are carefully taken into consideration when determining where masking logic should be inserted. We also account for the power overhead of the control signals so that the total average power consumption of the constructed zero-skew gated clock network can be minimized. To this end, we present a recursive approach to compute the effective switched capacitance of a general gated and buffered clock network, accounting for both the clock tree's and controller tree's switched capacitance. The power consumptions of the gated clock networks constructed by our algorithm are 20 to 36\% lower than those reported in the best previous work in the literature.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "buffer; clock gating; Clock tree; low power; zero-skew", } @Article{Sham:2008:OWR, author = "Chiu-Wing Sham and Evangeline F. Y. Young and Hai Zhou", title = "Optimizing wirelength and routability by searching alternative packings in floorplanning", journal = j-TODAES, volume = "13", number = "1", pages = "21:1--21:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297687", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent advances in VLSI technology have made optimization of the interconnect delay and routability of a circuit more important. We should consider interconnect planning as early as possible. We propose a postfloorplanning step to reduce the interconnect cost of a floorplan by searching alternative packings. 
If a packing contains a rectangular bounding box of a group of modules, we can rearrange the blocks in the bounding box to obtain a new floorplan with the same area, but possibly with a smaller interconnect cost. Experimental results show that we can reduce the interconnect cost of a packing without any penalty in area.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Floorplanning; wirelength reduction", } @Article{Wu:2008:CPR, author = "Meng-Chiou Wu and Rung-Bin Lin and Shih-Cheng Tsai", title = "Chip placement in a reticle for multiple-project wafer fabrication", journal = j-TODAES, volume = "13", number = "1", pages = "22:1--22:??", month = jan, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1297666.1297688", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:00 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Chip placement in a reticle is crucial to the cost of a multiproject wafer run. In this article we develop several chip placement methods based on the volume-driven compatibility optimization (VOCO) concept, which maximizes dicing compatibility among chips with large-volume requirements while minimizing reticle dimensions. Our mixed-integer linear programming models with VOCO are too complex to render good solutions for large test cases. Our B*-tree with VOCO and HQ with VOCO use $ 16 \% \sim 29 \% $ fewer wafers and $ 8 \% \sim 19 \% $ less reticle area than the hierarchical quadrisection (HQ) method proposed by Kahng et al. 
[2005]", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compatibility graph; conflict graph; mixed-integer linear programming (MILP); Multiple-project wafers (MPW); reticle floorplanning; set cover; set partition; shuttle mask; simulated annealing (SA); wafer dicing", } @Article{Dutt:2008:Eb, author = "Nikil Dutt", title = "Editorial", journal = j-TODAES, volume = "13", number = "2", pages = "23:1--23:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344419", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Saluja:2008:SBA, author = "Nikhil Saluja and Kanupriya Gulati and Sunil P. Khatri", title = "{SAT}-based {ATPG} using multilevel compatible don't-cares", journal = j-TODAES, volume = "13", number = "2", pages = "24:1--24:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344420", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In a typical IC design flow, circuits are optimized using multilevel don't cares. The computed don't cares are discarded before Technology Mapping or Automatic Test Pattern Generation (ATPG). In this paper, we present two combinational ATPG algorithms for combinational designs. 
These algorithms utilize the multilevel don't cares that are computed for the design during technology independent logic optimization. They are based on Boolean Satisfiability (SAT), and utilize the single stuck-at fault model. Both algorithms make use of the Compatible Observability Don't Cares (CODCs) associated with nodes of the circuit, to speed up the ATPG process. For large circuits, both algorithms make use of approximate CODCs (ACODCs), which we can compute efficiently. Our first technique speeds up fault propagation by modifying the active clauses in the transitive fanout (TFO) of the fault site. In our second technique, we define new j-active variables for specific nodes in the transitive fanin (TFI) of the fault site. Using these j-active variables we write additional clauses to speed up fault justification. Experimental results demonstrate that the combination of these techniques (when using CODCs) results in an average reduction of 45\% in ATPG runtimes. When ACODCs are used, a speed-up of about 30\% is obtained in the ATPG run-times for large designs. We compare our method against a commercial structural ATPG tool as well. 
Our method is slower for small designs, but for large designs, we obtain a 31\% average speedup over the commercial tool.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Automatic test pattern generation (ATPG); Boolean satisfiability (SAT); don't cares; testing", } @Article{Muchherla:2008:NEW, author = "Kishore Kumar Muchherla and Pinhong Chen and Dongsheng Ma and Janet Meiling Wang", title = "A noniterative equivalent waveform model for timing analysis in presence of crosstalk", journal = j-TODAES, volume = "13", number = "2", pages = "25:1--25:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344421", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to the nonuniform interconnect scaling in the Deep Sub Micron (DSM) region, the coupling capacitance between wires becomes an increasingly dominant fraction of the total wire capacitance. This coupling capacitance introduces severe crosstalk which causes delay variations on signal lines and raises signal integrity problems. Therefore, including crosstalk in the timing analysis methods has become imperative for current technologies. And to correctly model the crosstalk, output loading effects, waveform shape and gate driving capability have to be considered. However, most existing crosstalk models have not yet included these factors and consequently suffer from the low accuracy problem. In this article, we propose a noniterative equivalent waveform model that addresses the above mentioned issues. 
Our experimental results have shown that the new model achieves 3 times speed up and 95\% accuracy compared to the existing models.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Deep sub micron; delay; equivalent waveform; noise; timing analysis", } @Article{Yan:2008:TDO, author = "Jin-Tai Yan", title = "Timing-driven octilinear {Steiner} tree construction based on {Steiner-point} reassignment and path reconstruction", journal = j-TODAES, volume = "13", number = "2", pages = "26:1--26:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344422", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "It is well known that the problem of constructing a timing-driven rectilinear Steiner tree for any signal net is important in performance-driven designs and has been extensively studied. Until now, many efficient approaches have been proposed for the construction of a timing-driven rectilinear Steiner tree. As technology process advances, $ + 45^\circ $ and $ - 45^\circ $ diagonal segments can be permitted in an octilinear routing model. To our knowledge, no approach is proposed to construct a timing-driven octilinear Steiner tree for any signal net. In this paper, given a rectilinear Steiner tree for any signal net, we propose an efficient transformation-based approach to construct a timing-driven octilinear Steiner tree based on the computation of the octilinear distance and the concept of Steiner-point reassignment and path reconstruction in an octilinear routing model. 
The experimental results show that our proposed transformation-based approach can use reasonable CPU time to construct a TOST, and a 10\%--18\% improvement in timing delay and a 5\%--14\% improvement in total wire length in the original RSTs are obtained in the construction of TOSTs for the tested signal nets.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Elmore delay; Global routing; octilinear Steiner tree; Steiner points", } @Article{Baldassin:2008:OSB, author = "Alexandro Baldassin and Paulo Centoducatte and Sandro Rigo and Daniel Casarotto and Luiz C. V. Santos and Max Schultz and Olinto Furtado", title = "An open-source binary utility generator", journal = j-TODAES, volume = "13", number = "2", pages = "27:1--27:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344423", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Electronic system level (ESL) modeling allows early hardware-dependent software (HDS) development. Due to broad CPU diversity and shrinking time-to-market, HDS development can neither rely on hand-retargeting binary tools, nor can it rely on pre-existent tools within standard packages. As a consequence, binary utilities which can be easily adapted to new CPU targets are of increasing interest. We present in this article a framework for automatic generation of binary utilities. It relies on two innovative ideas: platform-aware modeling and more inclusive relocation handling. Generated assemblers, linkers, disassemblers and debuggers were validated for MIPS, SPARC, PowerPC, i8051 and PIC16F84. 
An open-source prototype generator is available for download.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Platform debugging; retargetable tools; TLM", } @Article{Moscola:2008:RCB, author = "James Moscola and John W. Lockwood and Young H. Cho", title = "Reconfigurable content-based router using hardware-accelerated language parser", journal = j-TODAES, volume = "13", number = "2", pages = "28:1--28:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344424", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a dense logic design for matching multiple regular expressions with a field programmable gate array (FPGA) at 10+ Gbps. It leverages on the design techniques that enforce the shortest critical path on most FPGA architectures while optimizing the circuit size. The architecture is capable of supporting a maximum throughput of 12.90 Gbps on a Xilinx Virtex 4 LX200 and its performance is linearly scalable with size. Additionally, this article presents techniques for parsing data streams to provide semantic information for patterns found within a data stream. We illustrate how a content-based router can be implemented with our parsing techniques using an XML parser as an example. The content-based router presented was designed, implemented, and tested in a Xilinx Virtex XCV2000E FPGA on the FPX platform. It is capable of processing 32-bits of data per clock cycle and runs at 100 MHz. 
This allows the system to process and route XML messages at 3.2 Gbps.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "content-based routing; parser hardware; Parsing; pattern matching; regular expressions; XML", } @Article{Jones:2008:RFI, author = "Alex K. Jones and Swapna Dontharaju and Shenchih Tung and Leo Mats and Peter J. Hawrylak and Raymond R. Hoare and James T. Cain and Marlin H. Mickle", title = "Radio frequency identification prototyping", journal = j-TODAES, volume = "13", number = "2", pages = "29:1--29:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344425", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While RFID is starting to become a ubiquitous technology, the variation between different RFID systems still remains high. This paper presents several prototyping environments for different components of radio frequency identification (RFID) tags to demonstrate how many of these components can be standardized for many different purposes. We include two active tag prototypes, one based on a microprocessor and the second based on custom hardware. To program these devices we present a design automation flow that allows RFID transactions to be described in terms of primitives with behavior written in ANSI C code. To save power with active RFID devices we describe a passive transceiver switch called the ``burst switch'' and demonstrate how this can be used in a system with a microprocessor or custom hardware controller. 
Finally, we present a full RFID system prototyping environment based on real-time spectrum analysis technology currently deployed at the University of Pittsburgh RFID Center of Excellence. 
Using our prototyping techniques we show how transactions from multiple standards can be combined and targeted to several microprocessors include the Microchip PIC, Intel StrongARM and XScale, and AD Chips EISC as well as several hardware targets including the Altera Apex, Actel Fusion, Xilinx Coolrunner II, Spartan 3 and Virtex 2, and cell-based ASICs.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Design automation; low-power; prototyping; RFID", } @Article{Hu:2008:PSF, author = "Yu Hu and Yan Lin and Lei He and Tim Tuan", title = "Physical synthesis for {FPGA} interconnect power reduction by dual-Vdd budgeting and retiming", journal = j-TODAES, volume = "13", number = "2", pages = "30:1--30:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344426", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Field programmable dual-Vdd interconnects are effective in reducing FPGA power. We formulate the dual-Vdd-aware slack budgeting problem as a linear program (LP) and a min-cost network flow problem, respectively. Both algorithms reduce interconnect power by 50\% on average compared to single-Vdd interconnects, but the network-flow-based algorithm runs 11x faster on MCNC benchmarks. Furthermore, we develop simultaneous retiming and slack budgeting (SRSB) with flip-flop layout constraints in dual-Vdd FPGAs based on mixed integer linear programming, and speed-up the algorithm by LP relaxation and local legalization. Compared to retiming followed by slack budgeting, SRSB reduces interconnect power by up to
28.8\%.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGA; Low power; retiming", } @Article{AlKhatib:2008:MSC, author = "Iyad {Al Khatib} and Francesco Poletti and Davide Bertozzi and Luca Benini and Mohamed Bechara and Hasan Khalifeh and Axel Jantsch and Rustam Nabiev", title = "A multiprocessor system-on-chip for real-time biomedical monitoring and analysis: {ECG} prototype architectural design space exploration", journal = j-TODAES, volume = "13", number = "2", pages = "31:1--31:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344427", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article we focus on multiprocessor system-on-chip (MPSoC) architectures for human heart electrocardiogram (ECG) real time analysis as a hardware/software (HW/SW) platform offering an advance relative to state-of-the-art solutions. This is a relevant biomedical application with good potential market, since heart diseases are responsible for the largest number of yearly deaths. Hence, it is a good target for an application-specific system-on-chip (SoC) and HW/SW codesign. We investigate a symmetric multiprocessor architecture based on STMicroelectronics VLIW DSPs that process in real time 12-lead ECG signals. This architecture improves upon state-of-the-art SoC designs for ECG analysis in its ability to analyze the full 12 leads in real time, even with high sampling frequencies, and its ability to detect heart malfunction for the whole ECG signal interval. We explore the design space by considering a number of hardware and software architectural options. 
Comparing our design with present-day solutions from an SoC and application point-of-view shows that our platform can be used in real time and without failures.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "electrocardiogram algorithms; embedded system design; hardware space exploration; Multiprocessor system-on-chip; real time analysis", } @Article{Zhou:2008:HTC, author = "Xiangrong Zhou and Peter Petrov", title = "Heterogeneously tagged caches for low-power embedded systems with virtual memory support", journal = j-TODAES, volume = "13", number = "2", pages = "32:1--32:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344428", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "An energy-efficient data cache organization for embedded processors with virtual memory is proposed. Application knowledge regarding memory references is used to eliminate most tag translations. A novel tagging scheme is introduced, where both virtual and physical tags coexist. Physical tags and special handling of superset index bits are only used for references to shared regions in order to avoid cache inconsistency. By eliminating the need for most address translations on cache access, a significant power reduction is achieved. 
We outline an efficient hardware architecture, where the application information is captured in a reprogrammable way and the cache is minimally modified.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded systems", } @Article{Liu:2008:PVA, author = "Fang Liu and Sule Ozev and Plamen K. Nikolov", title = "Parametric variability analysis for multistage analog circuits using analytical sensitivity modeling", journal = j-TODAES, volume = "13", number = "2", pages = "33:1--33:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344429", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Process variations play an increasingly important role on the success of analog circuits. State-of-the-art analog circuits are based on complex architectures and contain many hierarchical layers and parameters. Knowledge of the parameter variances and their contribution patterns is crucial for a successful design process. This information is valuable to find solutions for many problems in design, design automation, testing, and fault tolerance. In this article, we present a hierarchical variance analysis methodology for multistage analog circuits. Starting from the process/layout level, we derive implicit hierarchical relations and extract the sensitivity information analytically. We make use of previously computed values whenever possible so as to reduce computational time. The proposed approach is particularly geared for the domain of design and test automation, where multiple runs on slightly different circuits are necessary. 
Experimental results indicate that the proposed method provides both accuracy and computational efficiency when compared with prior approaches.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "analog circuits; Hierarchical variance analysis; parameter correlations; performance model; process variations", } @Article{Cheng:2008:FSI, author = "Lei Cheng and Deming Chen and Martin D. F. Wong", title = "A fast simultaneous input vector generation and gate replacement algorithm for leakage power reduction", journal = j-TODAES, volume = "13", number = "2", pages = "34:1--34:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344430", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Input vector control (IVC) technique is based on the observation that the leakage current in a CMOS logic gate depends on gate input state, and a good input vector is able to minimize leakage when the circuit is in sleep mode. The gate replacement technique is a very effective method to further reduce the leakage current. In this article, we propose a fast heuristic algorithm to find a low-leakage input vector with simultaneous gate replacement. Results on MCNC91 benchmark circuits show that our algorithm produces 14\% better leakage current reduction with several orders of magnitude speedup in runtime for large circuits compared to the previous state-of-the-art algorithm. 
In particular, the average runtime for the ten largest combinational circuits has been dramatically reduced from 1879 seconds to 0.34 seconds.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "gate replacement; Input vector control; leakage reduction", } @Article{Bernasconi:2008:OKS, author = "Anna Bernasconi and Valentina Ciriani and Roberto Cordone", title = "The optimization of {kEP-SOPs}: {Computational} complexity, approximability and experiments", journal = j-TODAES, volume = "13", number = "2", pages = "35:1--35:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344431", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a new algebraic four-level expression called k-EXOR-projected sum of products (kEP-SOP). The optimization of a kEP-SOP is NP$^{\rm NP}$-hard, but can be approximated within a fixed performance guarantee in polynomial time. Moreover, fully testable circuits under the stuck-at-fault model can be derived from kEP-SOPs by adding at most a constant number of multiplexer gates. The experiments show that the computational time is very short and the results are most of the time optimal with respect to the number of products involved. kEP-SOPs also prove experimentally a good starting point for general multilevel logic synthesis.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "approximation algorithm; Automatic synthesis; multilevel logic synthesis; optimization; testing", } @Article{Bahar:2008:IJA, author = "R. 
Iris Bahar and Krishnendu Chakrabarty", title = "Introduction to joint {ACM JETC\slash TODAES} special issue on new, emerging, and specialized technologies", journal = j-TODAES, volume = "13", number = "2", pages = "36:1--36:??", month = apr, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1344418.1344432", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jun 12 18:10:39 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dutt:2008:E, author = "Nikil Dutt", title = "Editorial", journal = j-TODAES, volume = "13", number = "3", pages = "37:1--37:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367046", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jones:2008:ISS, author = "Alex K. 
Jones and Robert Walker", title = "Introduction to the special section on demonstrable software systems and hardware platforms {II}", journal = j-TODAES, volume = "13", number = "3", pages = "38:1--38:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367047", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kwon:2008:RPP, author = "Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and Soonhoi Ha and Yunheung Paek", title = "A retargetable parallel-programming framework for {MPSoC}", journal = j-TODAES, volume = "13", number = "3", pages = "39:1--39:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367048", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As more processing elements are integrated in a single chip, embedded software design becomes more challenging: It becomes a parallel programming for nontrivial heterogeneous multiprocessors with diverse communication architectures, and design constraints such as hardware cost, power, and timeliness. In the current practice of parallel programming with MPI or OpenMP, the programmer should manually optimize the parallel code for each target architecture and for the design constraints. Thus, the design-space exploration of MPSoC (multiprocessor systems-on-chip) costs become prohibitively large as software development overhead increases drastically. 
To solve this problem, we develop a parallel-programming framework based on a novel programming model called common intermediate code (CIC). In a CIC, functional parallelism and data parallelism of application tasks are specified independently of the target architecture and design constraints. Then, the CIC translator translates the CIC into the final parallel code, considering the target architecture and design constraints to make the CIC retargetable. Experiments with preliminary examples, including the H.263 decoder, show that the proposed parallel-programming framework increases the design productivity of MPSoC software significantly.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design-space exploration; embedded software; multiprocessor system on chip; parallel-programming; software generation", } @Article{Kumar:2008:MSS, author = "Akash Kumar and Shakith Fernando and Yajun Ha and Bart Mesman and Henk Corporaal", title = "Multiprocessor systems synthesis for multiple use-cases of multiple applications on {FPGA}", journal = j-TODAES, volume = "13", number = "3", pages = "40:1--40:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367049", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Future applications for embedded systems demand chip multiprocessor designs to meet real-time deadlines. The large number of applications in these systems generates an exponential number of use-cases. 
The key design automation challenges are designing systems for these use-cases and fast exploration of software and hardware implementation alternatives with accurate performance evaluation of these use-cases. These challenges cannot be overcome by current design methodologies which are semiautomated, time consuming, and error prone.\par In this article, we present a design methodology to generate multiprocessor systems in a systematic and fully automated way for {\em multiple use-cases}. Techniques are presented to merge multiple use-cases into one hardware design to minimize cost and design time, making it well suited for fast design-space exploration (DSE) in MPSoC systems. Heuristics to partition use-cases are also presented such that each partition can fit in an FPGA, and all use-cases can be catered for.\par The proposed methodology is implemented into a tool for Xilinx FPGAs for evaluation. The tool is also made available online for the benefit of the research community and is used to carry out a DSE case study with multiple use-cases of real-life applications: H263 and JPEG decoders. The generation of the entire design takes about 100 ms, and the whole DSE was completed in 45 minutes, including FPGA mapping and synthesis. 
The heuristics used for use-case partitioning reduce the design-exploration time elevenfold in a case study with mobile-phone applications.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design exploration; FPGA; multi-application; multimedia systems; multiple use-cases; multiprocessor systems; synchronous data-flow graphs", } @Article{Krashinsky:2008:ISV, author = "Ronny Krashinsky and Christopher Batten and Krste Asanovi{\'c}", title = "Implementing the {Scale} vector-thread processor", journal = j-TODAES, volume = "13", number = "3", pages = "41:1--41:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367050", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Scale vector-thread processor is a complexity-effective solution for embedded computing which flexibly supports both vector and highly multithreaded processing. The 7.1-million transistor chip has 16 decoupled execution clusters, vector load and store units, and a nonblocking 32KB cache. An automated and iterative design and verification flow enabled a performance-, power-, and area-efficient implementation with two person-years of development effort. 
Scale has a core area of 16.6 mm$^2$ in 180 nm technology, and it consumes 400 mW--1.1 W while running at 260 MHz.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "hybrid C++/Verilog simulation; iterative VLSI design flow; multithreaded processors; procedural datapath pre-placement; vector processors; vector-thread processors", } @Article{Mishra:2008:SDD, author = "Prabhat Mishra and Nikil Dutt", title = "Specification-driven directed test generation for validation of pipelined processors", journal = j-TODAES, volume = "13", number = "3", pages = "42:1--42:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367051", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Functional validation is a major bottleneck in pipelined processor design due to the combined effects of increasing design complexity and lack of efficient techniques for directed test generation. Directed test vectors can reduce overall validation effort, since shorter tests can obtain the same coverage goal compared to the random tests. This article presents a specification-driven directed test generation methodology. The proposed methodology makes three important contributions. First, a general graph model is developed that can capture the structure and behavior (instruction set) of a wide variety of pipelined processors. The graph model is generated from the processor specification. Next, we propose a functional fault model that is used to define the functional coverage for pipelined architectures. 
Finally, we propose two complementary test generation techniques: test generation using model checking, and test generation using template-based procedures. These test generation techniques accept the graph model of the architecture as input and generate test programs to detect all the faults in the functional fault model. Our experimental results on two pipelined processor models demonstrate several orders-of-magnitude reduction in overall validation effort by drastically reducing both test-generation time and number of test programs required to achieve a coverage goal.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "functional validation; model checking; test generation", } @Article{Joo:2008:ECP, author = "Yongsoo Joo and Youngjin Cho and Donghwa Shin and Jaehyun Park and Naehyuck Chang", title = "An energy characterization platform for memory devices and energy-aware data compression for multilevel-cell flash memory", journal = j-TODAES, volume = "13", number = "3", pages = "43:1--43:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367052", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Memory devices often consume more energy than microprocessors in current portable embedded systems, but their energy consumption changes significantly with the type of transaction, data values, and access timing, as well as depending on the total number of transactions. 
These variabilities mean that an innovative tool and framework are required to characterize modern memory devices running in embedded system architectures.\par We introduce an energy measurement and characterization platform for memory devices, and demonstrate an application to multilevel-cell (MLC) flash memories, in which we discover significant value-dependent programming energy variations. We introduce an energy-aware data compression method that minimizes the flash programming energy, rather than the size of the compressed data, which is formulated as an entropy coding with unequal bit-pattern costs. Deploying a probabilistic approach, we derive energy-optimal bit-pattern probabilities and expected values of the bit-pattern costs which are applicable to the large amounts of compressed data typically found in multimedia applications. Then we develop an energy-optimal prefix coding that uses integer linear programming, and construct a prefix-code table. From a consideration of Pareto-optimal energy consumption, we can make tradeoffs between data size and programming energy, such as a 41\% energy savings for a 52\% area overhead.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compression; flash memory; MLC", } @Article{Huffmire:2008:DSS, author = "Ted Huffmire and Brett Brotherton and Nick Callegari and Jonathan Valamehr and Jeff White and Ryan Kastner and Tim Sherwood", title = "Designing secure systems on reconfigurable hardware", journal = j-TODAES, volume = "13", number = "3", pages = "44:1--44:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367053", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The extremely high cost of custom ASIC fabrication makes FPGAs an attractive alternative for deployment of custom hardware. Embedded systems based on reconfigurable hardware integrate many functions onto a single device. Since embedded designers often have no choice but to use soft IP cores obtained from third parties, the cores operate at different trust levels, resulting in mixed-trust designs. The goal of this project is to evaluate recently proposed security primitives for reconfigurable hardware by building a real embedded system with several cores on a single FPGA and implementing these primitives on the system. Overcoming the practical problems of integrating multiple cores together with security mechanisms will help us to develop realistic security-policy specifications that drive enforcement mechanisms on embedded systems.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Advanced Encryption Standard (AES); controlled sharing; enforcement mechanisms; execution monitors; Field programmable gate arrays (FPGAs); hardware security; isolation; memory protection; reference monitors; security policies; security primitives; separation; static analysis; systems-on-a-chip (SoCs)", } @Article{Manolios:2008:AVS, author = "Panagiotis Manolios and Sudarshan K. 
Srinivasan", title = "Automatic verification of safety and liveness for pipelined machines using {WEB} refinement", journal = j-TODAES, volume = "13", number = "3", pages = "45:1--45:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367054", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We show how to automatically verify that complex pipelined machine models satisfy the same safety and liveness properties as their instruction-set architecture (ISA) models by using well-founded equivalence bisimulation (WEB) refinement. We show how to reduce WEB-refinement proof obligations to formulas expressible in the decidable logic of counter arithmetic with lambda expressions and uninterpreted functions (CLU). This allows us to automate the verification of the pipelined machine models by using the UCLID decision procedure to transform CLU formulas to Boolean satisfiability problems. To relate pipelined machine states to ISA states, we use the commitment and flushing refinement maps. We evaluate our work using 17 pipelined machine models that contain various features, including deep pipelines, precise exceptions, branch prediction, interrupts, and instruction queues. 
Our experimental results show that the overhead of proving liveness, obtained by comparing the cost of proving both safety and liveness with the cost of only proving safety, is about 17\%, but depends on the refinement map used; for example, the liveness overhead is 23\% when flushing is used and is negligible when commitment is used.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bisimulation; commitment; flushing; liveness; pipelined machines; refinement; refinement maps; SAT; verification", } @Article{Wu:2008:PVA, author = "Huaizhi Wu and Martin D. F. Wong and Wilsin Gosti", title = "Postplacement voltage assignment under performance constraints", journal = j-TODAES, volume = "13", number = "3", pages = "46:1--46:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367055", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multi-Vdd is an effective method to reduce both leakage and dynamic power. A key challenge in a multi-Vdd design is to control the complexity of the power-supply system and limit the demand for level shifters. This can be tackled by grouping cells of different supply voltages into a small number of voltage islands. Recently, an elegant algorithm was proposed for generating voltage islands that balance the power-versus-design-cost tradeoff under performance requirement, according to the placement proximity of the critical cells. One prerequisite of this algorithm is an initial voltage assignment at the standard-cell level that meets timing. 
In this article, we present a novel method to produce quality voltage assignment which not only meets timing but also forms good proximity of the critical cells to provide a smooth input to the aforementioned voltage island generation. Our algorithm is based on effective delay budgeting and efficient computation of physical proximity by Voronoi diagram. Our extensive experiments on real industrial designs show that our algorithm leads to 25\%--75\% improvement in the voltage island generation in terms of the number of voltage islands generated, with computation time only linear to design size.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "low power; timing; voltage assignment; Voronoi diagram", } @Article{Bombieri:2008:ROT, author = "Nicola Bombieri and Franco Fummi and Graziano Pravadelli", title = "Reuse and optimization of testbenches and properties in a {TLM-to-RTL} design flow", journal = j-TODAES, volume = "13", number = "3", pages = "47:1--47:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367056", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In transaction-level modeling (TLM), verification methodologies based on transactions allow testbenches, properties, and IP cores in mixed TL-RTL designs to be reused. However, no papers in the literature analyze the effectiveness of transaction-based verification (TBV) in comparison to the more traditional RTL approach. The first contribution of this article is the introduction of a functional-fault-model-based methodology for demonstrating the effectiveness of reuse through TBV. 
A second contribution is the introduction of a similar methodology for efficient property checking which identifies and removes redundant properties prior to assertion-based verification or model checking.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "fault models; functional verification; model checking; TBV; TLM", } @Article{Inoue:2008:PVS, author = "Hiroaki Inoue and Junji Sakai and Masato Edahiro", title = "Processor virtualization for secure mobile terminals", journal = j-TODAES, volume = "13", number = "3", pages = "48:1--48:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367057", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a processor virtualization architecture, VIRTUS, to provide a dedicated domain for preinstalled applications and virtualized domains for downloaded native applications. With it, security-oriented next-generation mobile terminals can provide any number of domains for native applications. VIRTUS features three new technologies, namely, VMM asymmetrization, dynamic interdomain communication (IDC), and virtualization-assist logic, and it is first in the world to virtualize an ARM-based multiprocessor. Evaluations have shown that VMM asymmetrization results in significantly less performance degradation and LOC increase than do other VMMs. 
Further, dynamic IDC overhead is low enough, and virtualization-assist logic can be implemented in a sufficiently small area.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "multiprocessor; processor virtualization", } @Article{Sanz:2008:CSS, author = "Concepci{\'o}n Sanz and Manuel Prieto and Jos{\'e} Ignacio G{\'o}mez and Antonis Papanikolaou and Miguel Miranda and Francky Catthoor", title = "Combining system scenarios and configurable memories to tolerate unpredictability", journal = j-TODAES, volume = "13", number = "3", pages = "49:1--49:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367058", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Process variability and the dynamism of new applications increase the uncertainty of embedded systems and force designers to use pessimistic assumptions, which have a tremendous impact on both the performance and energy consumption of their memory organizations. In this article we introduce an experimental framework which tries to mitigate the effects of both sources of unpredictability. At compile time, an extensive profiling helps us to detect system scenarios and bounds application dynamism. At the organization level, we incorporate a heterogeneous memory architecture composed by several configurable memories. A calibration process and a runtime control system adapt the platform to the current application needs. 
Our approach manages to reduce significantly the energy overhead associated to both variability and application dynamism (up to 60\%, according to our simulations) without compromising the timing constraints existing in our target domain of dynamic periodic multimedia applications.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "parametric yield; process variation; variability compensation", } @Article{Ozturk:2008:IBE, author = "Ozcan Ozturk and Mahmut Kandemir", title = "{ILP}-based energy minimization techniques for banked memories", journal = j-TODAES, volume = "13", number = "3", pages = "50:1--50:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367059", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Main memories can consume a significant portion of overall energy in many data-intensive embedded applications. One way of reducing this energy consumption is banking, that is, dividing available memory space into multiple banks and placing unused (idle) memory banks into low-power operating modes. Prior work investigated code-restructuring- and data-layout-reorganization-based approaches for increasing the energy benefits that could be obtained from a banked memory architecture. This article explores different techniques that can potentially coexist within the same optimization framework for maximizing benefits of low-power operating modes. These techniques include employing nonuniform bank sizes, data migration, data compression, and data replication. 
By using these techniques, we try to increase the chances for utilizing low-power operating modes in a more effective manner, and achieve further energy savings over what could be achieved by exploiting low-power modes alone. Specifically, nonuniform banking tries to match bank sizes with application-data access patterns. The goal of data migration is to cluster data with similar access patterns in the same set of banks. Data compression reduces the size of the data used by an application, and thus helps reduce the number of memory banks occupied by data. Finally, data replication increases bank idleness by duplicating select read-only data blocks across banks. We formulate each of these techniques as an ILP (integer linear programming) problem, and solve them using a commercial solver. Our experimental analysis using several benchmarks indicates that all the techniques presented in this framework are successful in reducing memory energy consumption. Based on our experience with these techniques, we recommend to compiler writers for banked memories to consider data compression, replication, and migration.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compilers; data compression; DRAM; low-power operating modes; memory banking; migration; replication", } @Article{Das:2008:RSA, author = "Sabyasachi Das and Sunil P. 
Khatri", title = "Resource sharing among mutually exclusive sum-of-product blocks for area reduction", journal = j-TODAES, volume = "13", number = "3", pages = "51:1--51:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367060", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In state-of-the-art digital designs, arithmetic blocks consume a major portion of the total area of the IC. The arithmetic sum-of-product (SOP) is the most widely used arithmetic block. Some of the examples of SOP are adder, subtractor, multiplier, multiply-accumulator (MAC), squarer, chain-of-adders, incrementor, decrementor, etc. In this article, we introduce a novel, area-efficient architecture to share different SOP blocks which are used in a mutually exclusive manner. We implement the core functions of the largest SOP only once and reuse different parts of the core subblocks for all other SOP operations with the help of multiplexers. This architecture can be used in the nontiming-critical paths of the design, to save significant amounts of area. Our experimental data shows that the proposed sharing-based architecture results in about 37\% area savings compared to the results obtained from a commercially available best-in-class datapath synthesis tool. In addition, our proposed shared implementation consumes about 18\% less power. 
These improvements were verified on placed-and-routed designs as well.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tseng:2008:PPD, author = "I-Lun Tseng and Adam Postula", title = "Partitioning parameterized 45-degree polygons with constraint programming", journal = j-TODAES, volume = "13", number = "3", pages = "52:1--52:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367061", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "An algorithm for partitioning parameterized 45-degree polygons into parameterized trapezoids is proposed in this article. The algorithm is based on the plane-sweep technique and can handle polygons with complicated constraints. The input to the algorithm consists of the contour of a parameterized polygon to be partitioned and a set of constraints for parameters of the contour. The algorithm uses horizontal cuts only and generates a number of nonoverlapping trapezoids whose union is the original parameterized polygon. Processing of constraints and coordinates that contain first-order multiple-variable polynomials has been made possible by incorporating the JaCoP constraint programming library. 
The proposed algorithm has been implemented in Java programming language and can be used as the basis to build the trapezoidal corner stitching data structure for parameterized VLSI layout masks.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "analog and mixed-signal design; parameterized layouts; parameterized polygons; polygon decomposition; trapezoidal corner stitching", } @Article{Sehgal:2008:PAS, author = "Anuja Sehgal and Sudarshan Bahukudumbi and Krishnendu Chakrabarty", title = "Power-aware {SoC} test planning for effective utilization of port-scalable testers", journal = j-TODAES, volume = "13", number = "3", pages = "53:1--53:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367062", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many system-on-chip (SoC) integrated circuits contain embedded cores with different scan frequencies. To better meet the test requirements for such heterogeneous SoCs, leading tester companies have recently introduced port-scalable testers, which can simultaneously drive groups of channels at different data rates. However, the number of tester channels available for scan testing is limited; therefore, a higher shift frequency can increase the test time for a core if the resulting test access architecture reduces the bit-width used to access it. We present a scalable test planning technique that exploits port scalability of testers to reduce SoC test time. We compare the proposed heuristic optimization method to two baseline methods based on prior works that use a single scan data rate for all embedded cores. 
We also propose a power-aware test planning technique to effectively utilize port-scalable testers under constraints of test power consumption. Experimental results are presented for power-aware test scheduling to illustrate the impact of power constraints on overall test time.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "integer linear programming; port-scalable testers; SoC test; test access architecture", } @Article{Pecenka:2008:ESR, author = "Tomas Pecenka and Lukas Sekanina and Zdenek Kotasek", title = "Evolution of synthetic {RTL} benchmark circuits with predefined testability", journal = j-TODAES, volume = "13", number = "3", pages = "54:1--54:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367063", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a new real-world application of evolutionary computing in the area of digital-circuits testing. A method is described which enables to evolve large synthetic RTL benchmark circuits with a predefined structure and testability. Using the proposed method, a new collection of synthetic benchmark circuits was developed. These benchmark circuits will be useful in a validation process of novel algorithms and tools in the area of digital-circuits testing. Evolved benchmark circuits currently represent the most complex benchmark circuits with a known level of testability. Furthermore, these circuits are the largest that have ever been designed by means of evolutionary algorithms. 
This work also investigates suitable parameters of the evolutionary algorithm for this problem and explores the limits in the complexity of evolved circuits.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "benchmark circuit; evolvable hardware; testability analysis", } @Article{Pedram:2008:E, author = "Massoud Pedram", title = "Editorial", journal = j-TODAES, volume = "13", number = "4", pages = "55:1--55:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391963", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Guan:2008:SAP, author = "Nan Guan and Qingxu Deng and Zonghua Gu and Wenyao Xu and Ge Yu", title = "Schedulability analysis of preemptive and nonpreemptive {EDF} on partial runtime-reconfigurable {FPGAs}", journal = j-TODAES, volume = "13", number = "4", pages = "56:1--56:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391964", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Field Programmable Gate Arrays (FPGAs) are very popular in today's embedded systems design, and Partial Runtime-Reconfigurable (PRTR) FPGAs allow HW tasks to be placed and removed dynamically at runtime. 
Hardware task scheduling on PRTR FPGAs brings many challenging issues to traditional real-time scheduling theory, which have not been adequately addressed by the research community compared to software task scheduling on CPUs. In this article, we consider the schedulability analysis problem of HW task scheduling on PRTR FPGAs. We derive utilization bounds for several variants of global preemptive/nonpreemptive EDF scheduling, and compare the performance of different utilization bound tests.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGA; Real-time scheduling; reconfigurable devices", } @Article{Mukherjee:2008:HLC, author = "Rajarshi Mukherjee and Song Liu and Seda Ogrenci Memik and Somsubhra Mondal", title = "A high-level clustering algorithm targeting dual {V$_{dd}$ FPGAs}", journal = j-TODAES, volume = "13", number = "4", pages = "57:1--57:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391965", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent advanced power optimizations deployed in commercial FPGAs, laid out a roadmap towards FPGA devices that can be integrated into ultra low power systems. In this article, we present a high-level design tool to support the process of mapping an application onto a FPGA device with dual supply voltages. Our main contribution in this paper is an algorithm, which creates voltage scaling ready clusters by utilizing the timing slack available in the designs. We propose to first create clusters of CLBs within a given CLB-level netlist.
This clustering algorithm intends to group chains of CLBs possessing similar amounts of timing slack along their critical path together. Once these clusters are identified, they are placed onto respective V$_{dd}$ partitions on the device. We have evaluated different dual V$_{dd}$ fabrics and the potential gain in power consumption is explored. When a subset of the logic blocks on the device can be driven by low V$_{dd}$ levels (either with a dedicated low V$_{dd}$ supply or with a programmable selection between low and high V$_{dd}$ levels for these blocks) this affects placement and routing. As a result the maximum frequency of the designs may be affected. In order to evaluate the overall impact of creating voltage islands, we measured the Energy-Delay Product for our benchmark designs. We observed that the Energy-Delay product can be decreased by 26.9\% when the placement of the designs into different voltage levels is guided by our clustering algorithm.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "clustering; Dynamic power; field programmable gate arrays; partitioning; placement; voltage scaling", } @Article{Resano:2008:ESR, author = "Javier Resano and Juan Antonio Clemente and Carlos Gonzalez and Daniel Mozos and Francky Catthoor", title = "Efficiently scheduling runtime reconfigurations", journal = j-TODAES, volume = "13", number = "4", pages = "58:1--58:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391966", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to the emergence of portable devices that must run complex dynamic applications there is a need for flexible 
platforms for embedded systems. Runtime reconfigurable hardware can provide this flexibility but the reconfiguration latency can significantly decrease the performance. When dealing with task graphs, runtime support that schedules the reconfigurations in advance can drastically reduce this overhead. However, executing complex scheduling heuristics at runtime may generate an excessive penalty. Hence, we have developed a hybrid design-time/runtime reconfiguration scheduling heuristic that generates its final schedule at runtime but carries out most computations at design-time. We have tested our approach in a PowerPC 405 processor embedded on a FPGA demonstrating that it generates a very small runtime penalty while providing almost as good schedules as a full runtime approach.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGAs; hardware multitasking; Reconfigurable architectures; runtime/design-time scheduling", } @Article{Garg:2008:SLT, author = "Siddharth Garg and Diana Marculescu", title = "System-level throughput analysis for process variation aware multiple voltage-frequency island designs", journal = j-TODAES, volume = "13", number = "4", pages = "59:1--59:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391967", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing variability in manufacturing process parameters is expected to lead to significant performance degradation in deep submicron technologies. 
Multiple Voltage-Frequency Island (VFI) design styles with fine-grained, process-variation aware clocking have recently been shown to possess increased immunity to manufacturing process variations. In this article, we propose a theoretical framework that allows designers to quantify the performance improvement that is to be expected if they were to migrate from a fully synchronous design to the proposed multiple VFI design style. Specifically, we provide techniques to efficiently and accurately estimate the probability distribution of the execution rate (or throughput) of both single and multiple VFI systems under the influence of manufacturing process variations. Finally, using an MPEG-2 encoder benchmark, we demonstrate how the proposed analysis framework can be used by designers to make architectural decisions such as the granularity of VFI domain partitioning based on the throughput constraints their systems are required to satisfy.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Globally asynchronous locally synchronous; manufacturing process variations; maximum cycle mean; performance analysis; system-level design; voltage-frequency islands", } @Article{Ozturk:2008:APB, author = "Ozcan Ozturk and Mahmut Kandemir and Guangyu Chen", title = "Access pattern-based code compression for memory-constrained systems", journal = j-TODAES, volume = "13", number = "4", pages = "60:1--60:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391968", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As compared to a large spectrum of performance optimizations, relatively less effort has been 
dedicated to optimize other aspects of embedded applications such as memory space requirements, power, real-time predictability, and reliability. In particular, many modern embedded systems operate under tight memory space constraints. One way of addressing this constraint is to compress executable code and data as much as possible. While researchers on code compression have studied efficient hardware and software based code compression strategies, many of these techniques do not take application behavior into account; that is, the same compression/decompression strategy is used irrespective of the application being optimized. This article presents an application-sensitive code compression strategy based on control flow graph (CFG) representation of the embedded program. The idea is to start with a memory image wherein all basic blocks of the application are compressed, and decompress only the blocks that are predicted to be needed in the near future. When the current access to a basic block is over, our approach also decides the point at which the block could be compressed. We propose and evaluate several compression and decompression strategies that try to reduce memory requirements without excessively increasing the original instruction cycle counts. Some of our strategies make use of profile data, whereas others are fully automatic. Our experimental evaluation using seven applications from the MediaBench suite and three large embedded applications reveals that the proposed code compression strategy is very successful in practice. 
Our results also indicate that working at a basic block granularity, as opposed to a procedure granularity, is important for maximizing memory space savings.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "CFG; code access pattern; code compression; Embedded systems; memory optimization", } @Article{Baradaran:2008:CAM, author = "Nastaran Baradaran and Pedro C. Diniz", title = "A compiler approach to managing storage and memory bandwidth in configurable architectures", journal = j-TODAES, volume = "13", number = "4", pages = "61:1--61:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391969", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Configurable architectures offer the unique opportunity of realizing hardware designs tailored to the specific data and computational patterns of an application code. Customizing the storage structures is becoming increasingly important in mitigating the continuing gap between memory latencies and internal computing speeds. In this article we describe and evaluate a compiler algorithm that maps the arrays of a loop-based computation to internal storage structures, either RAM blocks or discrete registers. Our objective is to minimize the overall execution time while considering the capacity and bandwidth constraints of the storage resources. The novelty of our approach lies in creating a single framework that combines high-level compiler techniques with lower-level scheduling information for mapping the data. 
We illustrate the benefits of our approach for a set of image/signal processing kernels using a Xilinx Virtex\TM{} Field-Programmable Gate Array (FPGA). Our algorithm leads to faster designs compared to the state-of-the-art {\em custom data layout\/} mapping technique, in some instances using less storage. When compared to hand-coded designs, our results are comparable in terms of execution time and resources, but are derived in a minute fraction of the design time.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Compiler analysis; configurable architectures; high-level hardware synthesis; storage allocation and management", } @Article{Banerjee:2008:ASM, author = "Ansuman Banerjee and Pallab Dasgupta and P. P. Chakrabarti", title = "Auxiliary state machines + context-triggered properties in verification", journal = j-TODAES, volume = "13", number = "4", pages = "62:1--62:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391970", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Formal specifications of interface protocols between a design-under-test and its environment mostly consist of two types of correctness requirements, namely (a) a set of invariants that applies throughout the protocol execution and (b) a set of {\em context-triggered\/} properties that applies only when the protocol state belongs to a specific set of contexts. To model such requirements, an increasingly popular design choice in the assertion IP design community has been the use of abstract {\em context state machines\/} and state-oriented properties. 
In this paper, we formalize this modeling style and present algorithms for verifying such specifications. Specifically, we present a purely formal approach and a semi-formal approach for verifying such specifications. We demonstrate the use of this design style in modeling some of the industry standard protocol descriptions and present encouraging results.", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Panda:2008:SBV, author = "S. K. Panda and Arnab Roy and P. P. Chakrabarti and Rajeev Kumar", title = "Simulation-based verification using {Temporally Attributed Boolean Logic}", journal = j-TODAES, volume = "13", number = "4", pages = "63:1--63:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391971", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a specification logic called Temporally Attributed Boolean (TAB) Logic for Assertion Based Verification, which allows us to: (i) represent assertions succinctly, (ii) incorporate data-orientation and (iii) associate timing to design intentions. TAB Logic allows us to write specifications functionally linking system variables from different temporal contexts. We present examples to show the motivation for this logic especially in the context of high level modeling of complex real time systems. We formally define TAB Logic, formulate the problem of verification on a simulation trace and present efficient algorithms to check TAB assertions, both offline and online. We present results of application of TAB Logic for Instruction Semantics and Bus Transaction Verification of a bus integrated pipelined processor core implementation. 
We also employ TAB Logic to validate the Interrupt mode behavior of the processor core implementation. Further, we show the utility of TAB Logic in fault detection. Finally, we demonstrate the applicability of TAB Logic in the domain of simulation based verification of analog circuits like Operational Amplifiers and DC-DC Converters. We finally discuss the limitations of TAB logic and conclude.", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Bus verification; instruction semantics verification; interrupt testing; offline-online verification algorithm; simulation based verification; temporal logic; timing verification", } @Article{Wang:2008:LAS, author = "Sying-Jyan Wang and Kuo-Lin Peng and Kuang-Cyun Hsiao and Katherine Shu-Min Li", title = "Layout-aware scan chain reorder for launch-off-shift transition test coverage", journal = j-TODAES, volume = "13", number = "4", pages = "64:1--64:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391972", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Launch-off-shift (LOS) is a popular delay test technique for scan-based designs. However, it is usually not possible to achieve good delay fault coverage in LOS test due to conflicts in test vectors. In this article, we propose a layout-based scan chain ordering method to improve fault coverage for LOS test with limited routing overhead. A fast and effective algorithm is used to eliminate conflicts in test vectors while at the same time restrict the extra scan chain routing. This approach provides many advantages. (1) The proposed method can improve delay fault coverage for LOS test. 
(2) With layout information taken into account, the routing penalty is limited, and thus the impact on circuit performance will not be significant. Experimental results show that the proposed LOS test method achieves about the same level of delay fault coverage as enhanced scan does, while the average scan chain wire length is about 2.2 times of the shortest scan chain.", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "scan chain ordering; Scan test; test generation; transition faults", } @Article{Moiseev:2008:TAP, author = "Konstantin Moiseev and Avinoam Kolodny and Shmuel Wimer", title = "Timing-aware power-optimal ordering of signals", journal = j-TODAES, volume = "13", number = "4", pages = "65:1--65:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391973", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A computationally efficient technique for reducing interconnect active power in VLSI systems is presented. Power reduction is accomplished by simultaneous wire spacing and net ordering, such that cross-capacitances between wires are optimally shared. The existence of a unique power-optimal wire order within a bundle is proven, and a method to construct this order is derived. The optimal order of wires depends only on the activity factors of the underlying signals; hence, it can be performed prior to spacing optimization. By using this order of wires, optimality of the combined solution is guaranteed (as compared with any other ordering and spacing of the wires). 
Timing-aware power optimization is enabled by simultaneously considering timing criticality weights and activity factors for the signals. The proposed algorithm has been applied to various interconnect layouts, including wire bundles from high-end microprocessor circuits in 65 nm technology. Interconnect power reduction of 17\% on average has been observed in such bundles.", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "interconnect optimization; power optimization; Wire ordering; wire spacing", } @Article{Lu:2008:EDI, author = "Chao-Hung Lu and Hung-Ming Chen and Chien-Nan Jimmy Liu", title = "Effective decap insertion in area-array {SoC} floorplan design", journal = j-TODAES, volume = "13", number = "4", pages = "66:1--66:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391974", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As VLSI technology enters the nanometer era, supply voltages continue to drop due to the reduction of power dissipation, but it makes power integrity problems even worse. Employing decoupling capacitances (decaps) in floorplan stage is a common approach to alleviating supply noise problems. Previous researches overestimate the decap budget and do not fully utilize the empty space of the floorplan. A floorplan usually has a lot of available space that can be used to insert the decap without increasing the floorplan area. 
Therefore, the goal of this work is to develop a better model to calculate the required decap to solve the power supply noise problem in area-array based designs, and increase the usage of available space in the floorplan to reduce the area overhead caused by decap insertion. The experimental results of this work are encouraging. Compared with previous approaches, our methodology reduces 38\% of the decap budget in average for MCNC benchmarks but can still meet the power supply noise requirements. The final floorplan areas with decap are also smaller than the numbers reported in previous works.", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "decap insertion; floorplan; Power supply noise", } @Article{Moffitt:2008:CDF, author = "Michael D. Moffitt and Jarrod A. Roy and Igor L. Markov and Martha E. Pollack", title = "Constraint-driven floorplan repair", journal = j-TODAES, volume = "13", number = "4", pages = "67:1--67:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391975", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this work, we propose a new and efficient approach to the {\em floorplan repair\/} problem, where violated design constraints are satisfied by applying small changes to an existing rough floorplan. Such a floorplan can be produced by a human designer, a scalable placement algorithm, or result from engineering adjustments to an existing floorplan. In such cases, overlapping modules must be separated, and others may need to be repositioned to satisfy additional requirements. 
Our algorithmic framework uses an expressive graph-based encoding of constraints which can reflect fixed-outline, region, proximity and alignment constraints. By tracking the implications of existing constraints, we resolve violations by imposing gradual modifications to the floorplan, in an attempt to preserve the characteristics of its initial design. Empirically, our approach is effective at removing overlaps and repairing violations that may occur when design constraints are acquired and imposed dynamically.", acknowledgement = ack-nhfb, articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "constraints; Floorplanning; legalization", } @Article{Ozdal:2008:ORA, author = "Muhammet Mustafa Ozdal and Martin D. F. Wong and Philip S. Honsinger", title = "Optimal routing algorithms for rectilinear pin clusters in high-density multichip modules", journal = j-TODAES, volume = "13", number = "4", pages = "68:1--68:??", month = sep, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1391962.1391976", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 1 16:09:32 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the circuit densities and transistor counts are increasing, the package routing problem is becoming more and more challenging. In this article, we study an important routing problem encountered in typical high-end MCM designs: routing within dense pin clusters. Pin clusters are often formed by pins that belong to the same functional unit or the same data bus, and can become bottlenecks in terms of overall routability. Typically, these clusters have irregular shapes, which can be approximated with rectilinear convex boundaries. 
Since such boundaries have often irregular shapes, a traditional escape routing algorithm may give unroutable solutions. In this article, we study how the positions of escape terminals on a convex boundary affect the overall routability. For this purpose, we propose a set of necessary and sufficient conditions to model routability outside a rectilinear convex boundary. Given an escape routing solution, we propose an optimal algorithm to select the maximal subset of nets that are routable outside the boundary. After that, we focus on an integrated approach to consider routability constraints (outside the boundary) during the actual escape routing algorithm. Here, we propose an optimal algorithm to find the best escape routing solution that satisfies all routability constraints. Our experiments demonstrate that we can reduce the number of layers by 17\% on the average, by using this integrated methodology.", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Escape routing; multi-chip modules; network flow", } @Article{Keinert:2009:SAE, author = "Joachim Keinert and Martin Streub{\"u}hr and Thomas Schlichter and Joachim Falk and Jens Gladigau and Christian Haubelt and J{\"u}rgen Teich and Michael Meredith", title = "{SystemCoDesigner} --- an automatic {ESL} synthesis approach by design space exploration and behavioral synthesis for streaming applications", journal = j-TODAES, volume = "14", number = "1", pages = "1:1--1:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455230", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With increasing design complexity, the gap from ESL 
(Electronic System Level) design to RTL synthesis becomes more and more crucial to many industrial projects. Although several behavioral synthesis tools exist to automatically generate synthesizable RTL code from C/C++/SystemC-based input descriptions and software generation for embedded processors is automated as well, an efficient ESL synthesis methodology combining both is still missing. This article presents SystemCoDesigner, a novel SystemC-based ESL tool to automatically optimize a hardware/software SoC (System on Chip) implementation with respect to several objectives. Starting from a SystemC behavioral model, SystemCoDesigner automatically extracts the mathematical model, performs a behavioral synthesis step, and explores the multiobjective design space using state-of-the-art multiobjective optimization algorithms. During design space exploration, a single design point is evaluated by simulating highly accurate performance models, which are automatically generated from the SystemC behavioral model and the behavioral synthesis results. Moreover, SystemCoDesigner permits the automatic generation of bit streams for FPGA targets from any previously optimized SoC implementation. Thus SystemCoDesigner is the first fully automated ESL synthesis tool providing a correct-by-construction generation of hardware/software SoC implementations. As a case study, a model of a Motion-JPEG decoder was automatically optimized and implemented using SystemCoDesigner. 
Several synthesized SoC variants based on this model show different tradeoffs between required hardware costs and achieved system throughput, ranging from software-only solutions to pure hardware implementations that reach real-time performance for QCIF streams on a 50MHz FPGA.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "hardware/software codesign; System design", } @Article{Hansson:2009:CTC, author = "Andreas Hansson and Kees Goossens and Marco Bekooij and Jos Huisken", title = "{CoMPSoC}: a template for composable and predictable multi-processor system on chips", journal = j-TODAES, volume = "14", number = "1", pages = "2:1--2:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455231", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A growing number of applications, often with firm or soft real-time requirements, are integrated on the same System on Chip, in the form of either hardware or software intellectual property. The applications are started and stopped at run time, creating different use-cases. Resources, such as interconnects and memories, are shared between different applications, both within and between use-cases, to reduce silicon cost and power consumption.\par The functional and temporal behaviour of the applications is verified by simulation and formal methods. Traditionally, designers resort to monolithic verification of the system as whole, since the applications interfere in shared resources, and thus affect each other's behaviour. 
Due to interference between applications, the integration and verification complexity grows exponentially in the number of applications, and the task to verify correct behaviour of concurrent applications is on the system designer rather than the application designers.\par In this work, we propose a Composable and Predictable Multi-Processor System on Chip (CoMPSoC) platform template. This scalable hardware and software template removes all interference between applications through resource reservations. We demonstrate how this enables a divide-and-conquer design strategy, where all applications, potentially using different programming models and communication paradigms, are developed and verified independently of one another. Performance is analyzed per application, using state-of-the-art dataflow techniques or simulation, depending on the requirements of the application. These results still apply when the applications are integrated onto the platform, thus separating system-level design and application design.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Composable; model of computation; network on chip; predictable; system on chip", } @Article{Gheorghita:2009:SSB, author = "Stefan Valentin Gheorghita and Martin Palkovic and Juan Hamers and Arnout Vandecappelle and Stelios Mamagkakis and Twan Basten and Lieven Eeckhout and Henk Corporaal and Francky Catthoor and Frederik Vandeputte and Koen {De Bosschere}", title = "System-scenario-based design of dynamic embedded systems", journal = j-TODAES, volume = "14", number = "1", pages = "3:1--3:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455232", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = 
"http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the past decade, real-time embedded systems have become much more complex due to the introduction of a lot of new functionality in one application, and due to running multiple applications concurrently. This increases the dynamic nature of today's applications and systems, and tightens the requirements for their constraints in terms of deadlines and energy consumption. State-of-the-art design methodologies try to cope with these novel issues by identifying several most used cases and dealing with them separately, reducing the newly introduced complexity. This article presents a generic and systematic design-time/run-time methodology for handling the dynamic nature of modern embedded systems, which can be utilized by existing design methodologies to increase their efficiency. It is based on the concept of {\em system scenarios}, which group system behaviors that are similar from a multidimensional cost perspective --- such as resource requirements, delay, and energy consumption --- in such a way that the system can be configured to exploit this cost similarity. At design-time, these scenarios are individually optimized. Mechanisms for predicting the current scenario at run-time, and for switching between scenarios, are also derived. This design trajectory is augmented with a run-time calibration mechanism, which allows the system to learn on-the-fly during its execution, and to adapt itself to the current input stimuli, by extending the scenario set, changing the scenario definitions, and both the prediction and switching mechanisms. To show the generality of our methodology, we show how it has been applied on four very different real-life design problems. 
In all presented case studies, substantial energy reductions were obtained by exploiting scenarios.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Design methodology; dynamic nature; embedded systems; energy reduction; real-time systems; system scenarios", } @Article{Xu:2009:STA, author = "Qiang Xu and Yubin Zhang and Krishnendu Chakrabarty", title = "{SOC} test-architecture optimization for the testing of embedded cores and signal-integrity faults on core-external interconnects", journal = j-TODAES, volume = "14", number = "1", pages = "4:1--4:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455233", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The test time for core-external interconnect shorts and opens is typically much less than that for core-internal logic. Therefore, prior work on test-infrastructure design for core-based system-on-a-chip (SOC) has mainly focused on minimizing the test time for core-internal logic. However, as feature sizes shrink for newer process technologies, the test time for signal integrity (SI) faults on interconnects cannot be neglected. The test time for SI faults can be comparable to, or even larger than, the test time for the embedded cores. We investigate the impact of interconnect SI tests on SOC test-architecture design and optimization. A compaction method for SI faults and algorithms for test-architecture optimization are also presented. 
Experimental results for the ITC'02 benchmarks show that the proposed approach can significantly reduce the overall testing time for core-internal logic and core-external interconnects.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Core-based system-on-chip; interconnect testing; test scheduling; test-access mechanism (TAM)", } @Article{Jin:2009:GND, author = "Zhong-Yi Jin and Curt Schurgers and Rajesh K. Gupta", title = "A gateway node with duty-cycled radio and processing subsystems for wireless sensor networks", journal = j-TODAES, volume = "14", number = "1", pages = "5:1--5:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455234", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Wireless sensor nodes are increasingly being tasked with computation and communication intensive functions while still subject to constraints related to energy availability. On these embedded platforms, once all low power design techniques have been explored, duty-cycling the various subsystems remains the primary option to meet the energy and power constraints. This requires the ability to provide spurts of high MIPS and high bandwidth connections. However, due to the large overheads associated with duty-cycling the computation and communication subsystems, existing high performance sensor platforms are not efficient in supporting such an option. In this article, we present the design and optimizations taken in a wireless gateway node (WGN) that bridges data from wireless sensor networks to Wi-Fi networks in an on-demand basis. 
We discuss our strategies to reduce duty-cycling related costs by partitioning the system and by reducing the amount of time required to activate or deactivate the high-powered components. We compare the design choices and performance parameters with those made in the Intel {\em Stargate\/} platform to show the effectiveness of duty-cycling on our platform. We have built a working prototype, and the experimental results with two different power management schemes show significant reductions in latency and average power consumption compared to the {\em Stargate}. The WGN running our power-gating scheme performs about six times better in terms of average system power consumption than the {\em Stargate\/} running the suspend-system scheme for large working-periods where the active power dominates. For short working-periods where the transition (enable/disable) power becomes dominant, we perform up to seven times better. The comparative performance of our system is even greater when the sleep power dominates.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded systems; gateway; power savings; sensor nodes", } @Article{Wu:2009:EER, author = "Chin-Hsien Wu", title = "An energy-efficient {I/O} request mechanism for multi-bank flash-memory storage systems", journal = j-TODAES, volume = "14", number = "1", pages = "6:1--6:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455235", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Emerging critical issues for flash-memory storage systems, especially with regard to implementation within many embedded systems, are the programmed I/O nature 
of data transfers and their energy-efficient nature. We propose an I/O request mechanism in the Memory-Technology-Device (MTD) layer to exploit the programmed I/O-based data transfers for flash-memory storage systems. We propose to revise the waiting function in the Memory-Technology-Device (MTD) layer to relieve the microprocessor from busy-waiting, in order to make more CPU cycles available for other tasks. An energy-efficient mechanism based on the I/O request mechanism is also presented for multi-bank flash-memory storage systems, which particularly focuses on switching the power state of each flash-memory bank. We demonstrate that the energy-efficient I/O request mechanism not only saves more CPU cycles to execute other tasks, but also reduces the energy consumption of flash-memory, based on experiments incorporating realistic system workloads.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "embedded systems; energy-efficient; Flash Memory; programmed I/O; storage systems", } @Article{Dontharaju:2009:DAP, author = "Swapna Dontharaju and Shenchih Tung and James T. Cain and Leonid Mats and Marlin H. Mickle and Alex K. Jones", title = "A design automation and power estimation flow for {RFID} systems", journal = j-TODAES, volume = "14", number = "1", pages = "7:1--7:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455236", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While RFID has become a ubiquitous technology, there is still a need for RFID systems with different capabilities, protocols, and features depending on the application. 
This article describes a design automation flow and power estimation technique for fast implementation and design feedback of new RFID systems. Physical layer features are described using {\em waveform features}, which are used to automatically generate physical layer encoding and decoding hardware blocks. {\em RFID primitives\/} to be supported by the tag are enumerated with {\em RFID macros\/} and the behavior of each primitive is specified using ANSI-C within the template to automatically generate the tag controller. Case studies implementing widely used standards such as ISO 18000 Part 7 and ISO 18000 Part 6C using this automation technique are presented. The power macromodeling flow demonstrated here is shown to be within 5\% to 10\% accuracy, while providing results 100 times faster than traditional methods. When eliminating the need for certain features of ISO 18000 Part 6C, the design flow shows that the power required by the implementation is reduced by nearly 50\%.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design automation; low-power; prototyping; RFID", } @Article{Dasdan:2009:PEA, author = "Ali Dasdan", title = "Provably efficient algorithms for resolving temporal and spatial difference constraint violations", journal = j-TODAES, volume = "14", number = "1", pages = "8:1--8:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455237", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A system of difference constraints is a formal model of temporal and spatial constraints in many areas such as scheduling, constraint satisfaction, and layout compaction. 
During construction of such a system, constraint violations often arise, and they need to be resolved. Previous algorithms for this task fall into two groups: those algorithms that are fast but cannot resolve all violations, and those algorithms that can resolve all violations but are exponentially slow. We propose the first algorithms that are fast as well as able to resolve all violations. Moreover, unlike the previous algorithms, our algorithms support the ordering of violations using their inherent criticality or user-defined priority. We provably and experimentally justify the efficiency and efficacy of our algorithms.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Behavioral synthesis; constraint satisfaction; interface timing; layout compaction; multimedia synchronization; rate analysis; real-time systems; scheduling; timing constraints", } @Article{Sinha:2009:DIC, author = "Arnab Sinha and Pallab Dasgupta and Bhaskar Pal and Sayantan Das and Prasenjit Basu and P. P. Chakrabarti", title = "Design intent coverage revisited", journal = j-TODAES, volume = "14", number = "1", pages = "9:1--9:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455238", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "{\em Design intent coverage\/} is a formal methodology for analyzing the gap between a formal architectural specification of a design and the formal functional specifications of the component RTL blocks of the design. In this article we extend the design intent coverage methodology to hybrid specifications containing both state-machines and formal properties. 
We demonstrate the benefits of this extension in two domains of considerable recent interest, namely (a) the use of auxiliary state-machines in formal specifications, and (b) the use of modest sized RTL blocks in the design intent coverage analysis.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Design Intent Coverage", } @Article{Yang:2009:MCS, author = "Zijiang Yang and Chao Wang and Aarti Gupta and Franjo Ivan{\v{c}}i{\'c}", title = "Model checking sequential software programs via mixed symbolic analysis", journal = j-TODAES, volume = "14", number = "1", pages = "10:1--10:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455239", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present an efficient symbolic search algorithm for software model checking. Our algorithms perform word-level reasoning by using a combination of decision procedures in Boolean and integer and real domains, and use novel symbolic search strategies optimized specifically for sequential programs to improve scalability. 
Experiments on real-world C programs show that the new symbolic search algorithms can achieve several orders-of-magnitude improvements over existing methods based on bit-level (Boolean) reasoning.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "binary decision diagram; composite symbolic formula; image computation; Model checking; Presburger arithmetic; reachability analysis", } @Article{Mehta:2009:ICH, author = "Gayatri Mehta and Justin Stander and Mustafa Baz and Brady Hunsaker and Alex K. Jones", title = "Interconnect customization for a hardware fabric", journal = j-TODAES, volume = "14", number = "1", pages = "11:1--11:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455240", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article describes several multiplexer-based interconnection strategies designed to improve energy consumption of stripe-based coarse-grain reconfigurable fabrics. Application requirements for the architecture as well as two dense subgraphs are extracted from a suite of signal and image processing benchmarks. These statistics are used to drive the strategy of the composition of multiplexer-based interconnect. The article compares interconnects that are fully connected between stripes, those with a cardinality of 8:1 to 4:1, and extensions that provide a 5:1 cardinality, limited 6:1 cardinality, and hybrids between 5:1 and 3:1 cardinalities. Additionally, dedicated vertical routes are considered replacing some computational units with dedicated pass-gates. 
Using a fabric interconnect model (FIM) written in XML, we demonstrate that fabric instances and mappers can be automatically generated using a Web-based design flow. Upon testing these instances, we found that using an 8:1 cardinality interconnect with 33\% of the computational units replaced with dedicated pass-gates provided the best energy versus mappability tradeoff, resulting in a 50\% energy improvement over fully connected rows and 20\% energy improvement over an 8:1 cardinality interconnect without dedicated vertical routes.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "architecture; computer-aided design; demonstrable; hardware fabric; low-energy; Reconfigurable", } @Article{Sham:2009:CPE, author = "Chiu-Wing Sham and Evangeline F. Y. Young and Jingwei Lu", title = "Congestion prediction in early stages of physical design", journal = j-TODAES, volume = "14", number = "1", pages = "12:1--12:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455241", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routability optimization has become a major concern in physical design of VLSI circuits. Due to the recent advances in VLSI technology, interconnect has become a dominant factor of the overall performance of a circuit. In order to optimize interconnect cost, we need a good congestion estimation method to predict routability in the early designing stages. Many congestion models have been proposed but there's still a lot of room for improvement. Besides, routers will perform rip-up and reroute operations to prevent overflow, but most models do not consider this case. 
The outcome is that the existing models will usually underestimate the routability. In this paper, we have a comprehensive study on our proposed congestion models. Results show that the estimation results of our approaches are always more accurate than the previous congestion models.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Estimation; floorplanning; placement", } @Article{Zhu:2009:ESA, author = "Yi Zhu and Yuanfang Hu and Michael B. Taylor and Chung-Kuan Cheng", title = "Energy and switch area optimizations for {FPGA} global routing architectures", journal = j-TODAES, volume = "14", number = "1", pages = "13:1--13:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455242", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Low energy and small switch area usage are two important design objectives in FPGA global routing architecture design. This article presents an improved MCF model based CAD flow that performs aggressive optimizations, such as topology and wire style optimization, to reduce the energy and switch area of FPGA global routing architectures. 
The experiments show that when compared to traditional mesh architecture, the optimized FPGA routing architectures achieve up to 10\% to 15\% energy savings and up to 20\% switch area savings in average for a set of seven benchmark circuits.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "FPGA; global routing; low power", } @Article{Huang:2009:OPR, author = "Shih-Hsu Huang and Chia-Ming Chang and Yow-Tyng Nieh", title = "Opposite-phase register switching for peak current minimization", journal = j-TODAES, volume = "14", number = "1", pages = "14:1--14:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455243", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In a synchronous sequential circuit, huge current peaks are often observed at the moment of clock transition (since all registers are clocked). Previous works focus on reducing the number of switching registers. However, even though the switching registers are the same, different combinations of switching directions still result in different peak currents. Based on that observation, in this article, we propose an ECO (engineering change order) approach to minimize the peak current by considering the switching directions of registers. Our approach is well suitable for reducing the peak current in IC testing. 
Experimental data consistently show that our approach works well in practice.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "IC testing; Logic synthesis; peak current; sequential circuit synthesis", } @Article{Lin:2009:SCD, author = "Yen-Chun Lin and Li-Ling Hung", title = "Straightforward construction of depth-size optimal, parallel prefix circuits with fan-out 2", journal = j-TODAES, volume = "14", number = "1", pages = "15:1--15:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455244", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Prefix computation is used in various areas and is considered as a primitive operation. Parallel prefix circuits are parallel prefix algorithms on the combinational circuit model. The depth of a prefix circuit is a measure of its processing time; smaller depth implies faster computation. The size of a prefix circuit is the number of operation nodes in it. Smaller size implies less power consumption, less VLSI area, and less cost. A prefix circuit with $n$ inputs is depth-size optimal if its depth plus size equals $ 2 n - 2$. A circuit with a smaller fan-out is in general faster and occupies less VLSI area. To be of practical use, the depth and fan-out of a prefix circuit should be small. In this paper, a family of depth-size optimal, parallel prefix circuits with fan-out 2 is presented. This family of prefix circuits is easier to construct and more amenable to automatic synthesis than two other families of the same type, although the three families have the same minimum depth among all depth-size optimal prefix circuits with fan-out 2. 
The balanced structure of the new family is also a merit.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Depth-size optimal; fan-out; parallel prefix circuits", } @Article{Kahng:2009:LAA, author = "Andrew B. Kahng and Chul-Hong Park and Puneet Sharma and Qinke Wang", title = "Lens aberration aware placement for timing yield", journal = j-TODAES, volume = "14", number = "1", pages = "16:1--16:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455245", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Process variations due to lens aberrations are to a large extent systematic, and can be modeled for purposes of analyses and optimizations in the design phase. Traditionally, variations induced by lens aberrations have been considered random due to their small extent. However, as process margins reduce, and as improvements in reticle enhancement techniques control variations due to other sources with increased efficacy, lens aberration-induced variations gain importance. For example, our experiments indicate that delays of most cells in the Artisan TSMC 90nm library are affected by 2--8\% due to lens aberration. Aberration-induced variations are systematic and depend on the location in the lens field. In this article, we first propose an aberration-aware timing analysis flow that accounts for aberration-induced cell delay variations. We then propose an aberration-aware timing-driven analytical placement approach that utilizes the predictable slow and fast regions created on the chip due to aberration to improve cycle time. 
We study the dependence of our improvement on chip size, as well as use of the technique along with field blading which allows partial reticle exposure. We evaluate our technique on two testcases, {\em AES\/} and {\em JPEG\/} implemented in 90nm technology. The proposed technique reduces cycle time by 4.322\% (80ps) at the cost of 1.587\% increase in trial-routed wirelength for AES. On JPEG, we observe a cycle time reduction of 5.182\% (132ps) at the cost of 1.095\% increase in trial-routed wirelength.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design for manufacturing; Layout; lithography; timing yield", } @Article{Chien:2009:SMV, author = "Chih-Da Chien and Cheng-An Chien and Jui-Chin Chu and Jiun-In Guo and Ching-Hwa Cheng", title = "A {252Kgates\slash 4.9Kbytes SRAM\slash 71mW} multistandard video decoder for high definition video applications", journal = j-TODAES, volume = "14", number = "1", pages = "17:1--17:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455246", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a low-cost, low-power multistandard video decoder for high definition (HD) video applications. The proposed design supports multiple-standard (JPEG baseline, MPEG-1/2/4 Simple Profile (SP), and H.264 Baseline Profile (BP)) video decoding through interactive parsing control and common parameter bus interface. In order to reduce hardware cost, the shared adder-based structure and reusable data management are proposed to achieve hardware sharing and reduce internal memory size, respectively. 
In addition, the proposed design is optimized through reducing memory bandwidth by increasing both data reuse amount and burst length of memory access as well as eliminating cycle overhead in data access for supporting HD video decoding with single AHB-based SDR memory. The proposed 252Kgates/4.9kB/71mW/0.13$ \mu $m multi-standard video decoder reduces 72\% in gate count and 87\% in power consumption as compared to the state-of-the-art design, when operating at 120MHz for real-time HD1080 video decoding with single AHB-based SDR memory.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "H.264; MPEG; Video decoder", } @Article{Reviriego:2009:EED, author = "Pedro Reviriego and Juan Antonio Maestro", title = "Efficient error detection codes for multiple-bit upset correction in {SRAMs} with {BICS}", journal = j-TODAES, volume = "14", number = "1", pages = "18:1--18:??", month = jan, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1455229.1455247", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 26 18:12:50 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Memories are one of the most widely used elements in electronic systems, and their reliability when exposed to Single Events Upsets (SEUs) has been studied extensively. As transistor sizes shrink, Multiple Bits Upsets (MBUs) are becoming an increasingly important factor in the reliability of memories exposed to radiation effects. To address this issue, Built-in Current Sensors (BICS) have recently been applied in conjunction with Single Error Correction/Double Error Detection (SEC-DED) codes to protect memories from MBUs.
In this article, this approach is taken one step further, proposing specific codes optimized to be combined with BICS to provide protection against MBUs in memories. By exploiting the locality of errors within an MBU and the error detection and location capabilities of BICS, the proposed codes result in both a better protection level and a reduced cost compared with the existing SEC-DED approach.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "error correcting codes; Fault tolerant memory; high-level protection technique; protection against radiation", } @Article{Avnit:2009:PCC, author = "K. Avnit and V. D'Silva and A. Sowmya and S. Ramesh and S. Parameswaran", title = "Provably correct on-chip communication: a formal approach to automatic protocol converter synthesis", journal = j-TODAES, volume = "14", number = "2", pages = "19:1--19:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497562", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hardware module reuse is a standard solution to the problems of increasing complexity of chip architectures and pressure to reduce time to market. In the absence of a single module interface standard, predesigned modules for ``plug-and-play'' usually require a converter between incompatible interface protocols. Current approaches to automatic synthesis of protocol converters mostly lack formal foundations and either employ abstractions far removed from the HDL implementation level or grossly simplify the structure of the protocols considered.
This work presents a state-machine-based formalism for modeling bus-based communication protocols and a notion of protocol compatibility and of correct conversion between incompatible protocols. This formalism is used to derive algorithms for checking protocol compatibility and for provably correct, automatic converter synthesis. Experiments with automatic converter synthesis between different configurations of widely used commercial bus protocols, such as AMBA AHB, ASB APB, and the Open Core Protocol (OCP) are discussed. The work here is unique in its combination of a completely formal approach and the use of a low abstraction level that enables precise modeling of protocol characteristics that is also close to HDL.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "automatic design; converter synthesis; protocol compatibility; System-on-chip", } @Article{Pasricha:2009:SLP, author = "Sudeep Pasricha and Young-Hwan Park and Nikil Dutt and Fadi J. Kurdahi", title = "System-level {PVT} variation-aware power exploration of on-chip communication architectures", journal = j-TODAES, volume = "14", number = "2", pages = "20:1--20:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497563", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the shift towards deep submicron (DSM) technologies, the increase in leakage power and the adoption of power-aware design methodologies have resulted in potentially significant variations in power consumption under different process, voltage, and temperature (PVT) corners. 
In this article, we first investigate the impact of PVT corners on power consumption at the system-on-chip (SoC) level, especially for the on-chip communication infrastructure. Given a target technology library, we then show how it is possible to ``scale up'' and abstract the PVT variability at the system level, allowing characterization of the PVT-aware design space early in the design flow. We conducted several experiments to estimate power for PVT corner cases, at the gate level, as well as at the higher system level. Our preliminary results are very interesting, and indicate that (i) there are significant variations in power consumption across PVT corners; and (ii) the PVT-aware power estimation problem may be amenable to a reasonably simple abstraction at the system level.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "digital systems; high-level synthesis; on-chip communication architectures; performance exploration; power estimation; PVT variation", } @Article{Mukhopadhyay:2009:IAA, author = "Rajdeep Mukhopadhyay and S. K. Panda and Pallab Dasgupta and John Gough", title = "Instrumenting {AMS} assertion verification on commercial platforms", journal = j-TODAES, volume = "14", number = "2", pages = "21:1--21:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497564", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The industry trend appears to be moving towards designs that integrate large digital circuits with multiple analog/RF (radio frequency) interfaces. 
In the verification of these large integrated circuits, the number of nets that need to be monitored has been growing rapidly. Consequently, the mixed-signal design community has been feeling the need for AMS (Analog and Mixed Signal) assertions that can automatically monitor conformance with expected time-domain behavior and help in debugging deviations from the design intent. The main challenges in providing this support are (a) developing AMS assertion languages or AMS verification libraries, and (b) instrumenting existing commercial simulators to support assertion verification during simulation. In this article, we report two approaches: the first extends the {\em Open Verification Library\/} (OVL) to the AMS domain by integrating a new collection of AMS verification libraries; while the second extends {\em SystemVerilog Assertions\/} (SVA) by augmenting analog predicates into SVA. We demonstrate the use of AMS-OVL on the Cadence Virtuoso environment while emphasizing that our libraries can work in any environment that supports Verilog and Verilog-A. We also report the development of tool support for AMS-SVA using a combination of Cadence NCSIM and Synopsys VCS. We demonstrate the utility of both approaches on the verification of LP3918, an integrated power management unit (PMU) from National Semiconductors. 
We believe that in the absence of existing EDA (Electronic Design Automation) tools for AMS assertion verification, the proposed approaches of integrating our libraries and our tool sets with existing commercial simulators will be of considerable and immediate practical value.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Assertion; integrated mixed signal design; OVL; simulation; SVA; verification library", } @Article{Palkovic:2009:TOL, author = "Martin Palkovic and Francky Catthoor and Henk Corporaal", title = "Trade-offs in loop transformations", journal = j-TODAES, volume = "14", number = "2", pages = "22:1--22:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497565", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Nowadays, multimedia systems deal with huge amounts of memory accesses and large memory footprints. To alleviate the impact of these accesses and reduce the memory footprint, high-level memory exploration and optimization techniques have been proposed. These techniques try to more efficiently utilize the memory hierarchy. An important step in these optimization techniques are loop transformations (LT). They have a crucial effect on later data memory footprint optimization steps and code generation. However, the state-of-the-art work has focused only on individual objectives. The main one in literature involves improving the locality of data accesses, and thus reducing the data memory footprint. It does not consider the trade-offs in the LT step in relation to successive optimization steps. 
Therefore, it is not globally efficient in mapping the application on the target platform.\par In this article we will discuss several trade-offs during the loop transformations. To our knowledge, we are the first ones considering these global trade-offs. Previous work always gave mostly one solution, having the best locality and thus the optimized memory footprint, even though some research in two-dimensional trade-offs in this area exists as well. We start from this state-of-the-art solution with minimal footprint. We show that by sacrificing the footprint, we can obtain gains in data reuse (crucial for energy reduction) and reduce the control-flow complexity. We demonstrate our approach on a real-life application, namely the QSDPCM video coder. At the end, we show that considering trade-offs for this application leads to 16\% energy reduction in a two-layer memory subsystem and 10\% cycle reduction on the ARM platform.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "cost components; Data transfer and storage exploration; loop transformations; optimization; trade-offs", } @Article{Fummi:2009:CMH, author = "Franco Fummi and Mirko Loghi and Massimo Poncino and Graziano Pravadelli", title = "A cosimulation methodology for {HW\slash SW} validation and performance estimation", journal = j-TODAES, volume = "14", number = "2", pages = "23:1--23:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497566", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Cosimulation strategies allow us to simulate and verify HW/SW embedded systems before the real platform is available. 
In this field, there is a large variety of approaches that rely on different communication mechanisms to implement an efficient interface between the SW and the HW simulators. However, the literature lacks a comprehensive methodology which addresses the need for integrating and synchronizing heterogeneous simulators, like, for example, the SystemC simulation kernel for HW modules and an instruction set simulator for SW applications, without being intrusive for the HW and SW descriptions involved in the simulation. In this context, this article presents, compares, and integrates in a system-level framework two different co-simulation strategies for modeling, analyzing, and validating the performance of a HW/SW embedded system. Moreover, for both of them, a mechanism is proposed to provide an accurate time synchronization of the HW/SW communication. The first strategy is intended to provide an early cosimulation environment where HW/SW interaction can be validated without involving the operating system. The communication is implemented between a single SW task and a SystemC description of an HW module by exploiting the features of the remote debugging interface of a debugger (the GNU GDB), and by modifying the SystemC simulation kernel. On the other hand, the second strategy is intended to be used in further development steps, when the operating system is introduced to validate the cosimulation between HW modules and multitasking SW applications. 
In this approach, the communication is implemented via interrupts by using the features offered by the operating system.\par Experimental results are reported on two different case studies to analyze and compare the effectiveness of both the approaches.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded Systems; HW/SW co-simulation; HW/SW validation", } @Article{Inoue:2009:DSD, author = "Hiroaki Inoue and Tsuyoshi Abe and Kazuhisa Ishizaka and Junji Sakai and Masato Edahiro", title = "Dynamic security domain scaling on embedded symmetric multiprocessors", journal = j-TODAES, volume = "14", number = "2", pages = "24:1--24:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497567", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a method for dynamic security-domain scaling on SMPs that offers both highly scalable performance and high security for future high-end embedded systems. Its most important feature is its highly efficient use of processor resources, accomplished by dynamically changing the number of processors within a security-domain (i.e., dynamically yielding processors to other security-domains) in response to application load requirements. Two new technologies make this scaling possible without any virtualization software: (1) self-transition management and (2) unified virtual address mapping. Evaluations show that this domain control provides highly scalable performance and incurs almost no performance overhead in security-domains. 
The increase in OSs in binary code size is less than 1.5\%, and the time required for individual state transitions is on the order of a single millisecond. This scaling is the first in the world to make possible the dynamic changing of the number of processors within a security-domain on an ARM SMP.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "AMP; dynamic security-domain scaling; SMP", } @Article{Qiu:2009:CMW, author = "Meikang Qiu and Edwin H.-M. Sha", title = "Cost minimization while satisfying hard\slash soft timing constraints for heterogeneous embedded systems", journal = j-TODAES, volume = "14", number = "2", pages = "25:1--25:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497568", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In high-level synthesis for real-time embedded systems using heterogeneous functional units (FUs), it is critical to select the best FU type for each task. However, some tasks may not have fixed execution times. This article models each varied execution time as a probabilistic random variable and solves {\em heterogeneous assignment with probability\/} (HAP) problem. The solution of the HAP problem assigns a proper FU type to each task such that the total cost is minimized while the timing constraint is satisfied with a guaranteed confidence probability. The solutions to the HAP problem are useful for both hard real-time and soft real-time systems. Optimal algorithms are proposed to find the optimal solutions for the HAP problem when the input is a tree or a simple path. 
Two other algorithms, one is optimal and the other is near-optimal heuristic, are proposed to solve the general problem. The experiments show that our algorithms can effectively reduce the total cost while satisfying timing constraints with guaranteed confidence probabilities. For example, our algorithms achieve an average reduction of 33.0\% on total cost with 0.90 confidence probability satisfying timing constraints compared with the previous work using worst-case scenario.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded Systems; heterogeneous; high-level synthesis; real-time", } @Article{Zhou:2009:TAR, author = "Xiangrong Zhou and Chenjie Yu and Peter Petrov", title = "Temperature-aware register reallocation for register file power-density minimization", journal = j-TODAES, volume = "14", number = "2", pages = "26:1--26:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497569", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Increased chip temperature has been known to cause severe reliability problems and to significantly increase leakage power. The register file has been previously shown to exhibit the highest temperature compared to all other hardware components in a modern high-end embedded processor, which makes it particularly susceptible to faults and elevated leakage power. We show that this is mostly due to the highly clustered register file accesses where a set of few registers physically placed close to each other are accessed with very high frequency. 
We propose compile-time temperature-aware register reallocation methodologies for breaking such groups of registers and to uniformly distribute the accesses to the register file. This is achieved with {\em no performance\/} and {\em no hardware overheads}. We show that the underlying problem is NP-hard, and subsequently introduce and evaluate two efficient algorithmic heuristics. Our extensive experimental study demonstrates the efficiency of the proposed methodology.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hong:2009:RFD, author = "Yu-Ru Hong and Juinn-Dar Huang", title = "Reducing fault dictionary size for million-gate large circuits", journal = j-TODAES, volume = "14", number = "2", pages = "27:1--27:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497570", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In general, fault dictionary is prevented from practical applications in fault diagnosis due to its extremely large size. Several previous works are proposed for the fault dictionary size reduction. However, some of them fail to bring down the size to an acceptable level, and others might not be able to handle today's million-gate circuits due to their high time and space complexity. In this article, an algorithm is presented to reduce the size of pass-fail dictionary while still preserving high diagnostic resolution. The proposed algorithm possesses low time and space complexity by avoiding constructing the huge distinguishability table, which inevitably boosts up the required computation complexity. 
Experimental results demonstrate that the proposed algorithm is capable of handling industrial million-gate large circuits in a reasonable amount of runtime and memory.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "diagnostic resolution; fault diagnosis; Fault dictionary", } @Article{Kavousianos:2009:EPS, author = "Xrysovalantis Kavousianos and Dimitris Bakalis and Dimitris Nikolos", title = "Efficient partial scan cell gating for low-power scan-based testing", journal = j-TODAES, volume = "14", number = "2", pages = "28:1--28:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497571", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Gating of the outputs of a portion of the scan cells (partial gating) has been recently proposed as a method for reducing the dynamic power dissipation during scan-based testing. We present a new systematic method for selecting, under area and performance design constraints, the most suitable for gating subset of scan cells as well as the proper gating value for each one of them, aiming at the reduction of the average switching activity during testing. 
We show that the proposed method outperforms the corresponding already known methods, with respect to average dynamic power dissipation reduction.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Low-power testing; partial gating; scan cell gating; scan-based testing", } @Article{Rakhmatov:2009:BVM, author = "Daler Rakhmatov", title = "Battery voltage modeling for portable systems", journal = j-TODAES, volume = "14", number = "2", pages = "29:1--29:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497572", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Limited battery life imposes stringent constraints on the operation of battery-powered portable systems. During battery discharge, the battery voltage decreases, until a certain cutoff value is reached, marking the end of battery life. The amount of discharge capacity and energy delivered by the battery during its life depends not only on the battery characteristics, but also on the load conditions. A different system design may result in a different battery current (load) profile over time, leading to a different battery voltage profile over time. This article presents an analytical model that relates the battery voltage to the battery current, thus facilitating system design optimizations with respect to the battery performance. It captures well-known nonlinear phenomena of capacity loss at high discharge rates, charge recovery, and capacity fading. The proposed model has been validated against measurements taken on Li-ion batteries. 
We also describe techniques for efficient calculations of model's estimates, which lets a user exploit accuracy-complexity tradeoffs.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "accuracy-complexity tradeoff; analytical modeling; battery performance; battery-powered systems; Low-power design", } @Article{Kumar:2009:EML, author = "Yokesh Kumar and Prosenjit Gupta", title = "External memory layout vs. schematic", journal = j-TODAES, volume = "14", number = "2", pages = "30:1--30:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497573", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The circuit represented by a VLSI layout must be verified by checking it against the schematic circuit as an important part of the functional verification step. This involves two central problems of matching the circuit graphs with each other (graph isomorphism) and extracting a higher level of circuit from a given level by finding subcircuits in the circuit graph (subgraph isomorphism). Modern day VLSI layouts contain millions of devices. Hence the memory requirements of the data structures required by tools for verifying them become huge and can easily exceed the amount of internal memory available on a computer. In such a scenario, a program not aware of the memory hierarchy performs badly because of its unorganized input/output operations (I/Os) as the speed of a disk access is about a million times slower than accessing a main memory location. 
In this article, we present I/O-efficient algorithms for the graph isomorphism and subgraph isomorphism problems in the context of verification of VLSI layouts. Experimental results show the need and utility of I/O-efficient algorithms for handling problems with large memory requirements.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design automation; external memory algorithms; Graph; subgraph isomorphism; verification of layouts", } @Article{Chen:2009:SAP, author = "Po-Yuan Chen and Kuan-Hsien Ho and Tingting Hwang", title = "Skew-aware polarity assignment in clock tree", journal = j-TODAES, volume = "14", number = "2", pages = "31:1--31:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497574", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In modern sequential VLSI designs, clock tree plays an important role in synchronizing different components in a chip. To reduce peak current and power/ground noises caused by clock network, assigning different signal polarities to clock buffers is proposed in previous work. Although peak current and power/ground noises are minimized by signal polarities assignment, an assignment without timing information may increase the clock skew significantly. As a result, a timing-aware signal polarities assigning technique is necessary. In this article, we propose a novel signal polarities assigning technique which can not only reduce peak current and power/ground noises simultaneously but also render the clock skew in control. 
The experimental result shows that the clock skew produced by our algorithm is 94\% of original clock skew in average while the clock skews produced by three algorithms (Partition, MST, Matching) in the absence of post clock tuning steps in the previous work are 235\%, 272\%, and 283\%, respectively. Moreover, our algorithm is as efficient as the three algorithms of the previous work in reducing peak current and power/ground noises.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Clock skew; clock tree; peak current; polarity assignment; power/ground noise", } @Article{Cho:2009:BHR, author = "Minsik Cho and Katrina Lu and Kun Yuan and David Z. Pan", title = "{BoxRouter 2.0}: a hybrid and robust global router with layer assignment for routability", journal = j-TODAES, volume = "14", number = "2", pages = "32:1--32:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497575", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present BoxRouter 2.0, and discuss its architecture and implementation. As high-performance VLSI design becomes more interconnect-dominant, efficient congestion elimination in global routing is in greater demand. Hence, we propose a global router which has a strong ability to improve routability and minimize the number of vias with blockages, while minimizing wirelength. BoxRouter 2.0 is extended from BoxRouter 1.0, but can perform multi-layer routing with 2D global routing and layer assignment. 
Our 2D global routing is equipped with two ideas: node shifting for congestion-aware Steiner tree and robust negotiation-based A* search for routing stability. After 2D global routing, 2D-to-3D mapping is done by the layer assignment which is powered by progressive via/blockage-aware integer linear programming. Experimental results show that BoxRouter 2.0 has better routability with comparable wirelength than other routers on ISPD07 benchmark, and it can complete (no overflow) the widely used ISPD98 benchmark for the first time in the literature with the shortest wirelength. We further generate a set of harder ISPD98 benchmarks to push the limit of BoxRouter 2.0, and propose the hardened ISPD98 benchmarks to map state-of-the-art solutions for future routing research.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "congestion; global routing; integer linear programming; layer assignment; physical design; routability; VLSI", } @Article{Gulati:2009:FBH, author = "Kanupriya Gulati and Suganth Paul and Sunil P. Khatri and Srinivas Patil and Abhijit Jas", title = "{FPGA}-based hardware acceleration for {Boolean} satisfiability", journal = j-TODAES, volume = "14", number = "2", pages = "33:1--33:??", month = mar, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1497561.1497576", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Apr 2 15:06:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present an FPGA-based hardware solution to the Boolean satisfiability (SAT) problem, with the main goals of scalability and speedup. In our approach the traversal of the implication graph as well as conflict clause generation are performed in hardware, in parallel. 
The experimental results and their analysis, along with the performance models are discussed. We show that an order of magnitude improvement in runtime can be obtained over MiniSAT (the best-in-class software based approach) by using a Virtex-4 (XC4VFX140) FPGA device. The resulting system can handle instances with as many as 10K variables and 280K clauses.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Boolean Constant Propagation (BCP); Boolean satisfiability (SAT); conflict induced clauses; FPGA; non-chronological backtrack", } @Article{Malik:2009:SCU, author = "Avinash Malik and Zoran Salcic and Partha S. Roop", title = "{SystemJ} compilation using the {Tandem Virtual Machine} approach", journal = j-TODAES, volume = "14", number = "3", pages = "34:1--34:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529256", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "SystemJ is a language based on the Globally Asynchronous Locally Synchronous (GALS) paradigm. A SystemJ program is a collection of GALS nodes, also called clock domains, and each clock domain is a synchronous program that extends the Java language. Initial compilation of SystemJ has been to standard Java executing on a Java Virtual Machine (JVM), which is both inefficient and bulky for small embedded systems. This article proposes a new approach for compiling and executing SystemJ using a new type of virtual machine, called a Tandem Virtual Machine (TVM). The TVM approach provides an efficient implementation of SystemJ on both standard processors and resource-constrained embedded processors. 
The new approach is based on separating the control-driven and data-driven operations for execution on two virtual machines. While the JVM executes the data-driven operations, a Control Virtual Machine (CVM) is introduced to execute the control-driven parts of a SystemJ program. The TVM approach is capable of handling all data-driven and control-driven operations required by the GALS model. The benchmark results show that the TVM has code size improvements of over 60\% on average and also a substantial improvement in execution speed over standard Java-based compilation.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compilation; esterel; System-level design; SystemJ; virtual machines", } @Article{Cong:2009:SRB, author = "Jason Cong and Yiping Fan and Junjuan Xu", title = "Simultaneous resource binding and interconnection optimization based on a distributed register-file microarchitecture", journal = j-TODAES, volume = "14", number = "3", pages = "35:1--35:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529257", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Behavior synthesis and optimization beyond the register-transfer level require an efficient utilization of the underlying platform features. This article presents a platform-based resource binding approach based on a {\em Distributed Register-File Microarchitecture (DRFM)}, which makes efficient use of distributed embedded memory blocks as register files in modern FPGAs. DRFM contains multiple islands, each having a local register file, a functional unit pool, and data-routing logic. 
Compared to the traditional discrete-register counterpart, a DRFM allows use of the platform-featured on-chip memory or register-file IP blocks to implement its local register files, and this results in a substantial saving of multiplexing logic and global interconnects. DRFM provides a useful architectural template and a direct optimization objective for minimizing interisland connections for synthesis algorithms. Given the scheduling solution and resource (functional units) constraints, two novel algorithms in the resource binding stage are developed based on DRFM: (i) a simultaneous DRFM clustering and binding algorithm, which decides the configuration of DRFM and the assignment of operations into islands with the focus on optimizing global connections; (ii) a data-forwarding scheduling algorithm, which takes advantage of the operation slacks to handle the read-port restriction of register files. On the Xilinx Virtex4 FPGA platform, experimental results with a set of real-life test cases show a 50\% logic area reduction achieved by applying our approach, with a 14.6\% performance improvement, compared to the traditional discrete-register-based approach. 
Also, experiments on small-size designs show that our algorithm produces the same number of total connections and at most one more maximum feeding-in connection compared to optimal solutions generated by ILP.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Behavioral synthesis; distributed register file; resource binding", } @Article{Raghavan:2009:PTG, author = "Praveen Raghavan and Murali Jayapala and Andy Lambrechts and Javed Absar and Francky Catthoor", title = "Playing the trade-off game: {Architecture} exploration using {Coffeee}", journal = j-TODAES, volume = "14", number = "3", pages = "36:1--36:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529258", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern mobile devices need to be extremely energy efficient. Due to the growing complexity of these devices, energy-aware design exploration has become increasingly important. Current exploration tools often do not support energy estimation, or require the design to be very detailed before estimation is possible. It is important to get early feedback on both performance and energy consumption during all phases of the design and at higher abstraction levels. This article presents a unified optimization and exploration framework to explore source-level transformation to processor architecture design space. The proposed retargetable compiler and simulator framework can map applications to a range of processors and memory configurations, simulate, and report detailed performance and energy estimates. 
An accurate and consistent energy modeling approach is introduced which can estimate the energy consumption of processor and memories at a component level, which can help to guide the design process. Fast energy-aware architecture exploration is illustrated by modeling both state-of-the-art processors as well as other architectures. Various design trade-offs are also illustrated on different academic as well as industrial benchmarks from both the wireless communication and multimedia domain. We also illustrate a design space exploration on different applications and show that there is large trade-off space between application performance, energy consumption, and area. We show that the proposed framework is consistent, accurate, and covers a large design space including various novel low-power extensions in a unified framework.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "architecture exploration; area; compiler-architecture interaction; design; embedded systems; Energy; loop transformations; power estimation; power-performance trade-off; processors; VLIW", } @Article{Das:2009:SBT, author = "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar", title = "Scenario-based timing verification of multiprocessor embedded applications", journal = j-TODAES, volume = "14", number = "3", pages = "37:1--37:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529259", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work presents a static timing-analysis method for verification of scenario-based real-time properties, on graphical task-level models of embedded applications. 
Scenario-based properties specify timing constraints which must be honored for specific control-flow behaviors and task execution orderings. Static checking of scenario-based properties currently requires computationally expensive model checking methods. Hence the proposed graph-based static timing-analysis algorithm improves upon the state-of-the-art. This is manifested in a significant performance advantage over timed model checking (up to 1000X in several cases), which suffers from state space explosion. The proposed algorithm also employs compositional reasoning and abstraction refinement for handling large problems. We also illustrate methods for using scenario-based timing analysis, which can act as alternatives to traditional timed model checking for verification of timed systems like FDDI and Fischer protocols. We implement this timing verification algorithm as a tool called {\em SymTime\/} and present experimental results for SymTime comparing it with SPIN, UPPAAL, and a TCTL model checker for Time Petri Nets, called Romeo.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "execution scenarios; real time systems; static timing analysis; Timing verification", } @Article{Grosse:2009:MPO, author = "Philippe Grosse and Yves Durand and Paul Feautrier", title = "Methods for power optimization in {SOC}-based data flow systems", journal = j-TODAES, volume = "14", number = "3", pages = "38:1--38:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529260", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Whereas the computing power of DSP or general-purpose processors was sufficient for 
3G baseband telecommunication algorithms, stringent timing constraints of 4G wireless telecommunication systems require computing-intensive data-driven architectures. Managing the complexity of these systems within the energy constraints of a mobile terminal is becoming a major challenge for designers. System-level low-power policies have been widely explored for generic software-based systems, but data-flow architectures used for high data-rate telecommunication systems feature heterogeneous components that require specific configurations for power management. In this study, we propose an innovative power optimization scheme tailored to self-synchronized data-flow systems. Our technique, based on the synchronous data-flow modeling approach, takes advantage of the latest low-power techniques available for digital architectures. We illustrate our optimization method on a complete 4G telecommunication baseband modem and show the energy savings expected by this technique considering present and future silicon technologies.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "4G base-band modem; data-driven SOC; Power optimization; synchronous data-flow graph", } @Article{Clarke:2009:WLS, author = "Jonathan A. Clarke and George A. Constantinides and Peter Y. K. 
Cheung", title = "Word-length selection for power minimization via nonlinear optimization", journal = j-TODAES, volume = "14", number = "3", pages = "39:1--39:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529261", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article describes the first method for minimizing the dynamic power consumption of a Digital Signal Processing (DSP) algorithm implemented on reconfigurable hardware via word-length optimization. Fast models for estimating the power consumption of the arithmetic components and the routing power of these algorithm implementations are used within a constrained nonlinear optimization formulation that solves a relaxed version of word-length optimization. Tight lower and upper bounds on the cost of the integer word-length problem can be obtained using the proposed solution, with typical upper bounds being 2.9\% and 5.1\% larger than the lower bounds for area and power consumption, respectively. Heuristics can then use the upper bound as a starting point from which to get even closer to the known lower bound. Results show that power consumption can be improved by up to 40\% compared to that achieved when using simple word-length selection techniques, and further comparisons are made between the minimization of different cost functions that give insight into the advantages offered by multiple word-length optimization.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bitwidth; Power consumption; signal processing; synthesis; word length", } @Article{Morgado:2009:GRS, author = "P. 
Marques Morgado and Paulo F. Flores and L. Miguel Silveira", title = "Generating realistic stimuli for accurate power grid analysis", journal = j-TODAES, volume = "14", number = "3", pages = "40:1--40:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529262", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power analysis tools are an integral component of any current power sign-off methodology. The performance of a design's power grid affects the timing and functionality of a circuit, directly impacting the overall performance. Ensuring power grid robustness implies taking into account, among others, static and dynamic effects of voltage drop, ground bounce, and electromigration. This type of verification is usually done by simulation, targeting a worst-case scenario where devices, switching almost simultaneously, could impose stern current demands on the power grid. While determination of the exact worst-case switching conditions from the grid perspective is usually not practical, the choice of simulation stimuli has a critical effect on the results of the analysis. Targeting safe but unrealistic settings could lead to pessimistic results and costly overdesigns in terms of die area. In this article we describe a software tool that generates a reasonable, realistic, set of stimuli for simulation. The approach proposed accounts for timing and spatial restrictions that arise from the circuit's netlist and placement and generates an approximation to the worst-case condition. The resulting stimuli indicate that only a fraction of the gates change in any given timing window, leading to a more robust verification methodology, especially in the dynamic case. 
Generating such stimuli is akin to performing a standard static timing analysis, so the tool fits well within conventional design frameworks. Furthermore, the tool can be used for hotspot detection in early design stages.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "ground bounce; Power grid; simulation; stimuli generation; verification; voltage drop", } @Article{Yu:2009:APG, author = "Hao Yu and Joanna Ho and Lei He", title = "Allocating power ground vias in {$3$D} {ICs} for simultaneous power and thermal integrity", journal = j-TODAES, volume = "14", number = "3", pages = "41:1--41:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529263", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The existing work on via allocation in 3D ICs ignores power/ground vias' ability to simultaneously reduce voltage bounce and remove heat. This article develops the first in-depth study on the allocation of power/ground vias in 3D ICs with simultaneous consideration of power and thermal integrity. By identifying principal ports and parameters, effective electrical and thermal macromodels are employed to provide dynamic power and thermal integrity as well as sensitivity with respect to via density. With the use of sensitivity, an efficient via allocation simultaneously driven by power and thermal integrity is developed. 
Experiments show that, compared to sequential power and thermal optimization using static integrity, sequential optimization using the dynamic integrity reduces nonsignal vias by up to 18\%, and simultaneous optimization using dynamic integrity further reduces nonsignal vias by up to 45.5\%.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "macromodeling; parametric 3D-IC design; Thermal and power integrity", } @Article{Liu:2009:MAA, author = "Bo Liu and Francisco V. Fern{\'a}ndez and Georges Gielen and R. Castro-L{\'o}pez and E. Roca", title = "A memetic approach to the automatic design of high-performance analog integrated circuits", journal = j-TODAES, volume = "14", number = "3", pages = "42:1--42:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529264", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article introduces an evolution-based methodology, named memetic single-objective evolutionary algorithm (MSOEA), for automated sizing of high-performance analog integrated circuits. Memetic algorithms may achieve higher global and local search ability by properly combining operators from different standard evolutionary algorithms. By integrating operators from the differential evolution algorithm, from the real-coded genetic algorithm, operators inspired by the simulated annealing algorithm, and a set of constraint handling techniques, MSOEA specializes in handling analog circuit design problems with numerous and tight design constraints. The method has been tested through the sizing of several analog circuits. 
The results show that design specifications are met and objective functions are highly optimized. Comparisons with available methods like genetic algorithm and differential evolution in conjunction with static penalty functions, as well as with intelligent selection-based differential evolution, are also carried out, showing that the proposed algorithm has important advantages in terms of constraint handling ability and optimization quality.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Analog circuit sizing; analog design automation; constrained optimization; memetic algorithm", } @Article{Mutyam:2009:SST, author = "Madhu Mutyam", title = "Selective shielding technique to eliminate crosstalk transitions", journal = j-TODAES, volume = "14", number = "3", pages = "43:1--43:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529265", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With CMOS process technology scaling to deep submicron level, propagation delay across long on-chip buses is becoming one of the main performance limiting factors in high-performance designs. Propagation delay is very significant when adjacent wires are transitioning in opposite direction as compared to transitioning in the same direction. As opposite transitions on adjacent wires (called as {\em crosstalk transitions\/}) have significant impact on propagation delay, several bus encoding techniques have been proposed in literature to eliminate such transitions.\par We propose {\em selective shielding\/} technique to eliminate crosstalk transitions. 
We show that the selective shielding technique requires $ \lceil 3 n / 2 \rceil $ wires to encode a $n$-bit bus. SPICE simulations by considering 90nm technology nodes reveal that, for uniformly distributed random data, our technique achieves nearly 39\% (21\%) delay savings over 10 {\em mm\/}-length uncoded 32-bit bus for pipelined (nonpipelined) data transmission at the cost of nearly 7\% energy overhead.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bus encoding; Crosstalk; power consumption; switching activity", } @Article{Taskin:2009:CTR, author = "Baris Taskin and Joseph Demaio and Owen Farell and Michael Hazeltine and Ryan Ketner", title = "Custom topology rotary clock router with tree subnetworks", journal = j-TODAES, volume = "14", number = "3", pages = "44:1--44:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529266", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Increasing demands on computing power have spurred the development of faster, higher-density Integrated Circuits (ICs), compounding power and complexity concerns in design budgets. The clock distribution network is a significant contributor to such power and complexity concerns. Resonant rotary clocking is a relatively new technology that realizes several benefits over current clocking methods, including power, frequency, and variation tolerance, yet lacks the automation tools to promote increased use. Towards this end, an automated rotary clock routing methodology is presented that generates custom topology rotary ring routes with tree subnetworks. 
In addition to the benefits of adiabatic clocking, the presented custom topology router permits 38.6\% shorter wirelengths on average for register tapping, compared to traditional prescribed skew, binary tree routing.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "clock network design; clock skew; multiphase synchronization; Resonant rotary clocking", } @Article{Liu:2009:HPO, author = "Chih-Hung Liu and Shih-Yi Yuan and Sy-Yen Kuo and Szu-Chi Wang", title = "High-performance obstacle-avoiding rectilinear {Steiner} tree construction", journal = j-TODAES, volume = "14", number = "3", pages = "45:1--45:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529267", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Rectilinear Steiner trees are used to route signal nets by global and detail routers in VLSI design for a long time. However, in current IC industry, there are significantly increasing obstacles to be considered, such as large-scale power networks, pre-routed nets, IP blocks, and antenna jumpers. Accordingly, the {\em obstacle-avoiding rectilinear Steiner minimal tree\/} (OARSMT) problem has become more important. In this article, we propose a new routing graph, {\em obstacle-avoiding routing graph\/} (OARG), for the OARSMT problem. Due to the important properties of OARG, we construct a 3-step algorithm and a local refinement scheme, which both can take advantage of these properties, to find a suboptimal solution efficiently. Furthermore, each step of our 3-step algorithm as well as the local refinement scheme has theoretical or practical benefits. 
Therefore, each of them can be applicable to other existing works for general or specific considerations such as efficiency or effectiveness. Extensive experimental results show that our method outperforms all existing works in terms of wirelength and achieves the best speed performance.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "obstacle-avoiding; rectilinear; Routing; Steiner tree", } @Article{Yan:2009:TAS, author = "Tan Yan and Martin D. F. Wong", title = "Theories and algorithms on single-detour routing for untangling twisted bus", journal = j-TODAES, volume = "14", number = "3", pages = "46:1--46:??", month = may, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1529255.1529268", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Jun 3 16:12:53 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Previous works on PCB bus routing assume matched pin ordering on both sides. But in practice, the pin ordering might be mismatched and the nets become twisted. In this article, we propose a preprocessing step to untangle such twisted nets. We also introduce a practical routing style, which we call {\em single-detour routing}, to simplify the untangling problem. We then present a necessary and sufficient condition for the existence of single-detour routing solutions. Furthermore, we present a dynamic-programming-based algorithm to solve the single-detour untangling problem with consideration of wire capacity between adjacent pins. Our algorithm produces an optimal single-detour routing solution that rematches the pin ordering. 
By integrating our algorithm into the bus router in a previous length-matching router, we show that many routing problems that cannot be solved previously can now be solved with insignificant increase in runtime.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Bus routing; dynamic programming; printed circuit board (PCB); single-detour routing; twisted bus", } @Article{Gopalakrishnan:2009:ATB, author = "Sivaram Gopalakrishnan and Priyank Kalla", title = "{2009 ACM TODAES} best paper award: {Optimization} of polynomial datapaths using finite ring algebra", journal = j-TODAES, volume = "14", number = "4", pages = "47:1--47:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562515", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bertels:2009:EMM, author = "Peter Bertels and Wim Heirman and Erik D'Hollander and Dirk Stroobandt", title = "Efficient memory management for hardware accelerated {Java Virtual Machines}", journal = j-TODAES, volume = "14", number = "4", pages = "48:1--48:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562516", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Application-specific hardware accelerators can significantly improve a system's 
performance. In a Java-based system, we then have to consider a hybrid architecture that consists of a Java Virtual Machine running on a general-purpose processor connected to the hardware accelerator. In such a hybrid architecture, data communication between the accelerator and the general-purpose processor can incur a significant cost, which may even annihilate the original performance improvement of adding the accelerator. A careful layout of the data in the memory structure is therefore of major importance to maintain the acceleration performance benefits.\par This article addresses the reduction of the communication cost in a distributed shared memory consisting of the main memory of the processor and the accelerator's local memory, which are unified in the Java heap. Since memory access times are highly nonuniform, a suitable allocation of objects in either main memory or the accelerator's local memory can significantly reduce the communication cost. We propose several techniques for finding the optimal location for each Java object's data, either statically through profiling or dynamically at runtime. We show how we can reduce communication cost by up to 86\% for the SPECjvm and DaCapo benchmarks. We also show that the best strategy is application dependent and also depends on the relative cost of remote versus local accesses. 
For a relative cost higher than 10, a self-learning dynamic approach often results in the best performance.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Dynamic memory management; hardware acceleration; Java Virtual Machine", } @Article{Faezipour:2009:HPE, author = "Miad Faezipour and Mehrdad Nourani and Rina Panigrahy", title = "A hardware platform for efficient worm outbreak detection", journal = j-TODAES, volume = "14", number = "4", pages = "49:1--49:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562517", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Network Intrusion Detection Systems (NIDS) monitor network traffic to detect attacks or unauthorized activities. Traditional NIDSes search for patterns that match typical network compromise or remote hacking attempts. However, newer networking applications require finding the frequently repeated strings in a packet stream for further investigation of potential attack attempts. Finding frequently repeated strings within a given time frame of the packet stream has been quite efficient to detect polymorphic worm outbreaks. A novel real-time worm outbreak detection system using two-phase hashing and monitoring repeated common substrings is proposed in this article. We use the concept of shared counters to minimize the memory cost while efficiently sifting through suspicious strings. The worm outbreak system has been prototyped on Altera Stratix FPGA. We have tested the system for various settings and packet stream sizes. 
Experimental results verify that our system can support line speed of gigabit-rates with negligible false positive and negative rates.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "false negative; false positive; hashing; Network Intrusion Detection System; polymorphic worm; shared counters; worm outbreak", } @Article{Lee:2009:TSA, author = "Byunghyun Lee and Ki-Seok Chung and Bontae Koo and Nak-Woong Eum and Taewhan Kim", title = "Thermal sensor allocation and placement for reconfigurable systems", journal = j-TODAES, volume = "14", number = "4", pages = "50:1--50:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562518", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A dynamic monitoring of thermal behavior of hardware resources using thermal sensors is very important to maintain the operation of systems safe and reliable. This article addresses the problem of thermal sensor allocation and placement for reconfigurable systems. For programmable logic arrays, the degree of the use of hardware resources in the systems highly depends on the target application to be implemented, making the allocation of thermal sensors at the manufacturing stage inadequate (or too costly if implemented) due to the unpredictable thermal profile. This means that the thermal sensor allocation could be processed at the time when the reconfigurable logic is implemented (i.e., at the post manufacturing stage). This work proposes an effective solution to the problem of thermal sensor allocation and placement at the post-manufacturing stage. 
Specifically, we define the Sensor Allocation and Placement Problem (SAPP), and propose a solution which formulates SAPP into the Unate-Covering Problem (UCP) and solves it optimally. Also we combine SAPP with temperature correlation to reduce required sensors more aggressively and propose a solution by applying UCP again. We then provide an extended solution to handle a practical design issue where the hardware resources for the sensor implementation on specific array locations have already been used up by the application logic. Experimental results using MCNC benchmarks show that our proposed technique uses 62.4\% and 19.7\% less number of sensors to monitor hotspots on the average than that used by the grid-based and the bisection-based approaches while the overhead of auxiliary circuitry is minimized, respectively.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "optimal placement; reconfigurable system; Thermal sensor; unate-covering problem", } @Article{Yuh:2009:TTB, author = "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang", title = "{T}-trees: a tree-based representation for temporal and three-dimensional floorplanning", journal = j-TODAES, volume = "14", number = "4", pages = "51:1--51:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562519", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Improving logic capacity by time-sharing, dynamically reconfigurable FPGAs are employed to handle designs of high complexity and functionality. 
In this article, we model each task as a 3D-box and deal with the temporal floorplanning/placement problem for dynamically reconfigurable FPGA architectures. We present a tree-based data structure, called {\em T-trees}, to represent the spatial and temporal relations among tasks. Each node in a T-tree has at most three children which represent the dimensional relationship among tasks. For the T-tree, we develop an efficient packing method and derive the condition to ensure the satisfaction of precedence constraints which model the temporal ordering among tasks induced by the execution of dynamically reconfigurable FPGAs. Experimental results show that our tree-based formulation can obtain significantly better solution quality with less execution time than the most recent state-of-the-art work.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "partially dynamical reconfiguration; Reconfigurable computing; temporal floorplanning", } @Article{Yuh:2009:LAT, author = "Ping-Hung Yuh and Chia-Lin Yang and Chi-Feng Li and Chung-Hsiang Lin", title = "Leakage-aware task scheduling for partially dynamically reconfigurable {FPGAs}", journal = j-TODAES, volume = "14", number = "4", pages = "52:1--52:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562520", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As technology continues to shrink, reducing leakage power of Field-Programmable Gate Arrays (FPGAs) becomes a critical issue for the practical use of FPGAs. 
In this article, we address the leakage issue of partially dynamically reconfigurable FPGA architectures with sleep transistors embedded into FPGA fabrics. In particular, we focus on eliminating leakage waste due to the delay between reconfiguration and execution time of a task. For partially dynamically reconfigurable FPGAs, the configuration prefetching technique is commonly used to hide runtime reconfiguration overhead. With prefetching, the configuration of a task is loaded into FPGAs as early as possible. Therefore, there is often a delay between reconfiguration and execution time of a task. In this period of time, the SRAM cells allocated to a task cannot be turned off even though they are not utilized.\par In this article, we propose a two-stage task scheduling methodology to reduce leakage waste due to the delay between reconfiguration and execution time of a task without sacrificing performance. In the first stage, a performance-driven task scheduler that targets at minimizing the schedule length is invoked to generate an initial placement. In the second stage, a postplacement leakage-aware task scheduling is applied to refine the initial placement such that leakage waste is minimized provided that the schedule length is not increased. To solve the postplacement leakage optimization problem, we propose two algorithms. The first one is an optimal algorithm based on Integer Linear Programming (ILP). The second algorithm is a heuristic approach that iteratively refines the placement to reduce leakage waste. 
Experimental results on real and synthetic designs show the efficiency and effectiveness of the proposed postplacement leakage reduction techniques.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "leakage; partially dynamical reconfiguration; placement; Reconfigurable computing; scheduling", } @Article{Chen:2009:LRD, author = "Po-Yuan Chen and Chiao-Chen Fang and Tingting Hwang and Hsi-Pin Ma", title = "Leakage reduction, delay compensation using partition-based tunable body-biasing techniques", journal = j-TODAES, volume = "14", number = "4", pages = "53:1--53:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562521", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In recent years, fabrication technology of CMOS has scaled to nanometer dimensions. As scaling progresses, several new challenges follow. Among them, the most noticeable two are process variations and leakage current of the circuit. To tackle the problems of process variations and leakage current, an effective way is to use a body-biasing technique. In substance, using the RBB technique can minimize leakage current but increase the delay of a gate. Contrary to RBB, the FBB technique decreases the delay but increases leakage current of a gate. In the previous work, a single body-biasing is applied to the whole circuit. In a slow circuit, since the FBB is applied to the whole circuit, the leakage current of all gates in the circuit increases dramatically. On the other hand, in a fast circuit, RBB is applied to decrease the leakage current. 
However, without violating the timing specification, the value of body-biasing is restricted by the critical paths, and the saving of leakage current is limited. In this article, we propose a design flow to partition the circuit into subcircuits so that each subcircuit can be applied its individual RBB or FBB. Experiments show that our method is able to save leakage current from 42\% to 47\% as compared to designs not using a body-biasing technique. Under process variations, our method can save 42\% to 49\% leakage on fast circuits and 20\% to 35\% on slow circuits.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Body biasing; leakage current; low-power design; process variations", } @Article{Ranganathan:2009:VAM, author = "Nagarajan Ranganathan and Upavan Gupta and Venkataraman Mahalingam", title = "Variation-aware multimetric optimization during gate sizing", journal = j-TODAES, volume = "14", number = "4", pages = "54:1--54:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562522", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The aggressive scaling of technology has not only accentuated the effects of intradie parametric variations in devices, but it has also impacted the effects of optimizing a certain performance metric on the optimality of other metrics. Thus, there is a need for optimization methods that can perform the simultaneous optimization of multiple metrics considering the effects of process variations. In this article, a novel variation-aware gate sizing framework has been developed that can perform simultaneous optimization of multiple performance metrics. 
In this framework, the relationships between the optimization metrics (like dynamic power, leakage power, and crosstalk noise) are modeled as a function of the gate sizes in the objective function. The delay values obtained from unconstrained delay optimization and the noise margins derived from coupling capacitance information form the constraints for the multimetric optimization problem. As an abstract framework, it is independent of the type of mathematical programming approach as well as the metrics chosen to be optimized. The framework has been implemented using a mathematical programming approach and has been tested on ITC'99 benchmarks for different combinations of multimetric and single-metric optimizations of delay, dynamic power, leakage power, and crosstalk noise. The results indicate that the framework identifies good solution points, and is efficient for postlayout optimization via gate sizing.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "crosstalk noise; delay; Gate sizing; mathematical programming; optimization; power", } @Article{Moiseev:2009:PDO, author = "Konstantin Moiseev and Avinoam Kolodny and Shmuel Wimer", title = "Power-delay optimization in {VLSI} microprocessors by wire spacing", journal = j-TODAES, volume = "14", number = "4", pages = "55:1--55:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1562523", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The problem of optimal space allocation among interconnect wires in a VLSI layout, in order to minimize the switching power consumption and the average signal delay, is addressed in this article. 
We define a Weighted Power-Delay Sum (WPDS) objective function and derive necessary and sufficient conditions for the existence of optimal interwire space allocation, based on the notion of capacitance density. At the optimum, every wire must be in equilibrium of its line-to-line weighted capacitance density on its two opposite sides, and the WPDS of the whole circuit is minimal if and only if capacitance density is uniformly distributed across the entire layout. This condition is shown to be equivalent to all paths of the layout cross-capacitance graph having the same length and all cuts having the same flow. An implementation which has been used in the design of a recent commercial high-end microprocessor and yielded 17\% power reduction and 9\% delay reduction in top-level interconnects is presented.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "delay-optimization; interconnect optimization; power optimization; Wire spacing", } @Article{Engelke:2009:SSU, author = "Piet Engelke and Bernd Becker and Michel Renovell and Juergen Schloeffel and Bettina Braitling and Ilia Polian", title = "{SUPERB}: {Simulator Utilizing Parallel Evaluation of Resistive Bridges}", journal = j-TODAES, volume = "14", number = "4", pages = "56:1--56:??", month = aug, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1562514.1596831", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 27 14:38:55 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A high-performance resistive bridging fault simulator SUPERB (Simulator Utilizing Parallel Evaluation of Resistive Bridges) is proposed. 
It is based on fault sectioning in combination with parallel-pattern or parallel-fault multiple-stuck-at simulation. It outperforms a conventional interval-based resistive bridging fault simulator by three orders of magnitude while delivering identical results. Further competing tools are outperformed by several orders of magnitude. Industrial-size circuits, including a multi-million-gates design, could be simulated with runtimes within an order of magnitude of the runtimes for pattern-parallel stuck-at fault simulation.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "bridging fault simulation; fault mapping; PPSFP; Resistive bridging faults; SPPFP", } @Article{Chang:2009:DIE, author = "Li-Pin Chang and Chun-Da Du", title = "Design and implementation of an efficient wear-leveling algorithm for solid-state-disk microcontrollers", journal = j-TODAES, volume = "15", number = "1", pages = "6:1--6:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640463", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Solid-state disks (SSDs) are storage devices that emulate hard drives with flash memory. They have been widely deployed in mobile computers as disk drive replacements. Flash memory is organized in terms of erase blocks. With the current technology, a block can reach the end of its lifetime after thousands of erasure operations. Wear leveling is a technique to evenly erase the entire flash memory so that all blocks remain alive as long as possible. 
This study introduces a new wear-leveling algorithm based on the observation that, under a real-life mobile PC's workload, most erasure operations are contributed by a small fraction of blocks. Our key ideas are (1) moving rarely updated data to a block that is extraordinarily worn and (2) avoiding repeatedly involving a block in wear-leveling activities. This study presents a successful implementation of the proposed wear-leveling algorithm using about 200 bytes of RAM in an SSD controller rated at 33 MHz. Evaluation results show that this algorithm achieves even wear of the entire flash memory while reducing the overheads of extra flash-memory operations.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "embedded systems; flash memory; solid-state disks; Wear leveling", } @Article{Geelen:2009:SLE, author = "Bert Geelen and Vissarion Ferentinos and Francky Catthoor and Gauthier Lafruit and Diederik Verkest and Rudy Lauwereins and Thanos Stouraitis", title = "Spatial locality exploitation for runtime reordering of {JPEG2000} wavelet data layouts", journal = j-TODAES, volume = "15", number = "1", pages = "8:1--8:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640465", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Exploitation of spatial locality is essential for memories to increase the access bandwidth and to reduce the access-related latency and energy per word. Spatial locality exploitation of a kernel can be improved by modifying placement of data in memory, but this may be felt not only by the kernel itself, but also in other application components accessing the same data. 
Thus care is needed to ensure that global miss-rate improvements are not thwarted by miss-rate increases in other application components. This article examines application-level miss-rate increases due to handling modified Wavelet Transform data layouts by explicitly reordering at runtime, exploiting the execution order freedom within a reordering buffer when the layout of surrounding components is known. For the JPEG2000 application, taking into account the reordering costs still results in 80\% net WT miss-rate gains.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Layout transformations; spatial locality; wavelet transform", } @Article{Keutzer:2009:ATD, author = "Kurt Keutzer and Peng Li and Li Shang and Hai Zhou", title = "{ACM Transactions on Design Automation of Electronic Systems (TODAES)} special section call for papers: {Parallel CAD}: Algorithm design and programming", journal = j-TODAES, volume = "15", number = "1", pages = "9:1--9:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640466", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2009:MLP, author = "Jaehyun Kim and Chungki Oh and Youngsoo Shin", title = "Minimizing leakage power of sequential circuits through mixed-{$ V_t $} flip-flops and multi-{$ V_t $} combinational gates", journal = j-TODAES, volume = "15", number = "1", pages = "4:1--4:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = 
"https://doi.org/10.1145/1640457.1640461", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The current use of multi-$ V_t $ to control leakage power targets combinational gates, even though sequential elements such as flip-flops and latches also contribute appreciable leakage. We can, nevertheless, apply multi-$ V_t $ to flip-flops, but few can take advantage of high-$ V_t $, which causes abrupt changes in timing. We combine low- and high-$ V_t $ at the transistor level to design mixed-$ V_t $ flip-flops with reduced leakage, an unchanged footprint, and a small increase in either setup time or clock-to-Q delay, but not both. An allocation algorithm for two $ V_t $'s determines the $ V_t $ (mixed, high, or low) of each flip-flop and the $ V_t $ of each combinational gate (high or low) in a sequential circuit. Experiments with 65-nm technology show an average leakage saving of 42\% compared to conventional multi-$ V_t $ approaches; the leakage of flip-flops alone is cut by 78\%. This saving is largely unaffected by die-to-die or within-die process variations, which we show through simulations. Standard deviation of leakage caused by process variation is also reduced due to less use of low-$ V_t $ devices. 
We also extend our approach to three $ V_t $'s, and obtain a further 14\% reduction in leakage.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Flip-flop; leakage current; low power; mixed-$ V_t $; sequential circuit", } @Article{Mu:2009:AHS, author = "Jingqing Mu and Roman Lysecky", title = "Autonomous hardware\slash software partitioning and voltage\slash frequency scaling for low-power embedded systems", journal = j-TODAES, volume = "15", number = "1", pages = "2:1--2:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640459", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Warp processing is a recent computing technology capable of autonomously partitioning the critical kernels within an executing software application to hardware circuits implemented within an on-chip FPGA. While previous performance-driven warp processing has been shown to provide significant performance improvements over software only execution, the dynamic performance improvement of warp processors may be lost for certain application domains, such as real-time systems. 
Alternatively, as power consumption continues to become a dominant design constraint, we present and thoroughly analyze a low-power warp processing methodology that leverages voltage and/or frequency scaling to substantially reduce power consumption without any performance degradation --- all without requiring designer effort beyond the initial software development.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "dynamically adaptable systems; hardware/software partitioning; low-power; low-power FPGAs; reconfigurable computing; Warp processing", } @Article{Pomeranz:2009:UST, author = "Irith Pomeranz and Sudhakar M. Reddy", title = "Using stuck-at tests to form scan-based tests for transition faults in standard-scan circuits", journal = j-TODAES, volume = "15", number = "1", pages = "7:1--7:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640464", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In enhanced-scan circuits, a two-pattern test for a transition fault can be obtained by using a test {\em t$_j$ \/} that detects a stuck-at fault, and preceding it by a test {\em t$_i$ \/} that activates another stuck-at fault. Thus, test generation for transition faults can be done by combining pairs of stuck-at tests. This provides an alternative to deterministic test generation, as well as reduces the test storage requirements for transition fault tests. We study the possibility of generating scan-based tests for transition faults in standard-scan circuits in a similar way, by combining pairs of stuck-at tests. 
Since it is not always possible to obtain a standard-scan test that is equivalent to a two-pattern test based on stuck-at tests {\em t$_i$ \/} and {\em t$_j$}, it is not always possible to guarantee that the combination of {\em t$_i$ \/} and {\em t$_j$ \/} will detect a transition fault. To compensate for this, it is necessary to try combinations of different stuck-at test pairs, resulting in an increased simulation effort to compute effective standard-scan tests. Our focus in this work is on reducing this simulation effort by reducing the number of stuck-at test pairs that need to be considered.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Broadside tests; scan circuits; skewed-load tests; stuck-at faults; transition faults", } @Article{Rao:2009:COT, author = "Rajeev R. Rao and Vivek Joshi and David Blaauw and Dennis Sylvester", title = "Circuit optimization techniques to mitigate the effects of soft errors in combinational logic", journal = j-TODAES, volume = "15", number = "1", pages = "5:1--5:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640462", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Soft errors in combinational logic circuits are emerging as a significant reliability problem for VLSI designs. Technology scaling trends indicate that the soft error rates (SER) of logic circuits will be a dominant factor for future technology generations. SER mitigation in logic can be accomplished by optimizing either the gates inside a logic block or the flip-flops present on the block boundaries.
We present novel circuit optimization techniques that target these elements separately as well as in unison to reduce the SER of combinational logic circuits.\par First, we describe the construction of a new class of flip-flop variants that leverage the effect of temporal masking by selectively increasing the length of the latching window thereby preventing faulty transients from being registered. In contrast to previous flip-flop designs that rely on logic duplication and complicated circuit design styles, the new variants are redesigned from the library flip-flop using efficient transistor sizing. We then propose a flip-flop selection method that uses slack information at each primary output node to determine the flip-flop configuration that produces maximum SER savings. Next, we propose a gate sizing algorithm that trades off SER reduction and area overhead. This approach first computes bounds on the maximum achievable SER reduction by resizing a gate. This bound is then used to prune the circuit graph, arriving at a smaller set of candidate gates on which we perform incremental sensitivity computations to determine the gates that are the largest contributors to circuit SER. Third, we propose a unified, co-optimization approach combining flip-flop selection with the gate sizing algorithm. The joint optimization algorithm produces larger SER reductions while incurring smaller circuit overhead than either technique taken in isolation. Experimental results on a variety of benchmarks show average SER reductions of 10.7X with gate sizing, 5.7X with flip-flop assignment, and 30.1X for the combined optimization approach, with no delay penalties and area overheads within 5-6\%. 
The runtimes for the optimization algorithms are on the order of 1-3 minutes.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "circuit optimization; combinational logic; sequential circuits; Soft errors", } @Article{Wolinski:2009:ADA, author = "Christophe Wolinski and Krzysztof Kuchcinski and Erwan Raffin", title = "Automatic design of application-specific reconfigurable processor extensions with {UPaK} synthesis kernel", journal = j-TODAES, volume = "15", number = "1", pages = "1:1--1:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640458", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a new tool for automatic design of application-specific reconfigurable processor extensions based on UPaK (Abstract Unified Patterns Based Synthesis Kernel for Hardware and Software Systems). We introduce a complete design flow that identifies new instructions, selects specific instructions and schedules a considered application on the newly created reconfigurable architecture. The identified extensions are implemented as specialized sequential or parallel instructions. These instructions are executed on a reconfigurable unit implementing all merged patterns. Our method uses specially developed algorithms for subgraph isomorphism that are implemented as graph matching constraints. These constraints together with separate algorithms are able to efficiently identify computational patterns and carry out application mapping and scheduling. Our methods can handle both time-constrained and resource-constrained scheduling. 
Experimental results show that the presented method provides high coverage of application graphs with small number of patterns and ensures high application execution speedup both for sequential and parallel application execution with reconfigurable processor extensions implementing selected patterns.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "constraint programming; Reconfigurable architectures; resource assignment; scheduling; system-level synthesis", } @Article{Wu:2009:PCV, author = "Meng-Chen Wu and Ming-Ching Lu and Hung-Ming Chen and Jing-Yang Jou", title = "Performance-constrained voltage assignment in multiple supply voltage {SoC} floorplanning", journal = j-TODAES, volume = "15", number = "1", pages = "3:1--3:??", month = dec, year = "2009", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1640457.1640460", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:18:31 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Using voltage island methodology to reduce power consumption for System-on-a-Chip (SoC) designs has become more and more popular recently. Currently this approach has been considered either in system-level architecture or postplacement stage. Since hierarchical design and reusable intellectual property (IP) are widely used, it is necessary to optimize floorplanning/placement methodology considering voltage islands generation to solve power and critical path delay problems. In this article, we propose a floorplanning methodology considering voltage islands generation and performance constraints. Our method is flexible and can be extended to hierarchical design. 
The experimental results on some MCNC benchmarks show that our method is effective in meeting performance constraints and can simultaneously consider the tradeoff between power routing cost and total power dissipation.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cabodi:2010:SHA, author = "Gianpiero Cabodi and Luciano Lavagno and Marco Murciano and Alex Kondratyev and Yosinori Watanabe", title = "Speeding-up heuristic allocation, scheduling and binding with {SAT}-based abstraction\slash refinement techniques", journal = j-TODAES, volume = "15", number = "2", pages = "12:1--12:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698762", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hardware synthesis is the process by which system-level, Register Transfer (RT)-level, or behavioral descriptions can be turned into real implementations, in terms of logic gates. Scheduling is one of the most time-consuming steps in the overall design flow, and may become much more complex when performing hardware synthesis from high-level specifications. Exploiting a single scheduling strategy on very large designs is often reductive and potentially inadequate. Furthermore, finding the ``best'' single candidate among all possible scheduling algorithms is practically infeasible. In this article we introduce a hybrid scheduling approach that is a preliminary step towards a comprehensive solution not yet provided by industrial or by academic solutions. 
Our method relies on an abstract symbolic representation of data flow nodes (operations) bound to control flow paths: it produces a more realistic lower bound during the prescheduling resource estimation step and speeds up slower but accurate heuristic scheduling techniques, thus achieving a globally improved result.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "allocation; binding; High level synthesis; resource estimation; satisfiability; scheduling", } @Article{Chang:2010:CPA, author = "Naehyuck Chang and J{\"o}rg Henkel", title = "Call for papers: {ACM Transactions on Design Automation of Electronic Systems (TODAES)} special section on low-power electronics and design", journal = j-TODAES, volume = "15", number = "2", pages = "20:1--20:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698770", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Das:2010:TAM, author = "Dipankar Das and P. P. 
Chakrabarti and Rajeev Kumar", title = "Thermal analysis of multiprocessor {SoC} applications by simulation and verification", journal = j-TODAES, volume = "15", number = "2", pages = "15:1--15:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698765", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Overheating of computer chips leads to degradation of performance and reliability. Therefore, preventing chips from overheating in spite of increased performance requirements has emerged as a major challenge. Since the cost of cooling has been rising steadily, various architecture and application design techniques are used to prevent chip overheating. Temperature-aware task scheduling has emerged as an important application design methodology for addressing this problem in multiprocessor SoC systems.\par In this work we present the formulation and implementation of a method for analyzing the thermal (chip heating) behavior of a MPSoC task schedule, during the early stages of the design. We highlight the challenges in developing such a framework and propose solutions for tackling them. Due to nondeterminism in task execution times and decision branches, multiprocessor applications cannot be evaluated accurately by the current state-of-the-art {\em thermal\/} {\em simulation\/} and {\em steady-state\/} analysis methods. Hence an analysis covering nondeterministic execution behaviors is required for thermal analysis of MPSoC task schedules. To address this issue we propose a model checking-based approach for solving the thermal analysis problem and formulate it as a hybrid automata reachability verification problem. 
We present an algorithm for constructing this hybrid automata given the task schedule, a set of power profiles of tasks, and the Compact Thermal Model (CTM) of the chip. Information about task power consumption is inferred from Markov chains which are learned from power profiles of tasks, obtained from simulation or emulation runs. A numerical analysis-based algorithm which uses CounterExample-Guided Abstraction Refinement (CEGAR) is developed for reachability analysis of this hybrid automata. We propose a directed simulation methodology which uses results of a time-bounded analysis of the hybrid automata modeling thermal behavior of the application, to simulate the expected worst-case execution runs of the same. The algorithms presented in this work have been implemented in a prototype tool called {\em HeatCheck}. We present experimental results and analysis of thermal behavior of a set of task schedules executing on a MPSoC system.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "chip temperature; hybrid automata; Markov chain; multiprocessor system-on-chip; Thermal analysis", } @Article{Jamieson:2010:BER, author = "Peter Jamieson and Tobias Becker and Peter Y. K. 
Cheung and Wayne Luk and Tero Rissa and Teemu Pitk{\"a}nen", title = "Benchmarking and evaluating reconfigurable architectures targeting the mobile domain", journal = j-TODAES, volume = "15", number = "2", pages = "14:1--14:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698764", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present the GroundHog 2009 benchmarking suite that evaluates the power consumption of reconfigurable technology for applications targeting the mobile computing domain. This benchmark suite includes seven designs; one design targets fine-grained FPGA fabrics allowing for quick state-of-the-art evaluation, and six designs are specified at a high level allowing them to target a range of existing and future reconfigurable technologies. Each of the six designs can be stimulated with the help of synthetically generated input stimuli created by an open-source tool included in the downloadable suite. Another tool is included to help verify the correctness of each implemented design. To demonstrate the potential of this benchmark suite, we evaluate the power consumption of two modern industrial FPGAs targeting the mobile domain. 
Also, we show how an academic FPGA framework, VPR 5.0, that has been updated for power estimates can be used to estimate the power consumption of different FPGA architectures and an open-source CAD flow mapping to these architectures.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "benchmark; Benchmarking; FPGAs; mobile; power", } @Article{Kurimoto:2010:PAE, author = "Masanori Kurimoto and Hiroaki Suzuki and Rei Akiyama and Tadao Yamanaka and Haruyuki Ohkuma and Hidehiro Takata and Hirofumi Shinohara", title = "Phase-adjustable error detection flip-flops with 2-stage hold-driven optimization, slack-based grouping scheme and slack distribution control for dynamic voltage scaling", journal = j-TODAES, volume = "15", number = "2", pages = "17:1--17:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698767", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For Dynamic Voltage Scaling (DVS), we propose a novel design methodology. This methodology is composed of an error detection circuit and three technologies to reduce the area and power penalties which are the large issues for the conventional DVS with error detection. The proposed circuit, Phase-Adjustable Error Detection Flip-Flop (PEDFF), adjusts the clock phase of an additional FF for the timing error detection, based on the timing slack. 2-Stage Hold-Driven Optimization (2-SHDO) technology splits the hold-driven optimization in two stages. Slack-Based Grouping Scheme (SBGS) technology divides each timing path into appropriate groups based on the timing slack.
Slack Distribution Control (SDC) technology improves the sharp distribution of the path delay at which the logic synthesis tool has relaxed the delay. We evaluate the methodology by simulating a 32-bit microprocessor in 90 nm CMOS technology. The proposed methodology reduces the energy consumption by 19.8\% compared to non-DVS. The OR-tree's latency is shortened to 16.3\% compared to the conventional DVS. The area and power penalties for delay buffers on short paths are reduced to 35.0\% and 40.6\% compared to the conventional DVS, respectively. The proposed methodology with SDC reduces the energy consumption by 17.0\% on another example with the sharp slack distribution by the logic synthesis compared to non-DVS.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "CTS; DVS; Error detection flip-flop; P{\&} R; STA", } @Article{Kwon:2010:SPC, author = "Seongnam Kwon and Soonhoi Ha", title = "Serialized parallel code generation framework for {MPSoC}", journal = j-TODAES, volume = "15", number = "2", pages = "11:1--11:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698761", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The models of computations that express concurrency naturally are preferred for initial specification of MPSoC system, since popular programming languages such as C and C++ are designed for sequential execution. In our previous work, we proposed a design framework where two models are used for the initial specification of the system behavior; task model at the top level and dataflow model inside each task. 
After the partition and mapping process is performed with each architecture candidate, the target code is automatically generated for both Design-Space Exploration (DSE) and final implementation. In this article, we focus on parallel code generation for MPSoC, proposing two main techniques. The first is to express functional and data parallelism differently following the partition and mapping decision. In the proposed technique, the generated code consists of multiple tasks running concurrently, which achieves functional parallelism. On the other hand, we use OpenMP directives to express data parallelism inside a task. Second is to adopt the code serialization technique to execute a multitasking application without OS scheduler, aiming to generate the highly portable code on various platforms for an efficient DSE process. We extend the previous code serialization techniques to multiprocessor systems and utilize the formal properties of the dataflow model for efficient code generation. The experiments including H.263 codec example show the viability of the proposed technique and the efficiency of the generated code.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design-space exploration; Embedded software; multiprocessor system on chip; parallel programming; software generation", } @Article{Li:2010:PAL, author = "Duo Li and Sheldon X.-D. Tan and Eduardo H. 
Pacheco and Murli Tirumala", title = "Parameterized architecture-level dynamic thermal models for multicore microprocessors", journal = j-TODAES, volume = "15", number = "2", pages = "16:1--16:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698766", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a new architecture-level parameterized dynamic thermal behavioral modeling algorithm for emerging thermal-related design and optimization problems for high-performance multicore microprocessor design. We propose a new approach, called {\em ParThermPOF}, to build the parameterized thermal performance models from the given accurate architecture thermal and power information. The new method can include a number of variable parameters such as the locations of thermal sensors in a heat sink, different components (heat sink, heat spreader, core, cache, etc.), thermal conductivity of heat sink materials, etc. The method consists of two steps: first, a response surface method based on low-order polynomials is applied to build the parameterized models at each time point for all the given sampling nodes in the parameter space. Second, an improved Generalized Pencil-Of-Function (GPOF) method is employed to build the transfer-function-based behavioral models for each time-varying coefficient of the polynomials generated in the first step. Experimental results on a practical quad-core microprocessor show that the generated parameterized thermal model matches the given data very well. The compact models by ParThermPOF offer two orders of magnitude speedup over the commercial thermal analysis tool {\em FloTHERM\/} on the given examples.
ParThermPOF is very suitable for design space exploration and optimization where both time and system parameters need to be considered.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "architecture; behavioral modeling; chip-multiprocessor; Multicore; thermal modeling", } @Article{Paul:2010:LOC, author = "Somnath Paul and Hamid Mahmoodi and Swarup Bhunia", title = "Low-overhead {$ F_{\hbox {max}} $} calibration at multiple operating points using delay-sensitivity-based path selection", journal = j-TODAES, volume = "15", number = "2", pages = "19:1--19:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698769", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Maximum operating frequency ({\em F\/}$_{{\em max \/ }}$) of a system often needs to be determined at multiple operating points, defined by voltage and temperatures. Such calibration is important for the speed binning process, where the voltage-frequency (V- {\em F\/}$_{{\em max \/ }}$) relation needs to be accurately determined to sort chips into different bins that can be used for different applications. Moreover, adaptive systems typically require {\em F\/}$_{{\em max \/ }}$ calibration at multiple operating points in order to dynamically change operating condition such as supply voltage or body bias for power, temperature, or throughput management. For example, a Dynamic Voltage and Frequency Scaling (DVFS) system requires accurate delay calibration at multiple operating voltages in order to apply the correct operating frequency corresponding to a scaled supply. 
In this article, we propose a low-overhead design technique that allows efficient characterization of {\em F\/}$_{{\em max \/ }}$ at different operating voltages and temperatures. The proposed method selects a set of representative timing paths in a circuit based on their temperature and voltage sensitivities and dynamically configures them into a ring oscillator to compute the critical path delay. Compared to existing {\em F\/}$_{{\em max \/ }}$ calibration approaches, the proposed approach provides the following two main advantages: (1) it introduces a delay sensitivity metric to isolate few representative timing paths; (2) it considers actual timing paths instead of critical path replicas, thereby accounting for local within-die delay variations. The all-digital calibration method is robust under process variations and achieves high delay estimation accuracy ($<$ 4\% error) at the cost of negligible design overhead (1.7\% in delay, 0.3\% in power, and 3.5\% in die-area).", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "F max calibration; frequency binning; temperature adaptation", } @Article{Reviriego:2010:RAM, author = "Pedro Reviriego and Juan Antonio Maestro and Chris J.
Bleakley", title = "Reliability analysis of memories protected with {BICS} and a per-word parity bit", journal = j-TODAES, volume = "15", number = "2", pages = "18:1--18:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698768", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents an analysis of the reliability of memories protected with Built-in Current Sensors (BICS) and a per-word parity bit when exposed to Single Event Upsets (SEUs). Reliability is characterized by Mean Time to Failure (MTTF) for which two analytic models are proposed. A simple model, similar to the one traditionally used for memories protected with scrubbing, is proposed for the low error rate case. A more complex Markov model is proposed for the high error rate case. The accuracy of the models is checked using a wide set of simulations. The results presented in this article allow fast estimation of MTTF enabling design of optimal memory configurations to meet specified MTTF goals at minimum cost. 
Additionally the power consumption of memories protected with BICS is compared to that of memories using scrubbing in terms of the number of read cycles needed in both configurations.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "built-in current sensors; Error correcting codes; Fault-tolerant memory; high-level protection technique", } @Article{Schirner:2010:FAP, author = "Gunar Schirner and Andreas Gerstlauer and Rainer D{\"o}mer", title = "Fast and accurate processor models for efficient {MPSoC} design", journal = j-TODAES, volume = "15", number = "2", pages = "10:1--10:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698760", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With growing system complexity and ever-increasing software content, the development of embedded software for upcoming MPSoC architectures is a tremendous challenge. Traditional ISS-based validation becomes infeasible due to the large complexity.\par Addressing the need for flexible and fast simulating models, we introduce in this article our approach of abstract processor modeling in the context of multiprocessor architectures. We combine modeling of computation on processors with an abstract RTOS and accurate interrupt handling into a versatile, multifaceted processor model with several levels of features.\par Our processor models are utilized in a framework allowing designers to develop a system in a top-down manner using automatic model generation and compilation down to a given MPSoC architecture. 
During generation, instances of our processor models are integrated into a system model combining software, hardware, and bus communication. The generated system model serves for rapid design space exploration and a fast and accurate system validation.\par Our experimental results show the benefits of our processor modeling using an actual multiprocessor mobile phone baseband platform. Our abstract models of this complex system reach a simulation speed of 300MCycles/s within a high accuracy of less than 3\% error. In addition, our results quantify the speed/accuracy trade-off at varying abstraction levels of our models to guide future processor model designers.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "MPSoC; multi-processor system-on-chip; performance prediction/estimation; Processor modeling; system-level design; TLM; transaction-level model", } @Article{Yuan:2010:HSP, author = "Mingxuan Yuan and Zonghua Gu and Xiuqiang He and Xue Liu and Lei Jiang", title = "Hardware\slash software partitioning and pipelined scheduling on runtime reconfigurable {FPGAs}", journal = j-TODAES, volume = "15", number = "2", pages = "13:1--13:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698763", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "FPGAs are widely used in today's embedded systems design due to their low cost, high performance, and reconfigurability. 
Partially RunTime-Reconfigurable (PRTR) FPGAs, such as Virtex-2 Pro and Virtex-4 from Xilinx, allow part of the FPGA area to be reconfigured while the remainder continues to operate without interruption, so that HW tasks can be placed and removed dynamically at runtime. We address two problems related to HW task scheduling on PRTR FPGAs: (1) HW/SW partitioning. Given an application in the form of a task graph with known execution times on the HW (FPGA) and SW (CPU), and known area sizes on the FPGA, find a valid allocation of tasks to either HW or SW and a static schedule with the optimization objective of minimizing the total schedule length (makespan). (2) Pipelined scheduling. Given an input task graph, construct a pipelined schedule on a PRTR FPGA with the goal of maximizing system throughput while meeting a given end-to-end deadline. Both problems are NP-hard. Satisfiability Modulo Theories (SMT) is an extension to SAT by adding the ability to handle arithmetic and other decidable theories. We use the SMT solver Yices with Linear Integer Arithmetic (LIA) theory as the optimization engine for solving the two scheduling problems.
In addition, we present an efficient heuristic algorithm based on kernel recognition for the pipelined scheduling problem, a technique borrowed from SW pipelining, to overcome the scalability problem of the SMT-based optimal solution technique.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "HW/SW partitioning; runtime reconfigurable FPGA; scheduling", } @Article{Blanc:2010:RAS, author = "Nicolas Blanc and Daniel Kroening", title = "Race analysis for {SystemC} using model checking", journal = j-TODAES, volume = "15", number = "3", pages = "21:1--21:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754406", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "SystemC is a system-level modeling language that offers a wide range of features to describe concurrent systems at different levels of abstraction. The SystemC standard permits simulators to implement a deterministic scheduling policy, which often hides concurrency-related design flaws. We present a novel compiler for SystemC that integrates a very precise formal race analysis by means of model checking. Our compiler produces a simulator that uses the outcome of the analysis to perform partial order reduction. The key insight to make the model checking engine scale is to apply it only to tiny fractions of the SystemC model. We show that the outcome of the analysis is not only valuable to eliminate redundant context switches at runtime, but can also be used to diagnose race conditions statically. 
In particular, our analysis is able to reveal races that can remain undetected during simulation and is able to formally prove the absence of races.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "formal analysis; model checking; partial-order reduction; simulation; SystemC", } @Article{Ahmed:2010:CBP, author = "Waseem Ahmed and Douglas Myers", title = "Concept-based partitioning for large multidomain multifunctional embedded systems", journal = j-TODAES, volume = "15", number = "3", pages = "22:1--22:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754407", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hardware-software partitioning is an important phase in embedded systems. Decisions made during this phase impact the quality, cost, performance, and the delivery date of the final product. Over the past decade or more, various partitioning approaches have been proposed. A majority operate at a relatively fine granularity and use a low-level executable specification as the starting point. This presents problems if the context is families of industrial products with frequent release of upgraded or new members. Managing complexity using a low-level specification is extremely challenging and impacts developer productivity. Designing using a high-level specification and component-based development, although a better option, imposes component integration and replacement problems during system evolution and new product release. A new approach termed Concept-Based Partitioning is presented that focuses on system evolution, product lines, and large-scale reuse when partitioning. 
Beginning with information from UML 2.0 sequence diagrams and a concept repository, concepts are identified and used as the unit of partitioning within a specification. A methodology for the refinement of interpart communication in the system specification using sequence diagrams is also presented. Change localization during system evolution, composability during large-scale reuse, and provision for configurable feature variations for a product line are facilitated by a Generic Adaptive Layer (GAL) around selected concepts. The methodology was applied on a subsystem of an Unmanned Aerial Vehicle (UAV) using various concepts which improved the composability of concepts while keeping performance and size overhead within the 2\% range.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Codesign; embedded system design; product families; system evolution; system partitioning; UML", } @Article{Raval:2010:LPT, author = "R. K. Raval and C. H. Fernandez and C. J. Bleakley", title = "Low-power {TinyOS} tuned processor platform for wireless sensor network motes", journal = j-TODAES, volume = "15", number = "3", pages = "23:1--23:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754408", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article we describe a low-power processor platform for use in Wireless Sensor Network (WSN) nodes (motes). WSN motes are small, battery-powered devices comprised of a processor, sensors, and a radio frequency transceiver. 
It is expected that WSNs consisting of large numbers of motes will offer long-term, distributed monitoring, and control of real-world equipment and phenomena. A key requirement for these applications is long battery life. We investigate a processor platform architecture based on an application-specific programmable processor core, System-On-Chip bus, and a hardware accelerator. The architecture improves on the energy consumption of a conventional microprocessor design by tuning the architecture for a suite of TinyOS-based WSN applications. The tuning method used minimizes changes to the instruction set architecture facilitating rapid software migration to the new platform. The processor platform was implemented and validated in an FPGA-based WSN mote. The benefits of the approach in terms of energy consumption are estimated to be a reduction of 48\% for ASIC implementation relative to a conventional programmable processor for a typical TinyOS application suite without use of voltage scaling.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Embedded system design; hardware-software codesign; low power processor; Wireless Sensor Network", } @Article{Guan:2010:RFP, author = "Xuan Guan and Yunsi Fei", title = "Register file partitioning and recompilation for register file power reduction", journal = j-TODAES, volume = "15", number = "3", pages = "24:1--24:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754409", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Register files in modern embedded processors contribute a substantial budget in the energy consumption due to their large 
switching capacitance and long working time. For some embedded processors, on average 25\% of registers account for 83\% of register file accessing time. This motivates us to partition the register file into hot and cold regions, with the most frequently used registers placed in the hot region, and the rarely accessed ones in the cold region. We employ the bit-line splitting and drowsy register cell techniques to reduce the overall register file accessing power. We propose a novel approach to partition the register in a way that can achieve the largest power saving. We formulate the register file partitioning process into a graph partitioning problem, and apply an effective algorithm to obtain the optimal result. We evaluate our algorithm for MiBench and SPEC2000 applications on the SimpleScalar PISA system, and an average saving of 58.3\% and 54.4\% over the nonpartitioned register file accessing power is achieved. The area overhead is negligible, and the execution time overhead is acceptable (5.5\% for MiBench and 2.4\% for SPEC2000). 
Further evaluation for MiBench applications is performed on Alpha and X86 systems.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "compilers; Low-power design; processor architectures; register file partitioning", } @Article{Zhang:2010:CSD, author = "Yufu Zhang and Ankur Srivastava and Mohamed Zahran", title = "On-chip sensor-driven efficient thermal profile estimation algorithms", journal = j-TODAES, volume = "15", number = "3", pages = "25:1--25:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754410", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article addresses the problem of chip-level thermal profile estimation using runtime temperature sensor readings. We address the challenges of: (a) availability of only a few thermal sensors with constrained locations (sensors cannot be placed just anywhere); (b) random chip power density characteristics due to unpredictable workloads and fabrication variability. Firstly we model the random power density as a probability density function. Given such statistical characteristics and the runtime thermal sensor readings, we exploit the correlation in power dissipation among different chip modules to estimate the expected value of temperature at each chip location. Our methods are optimal if the underlying power density has Gaussian nature. We give a heuristic method to estimate the chip-level thermal profile when the underlying randomness is non-Gaussian. An extension of our method has also been proposed to address the dynamic case. 
Several speedup strategies are carefully investigated to improve the efficiency of the estimation algorithm. Experimental results indicated that, given only a few thermal sensors, our method can generate highly accurate chip-level thermal profile estimates within a few milliseconds.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "estimation; on-chip sensor; statistical; Thermal profile", } @Article{Chang:2010:LSC, author = "Kai-Hui Chang and Valeria Bertacco and Igor L. Markov and Alan Mishchenko", title = "Logic synthesis and circuit customization using extensive external don't-cares", journal = j-TODAES, volume = "15", number = "3", pages = "26:1--26:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754411", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditional digital circuit synthesis flows start from an HDL behavioral definition and assume that circuit functions are almost completely defined, making don't-care conditions rare. However, recent design methodologies do not always satisfy these assumptions. For instance, third-party IP blocks used in a system-on-chip are often overdesigned for the requirements at hand. By focusing only on the input combinations occurring in a specific application, one could resynthesize the system to greatly reduce its area and power consumption. Therefore we extend modern digital synthesis with a novel technique, called SWEDE, that makes use of extensive external don't-cares. In addition, we utilize such don't-cares present implicitly in existing simulation-based verification environments for circuit customization. 
Experiments indicate that SWEDE scales to large ICs with half-million input vectors and handles practical cases well.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Circuit customization; don't-care optimization; logic synthesis", } @Article{Liu:2010:ECR, author = "Shenghua Liu and Guoqiang Chen and Tom Tong Jing and Lei He and Robi Dutta and Xian-Long Hong", title = "Effective congestion reduction for {IC} package substrate routing", journal = j-TODAES, volume = "15", number = "3", pages = "27:1--27:??", month = may, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1754405.1754412", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jun 21 17:21:11 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Off-chip substrate routing for high-density packages is challenging due to requirements such as high density, lack of vertical detour, non-Manhattan routing, and primarily planar routing. The existing substrate routing algorithms often result in a large number of unrouted nets that have to be routed manually. This article develops an effective yet efficient diffusion-driven method D-Router to reduce congestion. Starting with an initial routing, we develop an effective diffusion-based congestion reduction. We iteratively find a congested window and spread out connections to reduce congestion inside the window by a simulated diffusion process based on the duality between congestion and concentration. The window is released after the congestion is eliminated. 
Compared with the state-of-the-art substrate routing method that leads to 480 nets unrouted for ten industrial designs with a total of 6415 nets, the D-Router reduces the amount of unrouted nets to 104, a reduction to the 4.6 multiple. In addition, the D-Router obtains a similar reduction on unrouted nets but runs up to 94 times faster when compared with a negotiation-based substrate routing.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "congestion reduction; IC package; routability; routing; substrate", } @Article{Shin:2010:PGC, author = "Youngsoo Shin and Jun Seomun and Kyu-Myung Choi and Takayasu Sakurai", title = "Power gating: {Circuits}, design methodologies, and best practice for standard-cell {VLSI} designs", journal = j-TODAES, volume = "15", number = "4", pages = "28:1--28:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835421", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power Gating has become one of the most widely used circuit design techniques for reducing leakage current. Its concept is very simple, but its application to standard-cell VLSI designs involves many careful considerations. The great complexity of designing a power-gated circuit originates from the side effects of inserting current switches, which have to be resolved by a combination of extra circuitry and customized tools and methodologies. In this tutorial we survey these design considerations and look at the best practice within industry and academia. 
Topics include output isolation and data retention, current switch design and sizing, and physical design issues such as power networks, increases in area and wirelength, and power grid analysis. Designers can benefit from this tutorial by obtaining a better understanding of implications of power gating during an early stage of VLSI designs. We also review the ways in which power gating has been improved. These include reducing the sizes of switches, cutting transition delays, applying power gating to smaller blocks of circuitry, and reducing the energy dissipated in mode transitions. Power Gating has also been combined with other circuit techniques, and these hybrids are also reviewed. Important open problems are identified as a stimulus to research.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design methodology; leakage current; low power; Power gating; standard-cell; VLSI", } @Article{Yu:2010:PSA, author = "Cheng-Juei Yu and Yi-Hsin Wu and Sheng-De Wang", title = "An in-place search algorithm for the resource constrained scheduling problem during high-level synthesis", journal = j-TODAES, volume = "15", number = "4", pages = "29:1--29:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835422", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose an in-place search algorithm for computing the exact solutions to the resource constrained scheduling problem. This algorithm supports operation chaining, pipelining and multicycling in the underlying scheduling problem. 
Based on two lower-bound estimation mechanisms that are capable of predicting the criterion values of search nodes represented by partially scheduled data flow graphs, the proposed algorithm can effectively prune the nonpromising search space and finds the optimum usually several times faster than existing techniques. As opposed to existing search-based scheduling techniques whose space complexity is squared or exponential in the search depth, our approach requires only a constant storage space during the traversal of the search tree. The low space complexity is accomplished by using a combination-generating algorithm, which leads our approach to visit search nodes in such a way that each one is obtained by making only a small change to its sibling without keeping any parent nodes in memory. Experimental results on several well known benchmarks with varying resource constraints show the effectiveness of the proposed algorithm.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "Design automation; exact scheduling; high-level synthesis; optimal scheduling; resource-constrained scheduling", } @Article{Lee:2010:PTP, author = "Kyoungwoo Lee and Aviral Shrivastava and Nikil Dutt and Nalini Venkatasubramanian", title = "Partitioning techniques for partially protected caches in resource-constrained embedded systems", journal = j-TODAES, volume = "15", number = "4", pages = "30:1--30:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835423", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Increasing exponentially with technology scaling, the soft error rate even in earth-bound embedded 
systems manufactured in deep subnanometer technology is projected to become a serious design consideration. Partially protected cache (PPC) is a promising microarchitectural feature to mitigate failures due to soft errors in power, performance, and cost sensitive embedded processors. A processor with PPC maintains two caches, one protected and the other unprotected, both at the same level of memory hierarchy. The intuition behind PPCs is that not all data in the application is equally prone to soft errors. By finding and mapping the data that is more prone to soft errors to the protected cache, and error-resilient data to the unprotected cache, failures induced by soft errors can be significantly reduced at a minimal power and performance penalty. Consequently, the effectiveness of PPCs critically hinges on the compiler's ability to partition application data into error-prone and error-resilient data. The effectiveness of PPCs has previously been demonstrated on multimedia applications --- where an obvious partitioning of data exists, the multimedia data is inherently resilient to soft errors, and the rest of the data and the entire code is assumed to be error-prone. Since the amount of multimedia data is a quite significant component of the entire application data, this obvious partitioning is quite effective. However, no such obvious data and code partitioning exists for general applications. This severely restricts the applicability of PPCs to data caches and instruction caches in general. 
This article investigates vulnerability-based partitioning schemes that are applicable to applications in general and effectively reduce failures due to soft errors at minimal power and performance overheads.\par Our experimental results on an HP iPAQ-like processor enhanced with PPC architecture, running benchmarks from the MiBench suite demonstrate that our partitioning heuristic efficiently finds page partitions for data PPCs that can reduce the failure rate by 48\% at only 2\% performance and 7\% energy overhead, and finds page partitions for instruction PPCs that reduce the failure rate by 50\% at only 2\% performance and 8\% energy overhead, on average.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "embedded systems; Page partitioning technique; partially protected cache; soft error; vulnerability", } @Article{Bonny:2010:HBC, author = "Talal Bonny and J{\"o}rg Henkel", title = "{Huffman}-based code compression techniques for embedded processors", journal = j-TODAES, volume = "15", number = "4", pages = "31:1--31:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835424", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The size of embedded software is increasing at a rapid pace. It is often challenging and time consuming to fit an amount of required software functionality within a given hardware resource budget. Code compression is a means to alleviate the problem by providing substantial savings in terms of code size. In this article we introduce a novel and efficient hardware-supported compression technique that is based on Huffman Coding. 
Our technique reduces the size of the generated decoding table, which takes a large portion of the memory. It combines our previous techniques, Instruction Splitting Technique and Instruction Re-encoding Technique into a new one called Combined Compression Technique to improve the final compression ratio by taking advantage of both previous techniques. The Instruction Splitting Technique is instruction set architecture (ISA)-independent. It splits the instructions into portions of varying size (called patterns) before Huffman coding is applied. This technique improves the final compression ratio by more than 20\% compared to other known schemes based on Huffman Coding. The average compression ratios achieved using this technique are 48\% and 50\% for ARM and MIPS, respectively. The Instruction Re-encoding Technique is ISA-dependent. It investigates the benefits of reencoding unused bits (we call them reencodable bits) in the instruction format for a specific application to improve the compression ratio. Reencoding those bits can reduce the size of decoding tables by up to 40\%. Using this technique, we improve the final compression ratios in comparison to the first technique to 46\% and 45\% for ARM and MIPS, respectively (including all overhead that is incurred). The Combined Compression Technique improves the compression ratio to 45\% and 42\% for ARM and MIPS, respectively. 
In our compression technique, we have conducted evaluations using a representative set of applications and we have applied each technique to two major embedded processor architectures, namely ARM and MIPS.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "code compression; code density; Embedded systems; Huffman coding", } @Article{Li:2010:CPG, author = "Zhifang Li and Wenjian Luo and Lihua Yue and Xufa Wang", title = "On the completeness of the polymorphic gate set", journal = j-TODAES, volume = "15", number = "4", pages = "32:1--32:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835425", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Polymorphic gates are special kinds of logic gates that can exhibit different functions under the control of environmental parameters, such as light, temperature, and VDD. These polymorphic gates can be used to build polymorphic circuits that perform different functions under different environments. Because polymorphic gates are different from traditional logic gates, the existent completeness theory for the traditional logic gate set is not suitable for the polymorphic gate set. So far, only the definition of the complete polymorphic gate set is given. There is no approach to judging whether a given polymorphic gate set is complete. The contributions of this article include three aspects. First, the impact of logic-1 and logic-0 on the completeness of the polymorphic gate set is discussed. Second, the theory and two related algorithms for judging the completeness of polymorphic gate sets with two modes are given. 
Finally, the theory and related algorithms for complete polymorphic gate sets with more than two modes are proposed.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "completeness theory; polymorphic circuit; Polymorphic electronics; polymorphic gate", } @Article{Wang:2010:CDF, author = "Renshen Wang and Evangeline Young and Chung-Kuan Cheng", title = "Complexity of {$3$-D} floorplans by analysis of graph cuboidal dual hardness", journal = j-TODAES, volume = "15", number = "4", pages = "33:1--33:??", month = sep, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1835420.1835426", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Oct 6 09:42:42 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Interconnect dominated electronic design stimulates a demand for developing circuits on the third dimension, leading to 3-D integration. Recent advances in chip fabrication technology enable 3-D circuit manufacturing. However, there is still a possible barrier of design complexity in exploiting 3-D technologies. This article discusses the impact of migrating from 2-D to 3-D on the difficulty of floorplanning and placement. By looking at a basic formulation of the graph cuboidal dual problem, we show that the 3-D cases and the 3-layer 2.5-D cases are fundamentally more difficult than the 2-D cases in terms of computational complexity. By comparison among these cases, the intrinsic complexity in 3-D floorplan structures is revealed in the hard-to-decide relations between topological connections and geometrical contacts. 
The results show possible challenges in the future for physical design and CAD of 3-D integrated circuits.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "3-D integration; cuboidal dual; floorplanning; hardness", } @Article{Chang:2010:GEC, author = "Naehyuck Chang and J{\"o}rg Henkel", title = "Guest Editorial: Current Trends in Low-Power Design", journal = j-TODAES, volume = "16", number = "1", pages = "1:1--1:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870110", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bol:2010:NME, author = "David Bol and Denis Flandre and Jean-Didier Legat", title = "Nanometer {MOSFET} Effects on the Minimum-Energy Point of Sub-45nm Subthreshold Logic---Mitigation at Technology and Circuit Levels", journal = j-TODAES, volume = "16", number = "1", pages = "2:1--2:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870111", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Subthreshold operation of digital circuits enables minimum energy consumption. In this article, we observe that minimum energy Emin of subthreshold logic dramatically increases when reaching 45nm CMOS node. 
We demonstrate by circuit simulation and analytical modeling that this increase comes from the combined effects of variability, gate leakage, and Drain-Induced Barrier Lowering (DIBL) effect. We then investigate the new impact of individual MOSFET parameters Lg, Vt, and Tox on Emin in sub-45nm technologies. We further propose an optimum MOSFET selection, which favors low-Vt mid-Lg devices in 45nm CMOS technology. The use of such optimum MOSFETs yields 35\% Emin reduction for a benchmark multiplier with good speed performances and negligible area overhead.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Calimera:2010:NAC, author = "Andrea Calimera and Enrico Macii and Massimo Poncino", title = "{NBTI}-Aware Clustered Power Gating", journal = j-TODAES, volume = "16", number = "1", pages = "3:1--3:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870112", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The emergence of Negative Bias Temperature Instability (NBTI) as the most relevant source of reliability in sub-90nm technologies has led to a new facet of the traditional trade-off between power and reliability. NBTI effects in fact manifest themselves as an increase of the propagation delay of the devices over time, which adds up to the delay penalty incurred by most low-power design solutions. This implies that, given a desired lifetime of a circuit (i.e., a given performance target at some point in time), a power-managed component will fail earlier than a nonpower-managed one. 
In this work, we show how it is possible to partially overcome this conflict, by leveraging the benefits in terms of aging provided by power-gating (i.e., by using switches that disconnect a logic block from the ground).", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cong:2010:BLO, author = "Jason Cong and Bin Liu and Rupak Majumdar and Zhiru Zhang", title = "Behavior-Level Observability Analysis for Operation Gating in Low-Power Behavioral Synthesis", journal = j-TODAES, volume = "16", number = "1", pages = "4:1--4:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870113", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many techniques for power reduction in advanced RTL synthesis tools rely explicitly or implicitly on observability don't-care conditions. In this article we propose a systematic approach to maximize the effectiveness of these techniques by generating power-friendly RTL descriptions in behavioral synthesis. This is done using operation gating, that is, explicitly adding a predicate to an operation based on its observability condition, so that the operation, once identified as unobservable at runtime, can be avoided using RTL power optimization techniques such as clock gating. We first introduce the concept of behavior-level observability and its approximations in the context of behavioral synthesis. 
We then propose an efficient procedure to compute an approximated behavior-level observability of every operation in a dataflow graph.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Thorolfsson:2010:LPH, author = "Thorlindur Thorolfsson and Samson Melamed and W. Rhett Davis and Paul D. Franzon", title = "Low-Power Hypercube Divided Memory {FFT} Engine Using {$3$D} Integration", journal = j-TODAES, volume = "16", number = "1", pages = "5:1--5:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870114", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article we demonstrate a floating point FFT processor that leverages both 3D integration and a unique hypercube memory division scheme to reduce the power consumption of a 1024 point FFT down to 4.227$ \mu $J. The hypercube memory division scheme lowers the energy per memory access by 59.2\% and increases the total required area by 16.8\%. The use of 3D integration reduces the logic power by 5.2\%. 
We describe the tool flow required to realize the 3D implementation and perform a thermal analysis of it.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dhiman:2010:VSE, author = "Gaurav Dhiman and Giacomo Marchetti and Tajana Rosing", title = "{vGreen}: a System for Energy-Efficient Management of Virtual Machines", journal = j-TODAES, volume = "16", number = "1", pages = "6:1--6:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870115", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present vGreen, a multitiered software system for energy-efficient virtual machine management in a clustered virtualized environment. The system leverages the use of novel hierarchical metrics that work across the different abstractions in a virtualized environment to capture power and performance characteristics of both the virtual and physical machines. These characteristics are then used to implement policies for scheduling and power management of virtual machines across the cluster.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2010:EEP, author = "Jinsik Kim and Pai H. 
Chou", title = "Energy-Efficient Progressive Remote Update for Flash-Based Firmware of Networked Embedded Systems", journal = j-TODAES, volume = "16", number = "1", pages = "7:1--7:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870116", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Firmware update over a network connection is an essential but expensive feature for many embedded systems due to not only the relatively high power consumption and limited bandwidth, but also page-granular erasure before rewriting to flash memory. This work proposes a page-level, link-time technique that minimizes not only the size of patching scripts but also perturbation to the firmware memory, over the entire sequence of updates in the system's lifetime. We propose a tool that first clusters functions to minimize caller-callee dependency across pages, and then orders the functions within each page to minimize intrapage perturbation. 
Experimental results show our technique to reduce the energy consumption of firmware update by 30--42\% over the state-of-the-art.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yu:2010:EPE, author = "Chenjie Yu and Peter Petrov", title = "Energy- and Performance-Efficient Communication Framework for Embedded {MPSoCs} through Application-Driven Release Consistency", journal = j-TODAES, volume = "16", number = "1", pages = "8:1--8:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870117", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a framework for performance-, bandwidth-, and energy-efficient intercore communication in embedded MultiProcessor Systems-on-a-Chip (MPSoC). The methodology seamlessly integrates compiler, operating system, and hardware support to achieve a low-cost communication between synchronized producers and consumers. The technique is especially beneficial for data-streaming applications exploiting pipeline parallelism with computational phases mapped to separate cores. Code transformations utilizing a simple ISA support ensure that producer writes are propagated to consumers with a single interconnect transaction per cache block just prior to the producer exiting its synchronization region. 
Furthermore, in order to completely eliminate misses to shared data caused by interference with private data and also to minimize the cache energy, we integrate to the proposed framework a cache way partitioning policy based on a simple cache configurability support, which isolates the shared buffers from other cache traffic.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jayakumar:2010:SIV, author = "Nikhil Jayakumar and Sunil P. Khatri", title = "A Simultaneous Input Vector Control and Circuit Modification Technique to Reduce Leakage with Zero Delay Penalty", journal = j-TODAES, volume = "16", number = "1", pages = "9:1--9:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870118", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Leakage power currently comprises a large fraction of the total power consumption of an IC. Techniques to minimize leakage have been researched widely. However, most approaches to reducing leakage have an associated performance penalty. In this article, we present an approach which minimizes leakage by simultaneously modifying the circuit while deriving the input vector that minimizes leakage. In our approach, we selectively modify a gate so that its output (in sleep mode) is in a state which helps minimize the leakage of other gates in its transitive fanout. 
Gate replacement is performed in a slack-aware manner, to minimize the resulting delay penalty.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2010:SCR, author = "Yu-Ze Wu and Mango C.-T. Chao", title = "Scan-Cell Reordering for Minimizing Scan-Shift Power Based on Nonspecified Test Cubes", journal = j-TODAES, volume = "16", number = "1", pages = "10:1--10:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870119", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents several scan-cell reordering techniques to reduce the signal transitions during the test mode while preserving the don't-care bits in the test patterns for a later optimization. Combined with a pattern-filling technique, the proposed scan-cell reordering techniques can utilize both high response correlations and pattern correlations to simultaneously minimize scan-out and scan-in transitions. Those scan-shift transitions can be further reduced by selectively using the inverse connections between scan cells. In addition, the trade-off between routing overhead and power consumption can also be controlled by the proposed scan-cell reordering techniques.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singh:2010:AJE, author = "Montek Singh and Steven M. 
Nowick", title = "{ACM Journal on Emerging Technologies in Computing Systems}", journal = j-TODAES, volume = "16", number = "1", pages = "11:1--11:??", month = nov, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1870109.1870120", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 9 11:12:21 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pedram:2011:CPV, author = "Massoud Pedram", title = "Call for papers: Verification issue and challenges with multicore systems", journal = j-TODAES, volume = "16", number = "2", pages = "12:1--12:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929944", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bernasconi:2011:DRB, author = "Anna Bernasconi and Valentina Ciriani", title = "Dimension-reducible {Boolean} functions based on affine spaces", journal = j-TODAES, volume = "16", number = "2", pages = "13:1--13:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929945", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We define and study a new class of 
regular Boolean functions called D-reducible. A D-reducible function, depending on all its $ n $ input variables, can be studied and synthesized in a space of dimension strictly smaller than $ n $. We show that the D-reducibility property can be efficiently tested, in time polynomial in the representation of $ f $, that is, an initial SOP form of $ f $. A D-reducible function can be efficiently decomposed, giving rise to a new logic form, that we have called DredSOP. This form is shown here to be generally smaller than the corresponding minimum SOP form.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2011:OAE, author = "Yi Wang and Hui Liu and Duo Liu and Zhiwei Qin and Zili Shao and Edwin H.-M. Sha", title = "Overhead-aware energy optimization for real-time streaming applications on multiprocessor {System-on-Chip}", journal = j-TODAES, volume = "16", number = "2", pages = "14:1--14:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929946", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we focus on solving the energy optimization problem for real-time streaming applications on multiprocessor System-on-Chip by combining task-level coarse-grained software pipelining with DVS (Dynamic Voltage Scaling) and DPM (Dynamic Power Management) considering transition overhead, inter-core communication and discrete voltage levels. We propose a two-phase approach to solve the problem.
In the first phase, we propose a coarse-grained task parallelization algorithm called RDAG to transform a periodic dependent task graph into a set of independent tasks by exploiting the periodic feature of streaming applications. In the second phase, we propose a scheduling algorithm, GeneS, to optimize energy consumption. GeneS is a genetic algorithm that can search and find the best schedule within the solution space generated by gene evolution.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cong:2011:AMP, author = "Jason Cong and Wei Jiang and Bin Liu and Yi Zou", title = "Automatic memory partitioning and scheduling for throughput and power optimization", journal = j-TODAES, volume = "16", number = "2", pages = "15:1--15:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929947", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Memory bottleneck has become a limiting factor in satisfying the explosive demands on performance and cost in modern embedded system design. Selected computation kernels for acceleration are usually captured by nest loops, which are optimized by state-of-the-art techniques like loop tiling and loop pipelining. However, memory bandwidth bottlenecks prevent designs from reaching optimal throughput with respect to available parallelism. In this paper we present an automatic memory partitioning technique which can efficiently improve throughput and reduce energy consumption of pipelined loop kernels for given throughput constraints and platform requirements. 
Also, our proposed algorithm can handle general array access beyond affine array references.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2011:MUT, author = "Guihai Yan and Yinhe Han and Hui Liu and Xiaoyao Liang and Xiaowei Li", title = "{MicroFix}: Using timing interpolation and delay sensors for power reduction", journal = j-TODAES, volume = "16", number = "2", pages = "16:1--16:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929948", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditional DVFS schemes are oblivious to fine-grained adaptability resulting from path-grained timing imbalance. With the awareness of such fine-grained adaptability, better power-performance efficiency can be obtained. We propose a new scheme, MicroFix, to exploit such fine-grained adaptability. We first show the potential resulted from the path-grained timing imbalance and then present a new technique, Timing Interpolation, to reap the fine-grained adaptability for power reduction. Moreover, to eliminate the conservative margins of traditional DVFS, unlike the previous approaches such as Razor that reactively handle the delay errors (induced by aggressively scaled voltage/frequency) by enabling error detection and recovery, we propose a proactive approach by error prediction, thereby obviate the high-cost recovery routines.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2011:RSA, author = "Irith Pomeranz and Sudhakar M. 
Reddy", title = "Reducing the switching activity of test sequences under transparent-scan", journal = j-TODAES, volume = "16", number = "2", pages = "17:1--17:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929949", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Transparent-scan is a test application scheme for scan circuits. It provides unique opportunities for test compaction that do not exist with the standard test application scheme. We show that it also provides unique opportunities for reducing the power dissipation of a scan-based test set. After translating a standard scan-based test set into a transparent-scan sequence, we apply two operations for reducing the power dissipation of the sequence. The first operation attempts to remove a test vector that causes high power dissipation. The second operation attempts to replace a scan clock cycle with a functional clock cycle, or a functional clock cycle with a scan clock cycle, in order to reduce the power dissipation.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cauley:2011:PBC, author = "Stephen Cauley and Venkataramanan Balakrishnan and Y. 
Charlie Hu and Cheng-Kok Koh", title = "A parallel branch-and-cut approach for detailed placement", journal = j-TODAES, volume = "16", number = "2", pages = "18:1--18:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929950", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We introduce a technique that utilizes distributing computing resources for the efficient optimization of a traditional physical design problem. Specifically, we present a detailed placement strategy designed to exploit distributed computing environments, where the additional computing resources are employed in parallel to improve the optimization time. A Mixed Integer Programming (MIP) model and branch-and-cut optimization strategy are employed to solve the standard cell placement problem. By exploiting the problem structure, our algorithm improves upon the solutions afforded by existing optimization algorithms. First, an efficient batch-branching technique can eliminate several integer decision variables during each step of the optimization procedure. 
This batch-branching scheme can be performed serially or in parallel.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2011:GRS, author = "Yih-Lang Li and Yu-Ning Chang and Wen-Nai Cheng", title = "A gridless routing system with nonslicing floorplanning-based crosstalk reduction on gridless track assignment", journal = j-TODAES, volume = "16", number = "2", pages = "19:1--19:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929951", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Track assignment, which is an intermediate stage between global routing and detailed routing, provides a good platform for promoting performance, and for imposing additional constraints during routing, such as crosstalk. Gridless track assignment (GTA) has not been addressed in public literature. This work develops a gridless routing system integrating a congestion-driven global router, crosstalk-driven GTA and an enhanced implicit connection-graph-based router. Initial assignment is produced rapidly with a left-edge like algorithm. Crosstalk reduction on the assignment is then transformed to a restricted nonslicing floorplanning problem, and a deterministic O-Tree based algorithm is employed to reassign each net segment. 
Finally, each panel is partitioned into several subpanels, and the subpanels are reordered using branch and bound algorithm to decrease the crosstalk further.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2011:SBA, author = "Yu Liu and Kaijie Wu and Ramesh Karri", title = "Scan-based attacks on linear feedback shift register based stream ciphers", journal = j-TODAES, volume = "16", number = "2", pages = "20:1--20:??", month = mar, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1929943.1929952", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 1 16:07:45 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Stream cipher is an important class of encryption algorithm that encrypts plaintext messages one bit at a time. Various stream ciphers are deployed in wireless telecommunication applications because they have simple hardware circuitry, are generally fast and consume very low power. On the other hand, scan-based Design-for-Test (DFT) is one of the most popular methods to test IC devices. All flip-flops in the Design Under Test are connected to one or more scan chains and the states of the flip-flops can be scanned out through these chains. 
In this paper, we present an attack on stream cipher implementations by determining the scan chain structure of the Linear Feedback Shift Registers in their implementations.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Keutzer:2011:SSM, author = "Kurt Keutzer and Peng Li and Li Shang and Hai Zhou", title = "A Special Section on Multicore Parallel {CAD}: Algorithm Design and Programming", journal = j-TODAES, volume = "16", number = "3", pages = "21:1--21:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970354", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ludwin:2011:EDP, author = "Adrian Ludwin and Vaughn Betz", title = "Efficient and Deterministic Parallel Placement for {FPGAs}", journal = j-TODAES, volume = "16", number = "3", pages = "22:1--22:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970355", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We describe a parallel simulated annealing algorithm for FPGA placement. The algorithm proposes and evaluates multiple moves in parallel, and has been incorporated into Altera's Quartus II CAD system. 
Across a set of 18 industrial benchmark circuits, we achieve geometric average speedups during the quench of 2.7x and 4.0x on four and eight processors, respectively, with individual circuits achieving speedups of up to 3.6x and 5.9x. Over the course of the entire anneal, we achieve speedups of up to 2.8x and 3.7x, with geometric average speedups of 2.1x and 2.4x. Our algorithm is the first parallel placer to optimize for criteria other than wirelength, such as critical path length, and is one of the few deterministic parallel placement algorithms.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Han:2011:DIT, author = "Yiding Han and Koushik Chakraborty and Sanghamitra Roy and Vilasita Kuntamukkala", title = "Design and Implementation of a Throughput-Optimized {GPU} Floorplanning Algorithm", journal = j-TODAES, volume = "16", number = "3", pages = "23:1--23:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970356", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a novel floorplanning algorithm for GPUs. Floorplanning is an inherently sequential algorithm, far from the typical programs suitable for Single-Instruction Multiple-Thread (SIMT)-style concurrency in a GPU. We propose a fundamentally different approach of exploring the floorplan solution space, where we evaluate concurrent moves on a given floorplan. We illustrate several performance optimization techniques for this algorithm in GPUs. 
To improve the solution quality, we present a comprehensive exploration of the design space, including various techniques to adapt the annealing approach in a GPU.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2011:GBP, author = "Yifang Liu and Jiang Hu", title = "{GPU}-Based Parallelization for Fast Circuit Optimization", journal = j-TODAES, volume = "16", number = "3", pages = "24:1--24:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970357", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The progress of GPU (Graphics Processing Unit) technology opens a new avenue for boosting computing power. This work is an attempt to exploit the GPU for accelerating VLSI circuit optimization. We propose GPU-based parallel computing techniques and apply them on simultaneous gate sizing and threshold voltage assignment, which is a popular method for VLSI performance and power optimization. These techniques include efficient task scheduling and memory organization, all of which are aimed to fully utilize the advantages of GPUs.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hsu:2011:MSS, author = "Chia-Jui Hsu and Jos{\'e} Luis Pino and Shuvra S. 
Bhattacharyya", title = "Multithreaded Simulation for Synchronous Dataflow Graphs", journal = j-TODAES, volume = "16", number = "3", pages = "25:1--25:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970358", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For system simulation, Synchronous DataFlow (SDF) has been widely used as a core model of computation in design tools for digital communication and signal processing systems. The traditional approach for simulating SDF graphs is to compute and execute static schedules in single-processor desktop environments. Nowadays, however, multicore processors are increasingly popular desktop platforms for their potential performance improvements through thread-level parallelism. Without novel scheduling and simulation techniques that explicitly explore thread-level parallelism for executing SDF graphs, current design tools gain only minimal performance improvements on multicore platforms. 
In this article, we present a new multithreaded simulation scheduler, called MSS, to provide simulation runtime speedup for executing SDF graphs on multicore processors.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liao:2011:AUB, author = "Xiongfei Liao and Thambipillai Srikanthan", title = "Accelerating {UNISIM}-Based Cycle-Level Microarchitectural Simulations on Multicore Platforms", journal = j-TODAES, volume = "16", number = "3", pages = "26:1--26:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970359", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "UNISIM has been shown to ease the development of simulators for multi-/many-core systems. However, UNISIM cycle-level simulations of large-scale multiprocessor systems could be very time consuming. In this article, we propose a systematic framework for accelerating UNISIM cycle-level simulations on multicore platforms. The proposed framework relies on exploiting the fine-grained parallelism within the simulated cycles using POSIX threads. A multithreaded simulation engine has been devised from the single-threaded UNISIM SystemC engine to facilitate the exploitation of inherent parallelism. An adaptive technique that manages the overall computation workload by adjusting the number of threads employed at any given time is proposed. 
In addition, we have introduced a technique to balance the workloads of multithreaded executions.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Garcia-Dopico:2011:NAV, author = "Antonio Garc{\'\i}a-Dopico and Antonio P{\'e}rez and Santiago Rodr{\'\i}guez and Mar{\'\i}a Isabel Garc{\'\i}a", title = "A New Algorithm for {VHDL} Parallel Simulation", journal = j-TODAES, volume = "16", number = "3", pages = "27:1--27:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970360", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a new algorithm for parallel synchronous simulation of VHDL designs to be executed on desktop computers. Besides executing VHDL processes in parallel, the algorithm focuses on parallelizing the simulation kernel with special emphasis on signal grouping while maintaining language semantics. Synchronous approaches are the most suitable for shared memory multiprocessor (SMP) desktop computers but may be difficult to parallelize because of the low activity detected in most of the designs. The degree of parallelism is increased in this approach by performing an exhaustive VHDL signal dependencies analysis and avoiding any sequential phase in the simulator. 
VHDL semantics impose a synchronization barrier after each phase, that is, the process and the kernel simulation phase, as the language definition does not allow simultaneous execution of kernel and processes.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zeng:2011:LDP, author = "Zhiyu Zeng and Zhuo Feng and Peng Li and Vivek Sarin", title = "Locality-Driven Parallel Static Analysis for Power Delivery Networks", journal = j-TODAES, volume = "16", number = "3", pages = "28:1--28:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970361", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Large VLSI on-chip Power Delivery Networks (PDNs) are challenging to analyze due to the sheer network complexity. In this article, a novel parallel partitioning-based PDN analysis approach is presented. We use the boundary circuit responses of each partition to divide the full grid simulation problem into a set of independent subgrid simulation problems. Instead of solving exact boundary circuit responses, a more efficient scheme is proposed to provide near-exact approximation to the boundary circuit responses by exploiting the spatial locality of the flip-chip-type power grids. 
This scheme is also used in a block-based iterative error reduction process to achieve fast convergence.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhu:2011:MPL, author = "Yuhao Zhu and Bo Wang and Yangdong Deng", title = "Massively Parallel Logic Simulation with {GPUs}", journal = j-TODAES, volume = "16", number = "3", pages = "29:1--29:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970362", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we developed a massively parallel gate-level logical simulator to address the ever-increasing computing demand for VLSI verification. To the best of the authors' knowledge, this work is the first one to leverage the power of modern GPUs to successfully unleash the massive parallelism of a conservative discrete event-driven algorithm, CMB algorithm. A novel data-parallel strategy is proposed to manipulate the fine-grain message passing mechanism required by the CMB protocol. 
To support robust and complete simulation for real VLSI designs, we establish both a memory paging mechanism and an adaptive issuing strategy to efficiently utilize the GPU memory with a limited capacity.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chatterjee:2011:GLS, author = "Debapriya Chatterjee and Andrew Deorio and Valeria Bertacco", title = "Gate-Level Simulation with {GPU} Computing", journal = j-TODAES, volume = "16", number = "3", pages = "30:1--30:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970363", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Functional verification of modern digital designs is a crucial, time-consuming task impacting not only the correctness of the final product, but also its time to market. At the heart of most of today's verification efforts is logic simulation, used heavily to verify the functional correctness of a design for a broad range of abstraction levels. In mainstream industry verification methodologies, typical setups coordinate the validation effort of a complex digital system by distributing logic simulation tasks among vast server farms for months at a time. 
Yet, the performance of logic simulation is not sufficient to satisfy the demand, leading to incomplete validation processes, escaped functional bugs, and continuous pressure on the EDA industry to develop faster simulation solutions.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bondade:2011:HSC, author = "Rajdeep Bondade and Dongsheng Ma", title = "Hardware-Software Codesign of an Embedded Multiple-Supply Power Management Unit for Multicore {SoCs} Using an Adaptive Global\slash Local Power Allocation and Processing Scheme", journal = j-TODAES, volume = "16", number = "3", pages = "31:1--31:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970364", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power dissipation has become a critical design constraint for the growth of modern multicore systems due to increasing clock frequencies, leakage currents, and system parasitics. To overcome this urgent crisis, this article presents an embedded platform for on-chip power management of a multicore System-on-Chip (SoC). The design involves the development of two key components, from the hardware to the software level. From the hardware perspective, a multiple-supply power management unit is proposed and is implemented using a Single-Inductor Multiple-Output (SIMO) DC-DC converter. 
To dynamically respond to the sensed instantaneous power demands and to accurately control the power delivery to the processor cores, the power management unit employs a software-defined adaptive global/local power allocation feedback controller.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Stitt:2011:TWD, author = "Greg Stitt and Frank Vahid", title = "Thread Warping: Dynamic and Transparent Synthesis of Thread Accelerators", journal = j-TODAES, volume = "16", number = "3", pages = "32:1--32:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We introduce thread warping, a dynamic optimization technique that customizes multicore architectures to a given application by dynamically synthesizing threads into custom accelerator circuits on FPGAs (Field-Programmable Gate Arrays). Thread warping builds upon previous dynamic synthesis techniques for single-threaded applications, enabling dynamic architectural adaptation to different amounts of thread-level parallelism, while also exploiting parallelism within each thread to further improve performance. Furthermore, thread warping maintains the important separation of function from architecture, enabling portability of applications to architectures with different quantities of microprocessors and FPGAs, an advantage not shared by static compilation/synthesis approaches. 
We introduce an approach consisting of CAD tools and operating system support that enables thread warping on potentially any microprocessor/FPGA architecture.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ain:2011:CPV, author = "Antara Ain and Debjit Pal and Pallab Dasgupta and Siddhartha Mukhopadhyay and Rajdeep Mukhopadhyay and John Gough", title = "{Chassis}: a Platform for Verifying {PMU} Integration Using Autogenerated Behavioral Models", journal = j-TODAES, volume = "16", number = "3", pages = "33:1--33:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970367", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power Management Units (PMUs) are large integrated circuits consisting of many predesigned mixed-signal components. PMU integration poses a serious verification problem considering the size of the integrated circuit and the complexity of analog simulation. In this article we present an approach for automatic generation of behavioral models for PMU components from top-down skeleton models, fitted with parameter values estimated by bottom-up parameter extraction algorithms. It is shown that replacing PMU components with these autogenerated hybrid automata-based abstract behavioral models enables significant simulation speedup ({$>$} 20X on our industrial test cases) and helps in early detection of integration errors. 
The article also justifies the level of accuracy in our models with respect to the goal of verifying integrated PMUs.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yu:2011:MQS, author = "Yue Yu and Shangping Ren and Xiaobo Sharon Hu", title = "A Metric for Quantifying Similarity between Timing Constraint Sets in Real-Time Systems", journal = j-TODAES, volume = "16", number = "3", pages = "34:1--34:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970368", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Real-time systems are systems in which their timing behaviors must satisfy a specified set of timing constraints and they often operate in a real-world environment with scarce resources. As a result, the actual runtime performance of these systems may deviate from the design, either inevitably due to unpredictable factors or by intention in order to improve system's other Quality-of-Service (QoS) properties. In this article, we first introduce a new metric, timing constraint set similarity, to quantify the resemblance between two different timing constraint sets. 
Because directly calculating the exact value of the metric involves calculating the size of a polytope which is a \#P-hard problem, we instead introduce an efficient method for estimating its bound.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Abouzeid:2011:COS, author = "Fady Abouzeid and Sylvain Clerc and Fabian Firmin and Marc Renaudin and Tiempo Sas and Gilles Sicard", title = "{40nm CMOS} {0.35V}-Optimized Standard Cell Libraries for Ultra-Low Power Applications", journal = j-TODAES, volume = "16", number = "3", pages = "35:1--35:??", month = jun, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1970353.1970369", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 14 11:55:50 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Ultra-low voltage is now a well-known solution for energy constrained applications designed using nanometric process technologies. This work is focused on setting up an automated methodology to enable the design of ultra-low voltage digital circuits exclusively using standard EDA tools. To achieve this goal, a 0.35V energy-delay optimized library was developed. This library, fully compliant with standard library design flow and characterization, was verified through the design and fabrication of a BCH decoder circuit, following a standard front-end to back-end flow. At 0.33V, it performs at 600 kHz with a dynamic energy consumption reduced by a factor 14x from nominal 1.1V.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Qiu:2011:ATB, author = "Meikang Qiu and Edwin H.-M. 
Sha", title = "2011 {ACM} {TODAES} best paper award", journal = j-TODAES, volume = "16", number = "4", pages = "36:1--36:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003696", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In high-level synthesis for real-time embedded systems using heterogeneous functional units (FUs), it is critical to select the best FU type for each task. However, some tasks may not have fixed execution times. This article models each varied execution time as a probabilistic random variable and solves the heterogeneous assignment with probability (HAP) problem. The solution of the HAP problem assigns a proper FU type to each task such that the total cost is minimized while the timing constraint is satisfied with a guaranteed confidence probability. The solutions to the HAP problem are useful for both hard real-time and soft real-time systems.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sen:2011:COV, author = "Alper Sen", title = "Concurrency-oriented verification and coverage of system-level designs", journal = j-TODAES, volume = "16", number = "4", pages = "37:1--37:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003697", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Correct concurrent System-on-Chips (SoCs) are very hard to design and reason about. 
In this work, we develop an automated framework complete with concurrency-oriented verification and coverage techniques for system-level designs. Our techniques are different from traditional simulation-based reliability techniques, since concurrency information is often lost in traditional techniques. We preserve concurrency information to obtain unique verification techniques that allow us to predict potential errors (formulated as transaction-level assertions) from error-free simulations. In order to do this, we exploit the inherent concurrency in the designs to generate and analyze novel partial-order simulation traces. Additionally, to evaluate the confidence on verification results and the gauge progress of verification, we develop novel mutation testing based on concurrent coverage metrics.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Fournier:2011:PAC, author = "Laurent Fournier and Avi Ziv and Ekaterina Kutsy and Ofer Strichman", title = "A probabilistic analysis of coverage methods", journal = j-TODAES, volume = "16", number = "4", pages = "38:1--38:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003698", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Coverage is an important measure for the quality and completeness of the functional verification of hardware logic designs. Verification teams spend a significant amount of time looking for bugs in the design and in providing high-quality coverage. This process is performed through the use of various sampling strategies for selecting test inputs. 
The selection of sampling strategies to achieve the verification goals is typically carried out in an intuitive manner. We studied several commonly used sampling strategies and provide a probabilistic framework for assessing and comparing their relative values. For this analysis, we derived results for two measures of interest: first, the probability of finding a bug within a given number of samplings; and second, the expected number of samplings until a bug is detected.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sun:2011:GDD, author = "Wei-Tsun Sun and Zoran Salcic", title = "{GALS-Designer}: a design framework for {GALS} software systems", journal = j-TODAES, volume = "16", number = "4", pages = "39:1--39:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003699", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "GALS-Designer is a framework for the design of software systems which comply with the formal Globally Asynchronous Locally Synchronous model of computation (GALS). Those systems consist of single or multiple GALS programs and their immediate environment, which can be other programs and any other modules described in SystemC. The framework integrates our libGALS library for writing GALS programs and SystemC. It enables modeling and simulation of single and multiple GALS programs within the single SystemC executable model on the host (simulation) operating system. 
The same GALS programs can then be run without SystemC on a target operating system for which the libGALS runtime library is available.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mittal:2011:TVA, author = "Kartikey Mittal and Arpit Joshi and Madhu Mutyam", title = "Timing variation-aware scheduling and resource binding in high-level synthesis", journal = j-TODAES, volume = "16", number = "4", pages = "40:1--40:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003700", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to technological scaling, process variations have increased significantly, resulting in large variations in the delay of the functional units. Hence, the worst-case approach is becoming increasingly pessimistic in meeting a certain performance yield. The problem therefore is to increase the performance as much as possible while maintaining the desired yield. In this work, we introduce an integer linear programming (ILP) formulation for scheduling and resource binding in high-level synthesis (HLS) which tries to mitigate the effect of timing variations. 
In the presence of delay variations of resources, as chained resources can give a better latency and performance yield trade-off, instead of considering them independently, we consider external chaining of resources, that is, two or more resources are connected by external wiring, and exploit operation chaining.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2011:RCM, author = "Xiaofang Wang and Pallav Gupta", title = "Resource-constrained multiprocessor synthesis for floating-point applications on {FPGAs}", journal = j-TODAES, volume = "16", number = "4", pages = "41:1--41:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003701", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Although state-of-the-art field-programmable gate arrays offer exciting new opportunities in exploring low-cost high-performance architectures for data-intensive scientific applications, they also present serious challenges. Multiprocessor-on-programmable-chip, which integrates software programmability and hardware reconfiguration, provides substantial flexibility that results in shorter design cycles, higher performance, and lower cost. In this article, we present an application-specific design methodology for multiprocessor-on-programmable-chip architectures that target applications involving large matrices and floating-point operations. Given an application with specific energy-performance and resource constraints, our methodology aims to customize the architecture to match the diverse computation and communication requirements of the application tasks. 
Graph-based analysis of the application drives system synthesis that employs a precharacterized, parameterized hardware component library of functional units.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2011:MAO, author = "Yongjoo Kim and Jongeun Lee and Aviral Shrivastava and Yunheung Paek", title = "Memory access optimization in compilation for coarse-grained reconfigurable architectures", journal = j-TODAES, volume = "16", number = "4", pages = "42:1--42:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003702", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Coarse-grained reconfigurable architectures (CGRAs) promise high performance at high power efficiency. They fulfil this promise by keeping the hardware extremely simple, and moving the complexity to application mapping. One major challenge comes in the form of data mapping. For reasons of power-efficiency and complexity, CGRAs use multibank local memory, and a row of PEs share memory access. In order for each row of the PEs to access any memory bank, there is a hardware arbiter between the memory requests generated by the PEs and the banks of the local memory. 
However, a fundamental restriction remains in that a bank cannot be accessed by two different PEs at the same time.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bruneel:2011:DDF, author = "Karel Bruneel and Wim Heirman and Dirk Stroobandt", title = "Dynamic data folding with parameterizable {FPGA} configurations", journal = j-TODAES, volume = "16", number = "4", pages = "43:1--43:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003703", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In many applications, subsequent data manipulations differ only in a small set of parameter values. Because of their reconfigurability, FPGAs (field programmable gate arrays) can be configured with a specialized circuit each time the parameter values change. This technique is called dynamic data folding. The specialized circuits are smaller and faster than their generic counterparts. However, the overhead involved in generating the configurations for the specialized circuits at runtime is very large when conventional tools are used, and this overhead will in many cases negate the benefit of using optimized configurations. 
This article introduces an automatic method for generating runtime parameterizable configurations from arbitrary Boolean circuits.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dong:2011:PCS, author = "Wei Dong and Peng Li", title = "Parallel circuit simulation with adaptively controlled projective integration", journal = j-TODAES, volume = "16", number = "4", pages = "44:1--44:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003704", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, a parallel transient circuit simulation approach based on an adaptively-controlled time-stepping scheme is proposed. Different from the widely-used implicit numerical integration techniques in most transient simulators, this work exploits the recently-developed explicit telescopic projective numerical integration method for efficient parallel circuit simulation. Because telescopic projective integration addresses the well-known stability issue of explicit numerical integrations by adopting combinations of inner integrators and outer integrators in a multilevel fashion, the simulation time-step is no longer limited by the smallest time constant in the circuit. 
With dynamic control of telescopic projective integration, the proposed projective integration framework not only leads to noticeable efficiency improvement in circuit simulation, it also lends itself to straightforward parallelization due to its explicit nature.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Maestro:2011:MEL, author = "Juan Antonio Maestro and Pedro Reviriego and Sanghyeon Baeg and Shijie Wen and Richard Wong", title = "Mitigating the effects of large multiple cell upsets {(MCUs)} in memories", journal = j-TODAES, volume = "16", number = "4", pages = "45:1--45:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003705", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Reliability is a critical issue for memories. Radiation particles that hit the device can cause errors in some cells, which can lead to data corruption. To avoid this problem, memories are protected with per-word error correction codes (ECCs). Typically, single-error correction and double-error detection (SEC-DED) codes are used. As technology scales, errors caused by radiation particles on memories tend to affect more than one cell---what is known as a multiple cell upset (MCU). To ensure that only a single cell is affected in each word, interleaving is used. 
With interleaving, cells that belong to the same word are placed at a sufficient distance such that an MCU will only affect a single cell on each word.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Healy:2011:IMF, author = "Michael B. Healy and Fayez Mohamood and Hsien-Hsin S. Lee and Sung Kyu Lim", title = "Integrated microarchitectural floorplanning and run-time controller for inductive noise mitigation", journal = j-TODAES, volume = "16", number = "4", pages = "46:1--46:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003706", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a design methodology using two complementary techniques to address high-frequency inductive noise in the early design phase of a microprocessor. First, we propose a noise-aware floorplanning technique that uses microarchitectural profile information to create noise-aware floorplans. Second, we present the design of a dynamic inductive-noise controlling mechanism at the microarchitectural level, which limits the on-die current demand within predefined bounds, regardless of the native power and current characteristics of running applications. 
By dynamically monitoring the access patterns of microarchitectural modules, our mechanism can effectively limit simultaneous switching activity of close-by modules, thereby leveling voltage ringing at local power-pins.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2011:ICA, author = "Jin-Tai Yan", title = "{IO} connection assignment and {RDL} routing for flip-chip designs", journal = j-TODAES, volume = "16", number = "4", pages = "47:1--47:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003707", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Given a set of IO buffers and a set of bump balls with the capacity constraints between two adjacent bump balls, based on the construction of the Delaunay triangulation and a Manhattan Voronoi diagram, an $ O(n^2) $ assignment algorithm is proposed to assign all the IO connections in a single redistribution layer for IO connection assignment, where $n$ is the number of bump balls in a flip-chip design.
Furthermore, based on the computation of the probabilistic congestion for the assigned IO connections, an O(n2) routing algorithm is proposed to minimize the total wirelength to route all the assigned IO connections while satisfying the capacity constraints for single-layer RDL routing.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2011:CTS, author = "Tak-Yung Kim and Taewhan Kim", title = "Clock Tree synthesis for {TSV}-based {$3$D} {IC} designs", journal = j-TODAES, volume = "16", number = "4", pages = "48:1--48:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003708", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For the cost-effective implementation of clock trees in through-silicon via (TSV)-based 3D IC designs, we propose core algorithms for 3D clock tree synthesis. For a given abstract tree topology, we propose DLE-3D (\underline{d}eferred \underline{l}ayer \underline{e}mbedding for \underline{l} ICs), which optimally finds the embedding layers of tree nodes, so that the TSV cost required for a tree topology is minimized, and DME-3D (\underline{d}eferred \underline{m}erge \underline{e}mbedding for \underline{l} ICs), which is an extended algorithm of the 2D merging segment, to minimize the total wirelength in 3D design space, with the consideration of the TSV effect on delay. 
In addition, when an abstract tree topology is not given, we propose NN-3D (\underline{n}earest \underline{n}eighbor selection for \underline{l} ICs), which constructs a (TSV and wirelength) cost-effective abstract tree topology for 3D ICs.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lu:2011:CBP, author = "Jianchao Lu and Baris Taskin", title = "Clock buffer polarity assignment with skew tuning", journal = j-TODAES, volume = "16", number = "4", pages = "49:1--49:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003709", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A clock polarity assignment method is proposed that reduces the peak current on the vdd/gnd rails of an integrated circuit. The impacts of (i) the output capacitive load on the peak current drawn by the sink-level clock buffers, and (ii) the buffer/inverter replacement scheme of polarity assignment on timing accuracy are considered in the formulation. The proposed sink-level-only polarity assignment is performed by a lexi-search algorithm in order to balance the peak current on the clock tree. Most of the previous polarity assignment methods that do not include clock tree resynthesis lead to an undesirable increase in the worst corner clock skew.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2011:ALR, author = "Shaoxi Wang and Xinzhang Jia and Arthur B. 
Yeh and Lihong Zhang", title = "Analog layout retargeting using geometric programming", journal = j-TODAES, volume = "16", number = "4", pages = "50:1--50:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003710", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "To satisfy the requirements of complex and special analog layout constraints, a new analog layout retargeting method is presented in this article. Our approach uses geometric programming (GP) to achieve new technology design rules, implement device symmetry and matching constraints, and manage parasitics optimization. The GP, a class of nonlinear optimization problem, can be transferred or fitted into a convex optimization problem. Therefore, a global optimum solution can be achieved. Moreover, the GP can address problems with large-scale variables and constraints without setting an initialization variable range. 
To meet the prerequisites of the GP methodology for analog layout automation, we propose three kinds of mathematical transformations, including negative coefficient transformation, fraction transformation, and maximum of posynomial transformation.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Duarte:2011:HDP, author = "Filipa Duarte and Jos Hulzink and Jun Zhou and Jan Stuijt and Jos Huisken and Harmke {De Groot}", title = "A {36$ \mu $W} heartbeat-detection processor for a wireless sensor node", journal = j-TODAES, volume = "16", number = "4", pages = "51:1--51:??", month = oct, year = "2011", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2003695.2003711", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 22 09:25:48 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In order to provide better services to elderly people, home healthcare monitoring systems have been increasingly deployed. Typically, these systems are based on wireless sensor nodes, and should utilize very low energy during their lifetimes, as they are powered by scavengers. In this article, we present an ultra-low power processing system for a wireless sensor node for very low duty cycle applications. In the CoolBio system-on-chip, we utilized several power reduction techniques at both the architecture level and the circuit level. 
These techniques include feature extraction, voltage and frequency scaling, clock and power gating and a redesign of key standard cells.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Verbeek:2012:EFS, author = "Freek Verbeek and Julien Schmaltz", title = "Easy Formal Specification and Validation of Unbounded {Networks-on-Chips} Architectures", journal = j-TODAES, volume = "17", number = "1", pages = "1:1--1:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071357", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a formal specification and validation environment to prove safety and liveness properties of parametric --- unbounded --- NoCs architectures described at a high level of abstraction. The environment improves the GeNoC approach with two new theorems, proving evacuation and starvation freedom. The application of the validation methodology is illustrated on a HERMES NoC with adaptive west-first routing and wormhole switching. This case study illustrates the strong compositional aspect of the GeNoC environment.
The complete specification of this HERMES instance, together with the proof that the specification is deadlock-free, starvation free, and all messages eventually leave the network at their correct destination, could be achieved in about a week.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pasha:2012:SLS, author = "Muhammad Adeel Pasha and Steven Derrien and Olivier Sentieys", title = "System-Level Synthesis for Wireless Sensor Node Controllers: a Complete Design Flow", journal = j-TODAES, volume = "17", number = "1", pages = "2:1--2:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071358", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Wireless sensor networks (WSN) is a new and very challenging research field for embedded system design automation. Engineering a WSN node hardware platform is known to be a tough challenge, as the design must enforce many severe constraints, among which energy dissipation is by far the most important one. WSN node devices have until now been designed using off-the-shelf low-power microcontroller units (MCUs), even if their power dissipation is still an issue and hinders the widespread use of this new technology. 
In this work, we propose a complete system-level flow for an alternative approach based on the concept of hardware microtasks, which relies on hardware specialization and power gating to drastically improve the energy efficiency of the computational/control part of the node.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Aksoy:2012:OAM, author = "Levent Aksoy and Eduardo Costa and Paulo Flores and Jose Monteiro", title = "Optimization Algorithms for the Multiplierless Realization of Linear Transforms", journal = j-TODAES, volume = "17", number = "1", pages = "3:1--3:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071359", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article addresses the problem of finding the fewest numbers of addition and subtraction operations in the multiplication of a constant matrix with an input vector---a fundamental operation in many linear digital signal processing transforms. We first introduce an exact common subexpression elimination (CSE) algorithm that formalizes the minimization of the number of operations as a 0-1 integer linear programming problem. 
Since there are still instances that the proposed exact algorithm cannot handle due to the NP-completeness of the problem, we also introduce a CSE heuristic algorithm that iteratively finds the most common 2-term subexpressions with the minimum conflicts among the expressions.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Leung:2012:PVI, author = "Mario K. Y. Leung and Eric K. I. Chio and Evangeline F. Y. Young", title = "Postplacement Voltage Island Generation", journal = j-TODAES, volume = "17", number = "1", pages = "4:1--4:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071360", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High power consumption will not only shorten the battery life of handheld devices, but also cause thermal and reliability problems. To lower power consumption, one way is to reduce the supply voltage as in multisupply voltage (MSV) designs. In region-based MSV, a circuit will be partitioned into ``voltage islands'' where each island occupies a contiguous physical space and operates at one supply voltage. In the work of Wu et al. [2005], this voltage supply problem is addressed, and the input placement is partitioned into a set of rectangular voltage islands by a slicing structure. However, the constraint of using a slicing structure prohibits better solutions in their approach.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2012:CMI, author = "Hai Wang and Sheldon X.-D. 
Tan and Ryan Rakib", title = "Compact Modeling of Interconnect Circuits over Wide Frequency Band by Adaptive Complex-Valued Sampling Method", journal = j-TODAES, volume = "17", number = "1", pages = "5:1--5:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071361", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a new model order-reduction method for compact modeling of interconnect circuits over wide frequency band using a novel complex-valued adaptive sampling and error estimation scheme. We address the outstanding error control problems in the existing sampling-based reduction framework over a frequency band. Our new method, WBMOR, explicitly and efficiently computes the exact residual errors to guide the sampling process. We show by sampling along the imaginary axis and performing a new complex-valued reduction that the reduced model will match exactly with the original model at the sample points. 
Additionally, we show in theory that the proposed method can achieve the error bound over a given frequency range.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2012:RDP, author = "Jing-Wei Lin and Tsung-Yi Ho and Iris Hui-Ru Jiang", title = "Reliability-Driven Power\slash Ground Routing for Analog {ICs}", journal = j-TODAES, volume = "17", number = "1", pages = "6:1--6:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071362", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Electromigration and voltage drop (IR-drop) are two major reliability issues in modern IC design. Electromigration gradually creates permanently open or short circuits due to excessive current densities; IR-drop causes insufficient power supply, thus degrading performance or even inducing functional errors because of nonzero wire resistance. Both types of failure can be triggered by insufficient wire widths. Although expanding the wire width alleviates electromigration and IR-drop, unlimited expansion not only increases the routing cost, but may also be infeasible due to the limited routing resource. In addition, electromigration and IR-drop manifest mainly in the power/ground (P/G) network. Therefore, taking wire widths into consideration is desirable to prevent electromigration and IR-drop at P/G routing.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ioannides:2012:CDT, author = "Charalambos Ioannides and Kerstin I. 
Eder", title = "Coverage-Directed Test Generation Automated by Machine Learning --- a Review", journal = j-TODAES, volume = "17", number = "1", pages = "7:1--7:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071363", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing complexity and size of digital designs, in conjunction with the lack of a potent verification methodology that can effectively cope with this trend, continue to inspire engineers and academics in seeking ways to further automate design verification. In an effort to increase performance and to decrease engineering effort, research has turned to artificial intelligence (AI) techniques for effective solutions. The generation of tests for simulation-based verification can be guided by machine-learning techniques. In fact, recent advances demonstrate that embedding machine-learning (ML) techniques into a coverage-directed test generation (CDG) framework can effectively automate the test generation process, making it more effective and less error-prone.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pan:2012:ERE, author = "Zhaoliang Pan and Melvin A. 
Breuer", title = "Error Rate Estimation for Defective Circuits via Ones Counting", journal = j-TODAES, volume = "17", number = "1", pages = "8:1--8:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071364", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With VLSI circuit feature size scaling down, it is becoming more difficult and expensive to achieve a desired level of yield. Error-tolerance employs defective chips that occasionally produce erroneous yet acceptable results in targeted applications, and has been proposed as one way to increase effective yield. These chips are characterized by criteria set by specific applications. Error rate, an upper-bound on how frequent errors occur at an output, is one such criterion. In this article we focus on the following problem: given a combinational logic circuit that is defective, and hence occasionally produces an erroneous output, how can we determine the error rate of each output line by using ones counting?", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Peng:2012:SSE, author = "Huan-Kai Peng and Hsuan-Ming Huang and Yu-Hsin Kuo and Charles H.-P. 
Wen", title = "Statistical Soft Error Rate {(SSER)} Analysis for Scaled {CMOS} Designs", journal = j-TODAES, volume = "17", number = "1", pages = "9:1--9:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article re-examines the soft error effect caused by radiation-induced particles beyond the deep submicron regime. Considering the impact of process variations, voltage pulse widths of transient faults are found no longer monotonically diminishing after propagation, as they were formerly. As a result, the soft error rates in scaled electronic designs escape traditional static analysis and are seriously underestimated. In this article we formulate the statistical soft error rate (SSER) problem and present two frameworks to cope with the aforementioned sophisticated issues. The table-lookup framework captures the change of transient-fault distributions implicitly by using a Monte-Carlo approach, whereas the SVR-learning framework does the task explicitly by using statistical learning theory.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gong:2012:FNM, author = "Fang Gong and Xuexin Liu and Hao Yu and Sheldon X. D. 
Tan and Junyan Ren and Lei He", title = "A Fast Non-{Monte-Carlo} Yield Analysis and Optimization by Stochastic Orthogonal Polynomials", journal = j-TODAES, volume = "17", number = "1", pages = "10:1--10:??", month = jan, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2071356.2071366", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 26 16:38:42 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Performance failure has become a significant threat to the reliability and robustness of analog circuits. In this article, we first develop an efficient non-Monte-Carlo (NMC) transient mismatch analysis, where transient response is represented by stochastic orthogonal polynomial (SOP) expansion under PVT variations and probabilistic distribution of transient response is solved. We further define performance yield and derive stochastic sensitivity for yield within the framework of SOP, and finally develop a gradient-based multiobjective optimization to improve yield while satisfying other performance constraints. 
Extensive experiments show that compared to Monte Carlo-based yield estimation, our NMC method achieves up to 700X speedup and maintains 98\% accuracy.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2012:ESF, author = "Meng-Huan Wu and Peng-Chih Wang and Cheng-Yang Fu and Ren-Song Tsay", title = "An Extended {SystemC} Framework for Efficient {HW\slash SW} Co-Simulation", journal = j-TODAES, volume = "17", number = "2", pages = "11:1--11:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159543", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose an extended SystemC framework that directly enables software simulation in SystemC. Although SystemC has been widely adopted for system-level simulation of hardware designs nowadays, to complete HW/SW co-simulation, it still requires an additional instruction set simulator (ISS) for software execution. However, the heavy intercommunication overheads between the two heterogeneous simulators would significantly slow down simulation performance. To deal with this issue, our proposed approach automatically generates high-speed and equivalent SystemC models for target software applications that can be directly integrated with hardware models for complete HW/SW co-simulation. In addition, to properly handle multitasking, an efficient OS model is devised to support accurate preemptive scheduling. 
Since both the generated application model and the OS model are constructed in SystemC modules, our approach avoids heavy intercommunication overheads and achieves over 1,000 times faster simulation than that of the conventional ISS-SystemC approach. Experimental results demonstrate that our extended SystemC approach can perform at 50 to 220 MIPS while offering accurate simulation results.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhou:2012:ONC, author = "Pingqiang Zhou and Ping-Hung Yuh and Sachin S. Sapatnekar", title = "Optimized {$3$D} Network-on-Chip Design Using Simulated Allocation", journal = j-TODAES, volume = "17", number = "2", pages = "12:1--12:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159544", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Three-dimensional (3D) silicon integration technologies have provided new opportunities for Network-on-Chip (NoC) architecture design in Systems-on-Chip (SoCs). In this article, we consider the application-specific NoC architecture design problem in a 3D environment. We present an efficient floorplan-aware 3D NoC synthesis algorithm based on simulated allocation (SAL), a stochastic method for traffic flow routing, and accurate power and delay models for NoC components. We demonstrate that this method finds greatly improved solutions compared to a baseline algorithm reflecting prior work. To evaluate the SAL method, we compare its performance with the widely used simulated annealing (SA) method and show that SAL is much faster than SA for this application, while providing solutions of very similar quality. 
We then extend the approach from a single-path routing to a multipath routing scheme and explore the trade-off between power consumption and runtime for these two schemes. Finally, we study the impact of various factors on the network performance in 3D NoCs, including the TSV count and the number of 3D tiers. Our studies show that link power and delay can be significantly improved when moving from a 2D to a 3D implementation, but the improvement flattens out as the number of 3D tiers goes beyond a certain point.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sun:2012:PTA, author = "Guangyu Sun and Huazhong Yang and Yuan Xie", title = "Performance\slash Thermal-Aware Design of {$3$D}-Stacked {L2} Caches for {CMPs}", journal = j-TODAES, volume = "17", number = "2", pages = "13:1--13:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159545", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Three-dimensional (3D) stacking technology enables integration of more memory on top of chip multiprocessors (CMPs). As the number of cores and the capacity of on-chip memory increase, the Non-Uniform Cache Architecture (NUCA) becomes more attractive. Compared to 2D cases, 3D stacking provides more options for the design of on-chip memory due to numerous advantages, such as the extra layout dimension, low latency across layers, etc. On the other hand, 3D stacking aggravates the thermal problem due to the increase of power density. In this work, we first study the design of 3D-stacked set-associative L2 caches through managing the placement of cache ways. 
The evaluation results show that the placement and corresponding management of 3D cache ways have an impact on the performance of CMPs. Then, we show that the efficiency of thermal control is also related to the placement of cache ways. For caches implemented with different memory technologies, the placement and management of cache ways have different effects on power consumption and power distribution. Consequently, we propose techniques to improve the efficiency of thermal control for different memory technologies. The evaluation results show the trade-off between performance and thermal control efficiency.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2012:TAS, author = "Chin-Hsien Wu and Hsin-Hung Lin", title = "Timing Analysis of System Initialization and Crash Recovery for a Segment-Based Flash Translation Layer", journal = j-TODAES, volume = "17", number = "2", pages = "14:1--14:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159546", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recently, the capacity of flash-memory storage systems has grown rapidly, and flash-memory technology has advanced along with the wave of consumer electronics and embedded systems. In order to properly manage product cost and initialization performance, vendors face serious challenges in system design and analysis. Thus, the timing analysis of system initialization and crash recovery for a segment-based flash translation layer has become an important research topic. This article focuses on system initialization, crash recovery, and timing analysis. 
The timing analysis of system initialization involves the relationship between the size of the main memory and the system initialization time. The timing analysis of crash recovery explains the worst case recovery time. The experiments in this study show that the timing analysis of system initialization and crash recovery can be applied to the segment-based flash translation layer.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Milder:2012:CGH, author = "Peter Milder and Franz Franchetti and James C. Hoe and Markus P{\"u}schel", title = "Computer Generation of Hardware for Linear Digital Signal Processing Transforms", journal = j-TODAES, volume = "17", number = "2", pages = "15:1--15:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159547", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Linear signal transforms such as the discrete Fourier transform (DFT) are very widely used in digital signal processing and other domains. Due to high performance or efficiency requirements, these transforms are often implemented in hardware. This implementation is challenging due to the large number of algorithmic options (e.g., fast Fourier transform algorithms or FFTs), the variety of ways that a fixed algorithm can be mapped to a sequential datapath, and the design of the components of this datapath. The best choices depend heavily on the resource budget and the performance goals of the target application. Thus, it is difficult for a designer to determine which set of options will best meet a given set of requirements. 
In this article we introduce the Spiral hardware generation framework and system for linear transforms. The system takes a problem specification as input as well as directives that define characteristics of the desired datapath. Using a mathematical language to represent and explore transform algorithms and datapath characteristics, the system automatically generates an algorithm, maps it to a datapath, and outputs a synthesizable register transfer level Verilog description suitable for FPGA or ASIC implementation. The quality of the generated designs rivals the best available handwritten IP cores.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Weng:2012:TOS, author = "Shih-Hung Weng and Yu-Min Kuo and Shih-Chieh Chang", title = "Timing Optimization in Sequential Circuit by Exploiting Clock-Gating Logic", journal = j-TODAES, volume = "17", number = "2", pages = "16:1--16:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159548", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Clock gating is a popular technique for reducing power dissipation. In a circuit with clock gating, the clock signal can be shut off without changing the functionality under certain clock-gating conditions. In this article, we observe that the clock-gating conditions and the next-state function of a Flip-Flop (FF) are correlated and can be used for sequential circuit optimization. We also show that the implementation of the next-state function of any FF can be just an inverter if the clock signal is appropriately gated. 
By exploiting the flexibility between the clock-gating conditions and the next-state function, we propose an iterative optimization algorithm to improve the timing of sequential circuits. We present experimental results of a set of benchmark circuits with a timing improvement of 10.20\% on average.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kurimoto:2012:YRI, author = "Masanori Kurimoto and Jun Matsushima and Shigeki Ohbayashi and Yoshiaki Fukui and Michio Komoda and Nobuhiro Tsuda", title = "A Yield and Reliability Improvement Methodology Based on Logic Redundant Repair with a Repairable Scan Flip-Flop Designed by Push Rule", journal = j-TODAES, volume = "17", number = "2", pages = "17:1--17:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159549", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a yield improvement methodology which repairs a faulty chip due to logic defect by using a repairable scan flip-flop (R-SFF). Our methodology improves area penalty, which is a large issue for logic repair technology in actual products, by using repair grouping and a redundant cell insertion algorithm and by pushing the design rule for the repairable area of R-SFF. Additionally, compared with the conventional method, we reduce the number of wire connections around redundant cells by improving the replacement method of the faulty cell by the redundant cell. 
The proposed methodology reduces the total area penalty caused by the logic redundant repair to 3.6\% and improves the yield, that is the number of good chips on a wafer, by 4.7\% when the defect density is 1.0[1/cm$^2$]. Furthermore, we propose the strategy to repair the in-field failures due to latent defect for the chip whose repair function had not been used in the shipment test.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xiang:2012:SFF, author = "Dong Xiang and Zhen Chen and Laung-Terng Wang", title = "Scan Flip-Flop Grouping to Compress Test Data and Compact Test Responses for Launch-on-Capture Delay Testing", journal = j-TODAES, volume = "17", number = "2", pages = "18:1--18:??", month = apr, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2159542.2159550", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 20 17:41:41 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Test data compression is a much more difficult problem for launch-on-capture (LOC) delay testing, because test data for LOC delay testing is much more than that of stuck-at fault testing, and LOC delay fault test generation in the two-frame circuit model can specify many more inputs. A new scan architecture is proposed to compress test stimulus data, compact test responses, and reduce test application time for LOC delay fault testing. The new scan architecture merges a number of scan flip-flops into the same group, where all scan flip-flops in the same group are assigned the same values for all test pairs. Sufficient conditions are presented for including any pair of scan flip-flops into the same group for LOC transition, non-robust path delay, and robust path delay fault testing. 
Test data for LOC delay testing based on the new scan architecture can be compressed significantly. Test application time can also be reduced greatly. Sufficient conditions are presented to construct a test response compactor for LOC transition, non-robust, and robust path delay fault testing. Folded scan forest and test response compactor are constructed for further test data compression. Sufficient experimental results are presented to show the effectiveness of the method.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ray:2012:ISS, author = "Sandip Ray and Jayanta Bhadra and Magdy S. Abadir and Li-C. Wang and Aarti Gupta", title = "Introduction to special section on verification challenges in the concurrent world", journal = j-TODAES, volume = "17", number = "3", pages = "19:1--19:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209292", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Verbeek:2012:TFV, author = "Freek Verbeek and Julien Schmaltz", title = "Towards the formal verification of cache coherency at the architectural level", journal = j-TODAES, volume = "17", number = "3", pages = "20:1--20:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209293", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Cache coherency is one of the major issues in multicore systems. Formal methods, in particular model-checking, have been successful at verifying high-level protocols, but, to the best of our knowledge, the verification of cache coherency at the architectural level is still an open issue. All existing verification efforts assume a reliable interconnect, that is, messages eventually reach their destination. We discuss the challenge of discharging this assumption at the architectural level where implementation details of the interconnect are mixed with a cache coherency protocol. Our automatic approach is based on a well-defined set of primitives to express architectural models, a generic model of communication fabrics expressed in an automated theorem proving system, and a dedicated algorithm for deadlock and livelock detection. We argue that reliability depends on the interaction between the interconnect and the cache coherency protocol. They must be verified altogether as their combination creates intricate message dependencies. We sketch our verification approach and apply it to a simple write-invalidate protocol on the Spidergon network-on-chip from STMicroelectronics. Our approach is promising. 
For this simple protocol, networks with tens of agents and hundreds of components can be analyzed within seconds.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Holt:2012:FLP, author = "Jim Holt and Jaideep Dastidar and David Lindberg and John Pape and Peng Yang", title = "A full lifecycle performance verification methodology for multicore systems-on-chip", journal = j-TODAES, volume = "17", number = "3", pages = "21:1--21:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209294", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore Systems-on-Chip (MCSoC) are comprised of a rich set of processor cores, specialized hardware accelerators, and I/O interfaces. Functional verification of these complex designs is a critical and demanding task, however, focusing only on functional verification is very risky because the motivation for building such systems in the first place is to achieve high levels of system throughput. Therefore a functionally correct MCSoC that does not exhibit sufficient performance will fail in the market. In addition, limiting performance verification efforts to analyzing individual system components in isolation is insufficient due to: (1) the degree of system-level resource contention that an application domain imposes on the MCSoC, and (2) the degree of configuration flexibility that is typically afforded by an MCSoC. These factors motivate system-level performance verification of MCSoC. 
This article presents an important industrial case study of MCSoC performance verification involving both pre- and postsilicon analysis, highlighting the methodology used, the lessons learned, and recommendations for improvement.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Elwakil:2012:DRM, author = "Mohamed Elwakil and Zijiang Yang", title = "Deterministic replay for message-passing-based concurrent programs", journal = j-TODAES, volume = "17", number = "3", pages = "22:1--22:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209295", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Multicore Communications API (MCAPI) is a new message-passing API that was released by the Multicore Association. MCAPI provides an interface designed for closely distributed embedded systems with multiple cores on a chip and/or chips on a board. Similar to parallel programs in other domains, debugging MCAPI programs is a challenging task due to their nondeterministic behavior. 
In this article we present a tool that is capable of deterministically replaying MCAPI program executions, which provides valuable insight for MCAPI developers in case of failure.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Deniz:2012:VCM, author = "Etem Deniz and Alper Sen and Jim Holt", title = "Verification and coverage of message passing multicore applications", journal = j-TODAES, volume = "17", number = "3", pages = "23:1--23:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209296", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We describe verification and coverage methods for multicore software that uses message passing libraries for communication. Specifically, we provide techniques to improve reliability of software using the new industry standard MCAPI by the Multicore Association. We develop dynamic predictive verification techniques that allow us to find actual and potential errors in a multicore software. Some of these error types are deadlocks, race conditions, and violation of temporal assertions. We complement our verification techniques with a mutation-testing-based coverage metric. Coverage metrics enable measuring the quality of verification tests. We implemented our techniques in tools and validated them on several multicore programs that use the MCAPI standard. We implement our techniques in tools and experimentally show the effectiveness of our approach. We find errors that are not found using traditional dynamic verification techniques and we can potentially explore execution schedules different than the original program with our coverage tool. 
This is the first time such predictive verification and coverage metrics have been developed for MCAPI.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Qin:2012:DTG, author = "Xiaoke Qin and Prabhat Mishra", title = "Directed test generation for validation of multicore architectures", journal = j-TODAES, volume = "17", number = "3", pages = "24:1--24:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209297", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Functional validation is widely acknowledged as a major challenge for multicore architectures. Directed tests are promising since a significantly smaller number of directed tests can achieve the same coverage goal compared to constrained-random tests. SAT-based bounded model checking is effective for automated generation of directed tests (counterexamples). While existing approaches focus on clause forwarding between different bounds to reduce the test generation time, this article proposes a novel technique that exploits temporal, structural, and spatial symmetry in multicore designs at the same time. Our proposed technique enables the reuse of the knowledge learned from one core to the remaining cores in multicore architectures (structural symmetry), from one bound to the next for a given property (temporal symmetry), as well as from one property to other properties (spatial symmetry).
The experimental results demonstrate that our approach can significantly (3--10 times) reduce overall test generation time compared to existing approaches.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singh:2012:TRT, author = "Padmaraj Singh and Vijaykrishnan Narayanan and David L. Landis", title = "Targeted random test generation for power-aware multicore designs", journal = j-TODAES, volume = "17", number = "3", pages = "25:1--25:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209298", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore Register Transfer Level (RTL) model simulations are indispensable in exposing subtle memory subsystem bugs. Validating memory consistency, coherency, and atomicity is a crucial design verification task. Random MultiProcessor (MP) test generators play critical roles in pre- and post-silicon validation. The Advanced Configuration and Power Interface (ACPI) standard supports dynamic frequency and voltage scaling by controlling performance states (P-States), yet multicore verification is generally conducted with cores operating at the P0-State. Independently varying core frequencies introduces new sets of intracore and intercore traffic latencies. The article introduces targeted random MP test generation techniques for multicore P-State functional verification. It develops a simple coverage metric to evaluate MP test effectiveness. The metric is demonstrated on MIPS instruction-set-based random MP tests. A novel technique is introduced to modulate the test address space by the spherical Bessel function.
The technique delivers an order of magnitude coverage improvement over completely random tests. The article then outlines minimal P-State combinations to be exercised by MP tests. It also formulates two new methodologies to set up and apply MP tests for effective multicore P-State coverage. The methodologies are termed SimInit and SimTransition. First-level analyses indicate that these methods can deliver 97\% to 100\% improvement over random MP test coverage.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jang:2012:AAA, author = "Wooyoung Jang and David Z. Pan", title = "{A3MAP}: Architecture-aware analytic mapping for networks-on-chip", journal = j-TODAES, volume = "17", number = "3", pages = "26:1--26:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209299", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose novel and global Architecture-Aware Analytic MAPping (A3MAP) algorithms applied to Networks-on-Chip (NoCs) not only with homogeneous Processing Elements (PEs) on a regular mesh network as done by most previous application mapping algorithms but also with heterogeneous PEs on an irregular mesh or custom network. As the main contributions, we develop a simple yet efficient interconnection matrix that can easily model any core graph and network. Then, an application mapping problem is exactly formulated to Mixed Integer Quadratic Programming (MIQP). 
Since MIQP is NP-hard, we propose two effective heuristics, a successive relaxation algorithm achieving short runtime, called A3MAP-SR and a genetic algorithm achieving high mapping quality, called A3MAP-GA. We also propose a partition-based application mapping approach for large-scale NoCs, which provides better trade-off between performance and runtime. Experimental results show that A3MAP algorithms reduce total hop count, compared to the previous application mapping algorithms optimized for a regular mesh network, called NMAP [Murali and Micheli 2004] and for an irregular mesh and custom network, called CMAP [Tornero et al. 2008]. Furthermore, A3MAP algorithms make packets travel shorter distance than CMAP, which is related to energy consumption.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Foroozannejad:2012:PBM, author = "Mohammad H. Foroozannejad and Trevor Hodges and Matin Hashemi and Soheil Ghiasi", title = "Postscheduling buffer management trade-offs in streaming software synthesis", journal = j-TODAES, volume = "17", number = "3", pages = "27:1--27:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209300", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Streaming applications, which are abundant in many disciplines such as multimedia, networking, and signal processing, require efficient processing of a seemingly infinite sequence of input data. In the context of streaming software synthesis from data flow graphs, we study the inherent trade-off between memory requirement and compilation runtime, under a given task firing schedule. 
We utilize postscheduling analysis granularity to control the amount of details in characterization of buffer's spatio-temporal footprints. Subsequently, we transform the buffer allocation problem to two-dimensional packing of polygons, where complexity of the packing problem (e.g., polygon shapes) is determined by the analysis granularity. We develop an evolutionary packing optimization algorithm which readily yields buffer allocations. Experimental results highlight the trade-off between complexity of the analysis and the total buffer size of generated implementations. In addition, they show dramatic improvements in total buffer size, if one is willing to pay the additional cost in optimization runtime.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Salamy:2012:ISA, author = "Hassan Salamy and J. Ramanujam", title = "An {ILP} solution to address code generation for embedded applications on digital signal processors", journal = j-TODAES, volume = "17", number = "3", pages = "28:1--28:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209301", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Digital Signal Processors (DSPs) are a family of embedded processors designed under tight memory, area, and cost constraints. Many DSPs use irregular addressing modes where base-plus-offset mode is not supported. However, they often have Address Generation Units (AGUs) that can perform auto-increment/decrement address arithmetic instructions in parallel with Load/Store instructions. 
This feature can be utilized to reduce the number of explicit address arithmetic instructions and thus reduce the embedded application code size. This code size reduction is essential for this family of DSP as the code usually resides in the ROM and hence the code size directly translates into silicon area. An effective technique for optimized code generation is offset assignment. This is a well-used technique in the literature to decrease the code size by finding an offset assignment that can effectively utilize auto-increment/decrement. This problem is known as simple offset assignment when there is only one address register and as General Offset Assignment (GOA) for multiple available address registers. In this article, we present an optimal Integer Linear Programming (ILP) solution to the offset assignment problem with variable coalescing where more than one variable can share the same memory location. Variable permutation is also formulated to find the best access sequence to achieve the best offset assignment that decreases the code size the most. 
Experimental results on several benchmarks show the effectiveness of our variable permutation technique as well as the large improvement from the ILP-based solutions compared to heuristics.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Schafer:2012:DCH, author = "Benjamin Carrion Schafer and Kazutoshi Wakabayashi", title = "Divide and conquer high-level synthesis design space exploration", journal = j-TODAES, volume = "17", number = "3", pages = "29:1--29:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209302", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A method to accelerate the Design Space Exploration (DSE) of behavioral descriptions for high-level synthesis based on a divide and conquer method called Divide and Conquer Exploration Algorithm (DC-ExpA) is presented. DC-ExpA parses an untimed behavioral description given in C or SystemC and clusters interdependent operations which are in turn explored independently by inserting synthesis directives automatically in the source code. The method then continues by combining the exploration results to obtain only Pareto-optimal designs. This method accelerates the design space exploration considerably and is compared against two previous methods: an Adaptive Simulated Annealer Exploration Algorithm (ASA-ExpA) that shows good optimality at high runtimes, and a pattern matching method called Clustering Design Space Exploration Acceleration (CDS-ExpA) that is fast but suboptimal. 
Our proposed method is orthogonal to previous exploration methods that focus on the exploration of resource constraints, allocation, binding, and/or scheduling. Our proposed method, on the contrary, sets local synthesis directives that decide upon the overall architectural structure of the design (e.g., mapping certain arrays to memories or registers). Results show that DC-ExpA explores the design space on average 61\% faster than ASA-ExpA, obtaining comparable results indicated by several quality indicators, for example, distance to reference Pareto-front, hypervolume, and Pareto dominance. Compared to CDS-ExpA it is 69\% slower, but obtains much better results compared to the same quality indicators.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Karfa:2012:FVC, author = "Chandan Karfa and Chittaranjan Mandal and Dipankar Sarkar", title = "Formal verification of code motion techniques using data-flow-driven equivalence checking", journal = j-TODAES, volume = "17", number = "3", pages = "30:1--30:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209303", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A formal verification method for checking correctness of code motion techniques is presented in this article. Finite State Machine with Datapath (FSMD) models have been used to represent the input and the output behaviors of each synthesis step.
The method introduces cutpoints in one FSMD, visualizes its computations as concatenation of paths from cutpoints to cutpoints, and then identifies equivalent finite path segments in the other FSMD; the process is then repeated with the FSMDs interchanged. Unlike many other reported techniques, the method is capable of verifying both uniform and nonuniform code motion techniques. It has been underlined in this work that for nonuniform code motions, identifying equivalent path segments involves model checking of some data-flow properties. Our method automatically identifies the situations where such properties are needed to be checked during equivalence checking, generates the appropriate properties, and invokes the model checking tool NuSMV to verify them. The correctness and the complexity of the method have been dealt with. Experimental results demonstrate the effectiveness of the method.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Linehan:2012:MDA, author = "{\'E}amonn Linehan and Eamonn O'Toole and Siobh{\'a}n Clarke", title = "Model-driven automation for simulation-based functional verification", journal = j-TODAES, volume = "17", number = "3", pages = "31:1--31:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209304", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Developing testbenches for dynamic functional verification of hardware designs is a software-intensive process that lies on the critical path of electronic system design. 
The increasing capabilities of electronic components are contributing to the construction of complex verification environments that are increasingly difficult to understand, maintain, extend, and reuse across projects. Model-driven software engineering addresses issues of complexity, productivity, and code quality through the use of high-level system models and subsequent automatic transformations. Reasoning about verification testbench decomposition becomes simpler at higher levels of abstraction. In particular, the aspect-oriented paradigm, when applied at the model level, can minimize the overlap in functionality between modules, improving maintainability and reusability. This article presents an aspect-oriented model-driven engineering process and toolset for the development of hardware verification testbenches. We illustrate how this process and toolset support modularized design and automatic transformation to verification environment-specific models and source code through an industry case study.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Qian:2012:FPS, author = "Haifeng Qian and Sachin S. Sapatnekar and Eren Kursun", title = "{Fast Poisson Solvers} for thermal analysis", journal = j-TODAES, volume = "17", number = "3", pages = "32:1--32:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209305", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate and efficient thermal analysis for a VLSI chip is crucial, both for sign-off reliability verification and for design-time circuit optimization.
To determine an accurate temperature profile, it is important to simulate a die together with its thermal mounts: this requires solving Poisson's equation on a nonrectangular 3D domain. This article presents a class of eigendecomposition-based Fast Poisson Solvers (FPS) for chip-level thermal analysis. We start with a solver that solves a rectangular 3D domain with mixed boundary conditions in $ O(N \cdot \log N) $ time, where $N$ is the dimension of the finite difference matrix. Then we reveal, for the first time in the literature, a strong relation between fast Poisson solvers and Green-function-based methods. Finally, we propose an FPS method that leverages the preconditioned conjugate gradient method to solve nonrectangular 3D domains efficiently. We demonstrate this approach on thermal analysis of an industrial microprocessor, showing accurate results verified by a commercial tool, and that it solves a system of dimension 4.54e6 in only 13 conjugate gradient iterations, with a runtime of 65 seconds, a 15X speedup over the popular ICCG solver.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Guthaus:2012:HPC, author = "Matthew R. Guthaus and Xuchu Hu and Gustavo Wilke and Guilherme Flach and Ricardo Reis", title = "High-performance clock mesh optimization", journal = j-TODAES, volume = "17", number = "3", pages = "33:1--33:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209306", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Clock meshes are extremely effective at producing low-skew regional clock networks that are tolerant of environmental and process variations. 
For this reason, clock meshes are used in most high-performance designs, but this robustness consumes significant power. In this work, we present two techniques to optimize high-performance clock meshes. The first technique is a mesh perturbation methodology for nonuniform mesh routing. The second technique is a skew-aware buffer placement through iterative buffer deletion. We demonstrate how these optimizations can achieve significant power reductions and a near elimination of short-circuit power. In addition, the total wire length is decreased, the number of required buffers is decreased, and both skew and robustness are improved on average when variation is considered.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2012:LBC, author = "Kuan-Yu Lin and Hong-Ting Lin and Tsung-Yi Ho and Chia-Chun Tsai", title = "Load-balanced clock tree synthesis with adjustable delay buffer insertion for clock skew reduction in multiple dynamic supply voltage designs", journal = j-TODAES, volume = "17", number = "3", pages = "34:1--34:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209307", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power consumption is known to be a crucial issue in current IC designs. To tackle this problem, Multiple Dynamic Supply Voltage (MDSV) designs are proposed as an efficient solution for power savings. However, the increasing variability of clock skew during the switching of power modes leads to an increase in the complication of clock skew reduction in MDSV designs. 
In this article, we propose a load-balanced clock tree synthesizer with Adjustable Delay Buffer (ADB) insertion for clock skew reduction in MDSV designs. The clock tree synthesizer adopts the Minimum Spanning Tree (MST) metric to estimate the interconnect capacitance and execute the graph-theoretic clustering. The power-mode-guided optimization is also embedded into the clock tree synthesizer for improving additional area overhead in the step of ADB insertion. After constructing the initial buffered clock tree, we insert the ADBs with delay value assignments to reduce clock skew in MDSV designs. The ADBs can be used to produce additional delays, hence the clock latencies and skew become tunable in a clock tree. An efficient algorithm of ADB insertion for the minimization of clock skew, area, and runtime in MDSV designs has been presented. Comparing with the state-of-the-art algorithm of ADB insertion, experimental results show maximum 42.40\% area overhead improvement. With the power-mode-guided optimization, the maximum improvement of area overhead can increase to 47.87\%.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2012:FHA, author = "Chien-Nan Jimmy Liu and Yen-Lung Chen and Chin-Cheng Kuo and I-Ching Tsai", title = "A fast heuristic approach for parametric yield enhancement of analog designs", journal = j-TODAES, volume = "17", number = "3", pages = "35:1--35:??", month = jun, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2209291.2209308", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jul 31 16:58:51 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In traditional yield enhancement approaches, a lot of computation efforts have to be paid first to find 
the feasible regions and the Pareto fronts, which will become a heavy cost for large analog circuits. In order to reduce the computation efforts, this article proposes a fast heuristic approach that tries to finish all iteration steps of the yield enhancement flow at behavior level. First, a novel force-directed Nominal Point Moving (NPM) algorithm is proposed to find a better nominal point without building the feasible regions. Then, an equation-based behavior-level sizing approach is proposed to map the NPM results at performance level to behavior-level parameters. A fast behavior-level Monte Carlo simulation is also proposed to shorten the iterative yield enhancement flow. Finally, using the obtained behavioral parameters as the sizing targets of each subblock, the device sizing time is significantly reduced instead of sizing from the system-level specifications directly. As demonstrated on several analog circuits, this heuristic approach could be another efficient methodology to help designers improve their analog circuits toward better yield.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tu:2012:MFS, author = "Chia-Heng Tu and Shih-Hao Hung and Tung-Chieh Tsai", title = "{MCEmu}: a Framework for Software Development and Performance Analysis of Multicore Systems", journal = j-TODAES, volume = "17", number = "4", pages = "36:1--36:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348840", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Developing software for heterogeneous multicore systems is particularly challenging even for experienced developers. 
While emulators have proven useful to application development, very few heterogeneous multicore emulators have been made available by vendors so far, as building an emulator for a heterogeneous multicore system has been a time-consuming and difficult task. Thus, we proposed a framework, called MCEmu, to speed up the process of building a heterogeneous multicore emulator by integrating existing and/or new processor emulators. MCEmu is designed to help system and application development, with a basic multicore board support package, an interprocessor communication library, and tools for debugging, tracing, and performance monitoring. In addition, MCEmu can run on a multicore host system to accelerate the emulation of data parallel applications. We show that MCEmu can be very useful for developing system software before the system becomes available, as it has helped us catch numerous functional and performance bugs which could have been hard to find. In this article, we present the design of MCEmu and demonstrate its capabilities with our case studies.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Alizadeh:2012:FVD, author = "Bijan Alizadeh", title = "Formal Verification and Debugging of Precise Interrupts on High Performance Microprocessors", journal = j-TODAES, volume = "17", number = "4", pages = "37:1--37:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348841", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increased parallelism provided by Out-Of-Order (OOO) and superscalar mechanisms have made the control portion of advanced processors more complicated so that the 
state-of-the-art formal verification techniques for Register-Transfer-Level (RTL) and gate-level designs cannot scale to the complexity of such complicated processors. Moreover, verification and debugging of exceptions and external interrupts on such processors are nontrivial tasks. Because the exceptions arrival time, the external interrupt arrival time, as well as the microprocessor response time must be precise, verification and debugging require sophisticated hardware and software capabilities. This article proposes techniques for effective verification and debugging of cycle-accurate OOO processors in the event of exceptions and external interrupts. The results show that our techniques reduce the complexity of the verification and debugging processes by reducing the number of simulation cycles (3.3 $ \times $ average reduction) and the number of state variables (8.7 $ \times $ average reduction) to be traced for localizing bugs.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mukherjee:2012:SAA, author = "Subhankar Mukherjee and Pallab Dasgupta and Siddhartha Mukhopadhyay and Scott Little and John Havlicek and Srikanth Chandrasekaran", title = "Synchronizing {AMS} Assertions with {AMS} Simulation: From Theory to Practice", journal = j-TODAES, volume = "17", number = "4", pages = "38:1--38:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348842", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The verification community anticipates the adoption of assertions in the Analog and Mixed-Signal (AMS) domain in the near future. 
Several questions need to be answered before AMS assertions are brought into practice, such as: (a) How will the languages for AMS assertions be different from the ones in the digital domain? (b) Does the analog simulator have to be assertion aware? (c) If so, then how and where on the time line will the AMS assertion checker synchronize with the analog simulator? and (d) What will be the performance penalty for monitoring AMS assertions accurately over analog simulation? This article attempts to answer these questions through theoretical analysis and empirical results obtained from industrial test cases. We study logics which extend Linear Temporal Logic (LTL) with predicates over real variables, and show that further extensions allowing the binding of real-valued variables across time makes the logic undecidable. We present a toolkit which can integrate with existing AMS simulators for checking AMS assertions on practical designs. We study the problem of synchronizing the AMS simulator with the AMS assertion checker and demonstrate the performance penalty of different synchronization options.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2012:RSP, author = "Hai Lin and Yunsi Fei", title = "Resource Sharing of Pipelined Custom Hardware Extension for Energy-Efficient Application-Specific Instruction Set Processor Design", journal = j-TODAES, volume = "17", number = "4", pages = "39:1--39:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348843", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Application-Specific Instruction set Processor (ASIP) has become an increasingly 
popular platform for embedded systems because of its high performance, flexibility, and short turn-around time. The hardware extension in ASIPs can speed-up program execution. However, it also incurs area overhead and extra static energy consumption. Traditional datapath merging techniques reduce the circuit overhead by reusing hardware modules for executing multiple operations. However, they introduce structural hazard for multiple custom instructions in sequence, and hence reduce the performance improvement. In this article, we introduce a pipelined configurable structure for the hardware extension in ASIPs, so that structural hazards can be remedied. With multiple subgraphs of operations selected, we design a novel operation-to-hardware mapping algorithm based on Integer Linear Programming (ILP) to automatically construct a resource-efficient pipelined configurable functional unit. Different resource sharing schemes would affect both the hardware overhead and the overall performance improvement. We analyze the design trade-offs between resource efficiency and performance improvement. 
At the end, we present our design space exploration results by setting the optimization objective to area, area and delay, and delay respectively.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2012:HSC, author = "Hai Lin and Tiansi Hu and Yunsi Fei", title = "A Hardware\slash Software Cooperative Custom Register Binding Approach for Register Spill Elimination in Application-Specific Instruction Set Processors", journal = j-TODAES, volume = "17", number = "4", pages = "40:1--40:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348844", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Application-Specific Instruction set Processor (ASIP) has become an important design choice for embedded systems. It can achieve both high flexibility offered by the base processor core and high performance and energy efficiency offered by the dedicated hardware extensions. Although a lot of efforts have been devoted to computation acceleration, for example, automatic custom instruction identification and synthesis, limited on-chip data storage elements including the register file and data cache have become a potential performance bottleneck. For custom instructions that have more inputs and/or outputs than the generic register file I/O ports, custom registers are added in ASIPs to satisfy the need of additional inputs and outputs, and traditionally they are used only by custom instructions. 
In this article, we propose a hardware/software cooperative approach with a linear scan register allocation algorithm, which allows base instructions to utilize the existing custom registers in ASIPs for eliminating register spills of the program. The data traffic between the base processor and off-chip memory can be replaced with energy-efficient on-chip communications between the processor core and custom hardware extensions. Our experimental results demonstrate that a significant performance gain can be achieved, orthogonal to improvements by other techniques in ASIP design.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2012:BOD, author = "An-Ping Wang and Jiwon Hahn and Mahshid Roumi and Pai H. Chou", title = "Buffer Optimization and Dispatching Scheme for Embedded Systems with Behavioral Transparency", journal = j-TODAES, volume = "17", number = "4", pages = "41:1--41:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348845", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a buffer minimization scheme with low dispatching overhead for embedded software processes. To accomplish this, we exploit behavioral transparency in the model of computation. In such a model (e.g., synchronous dataflow), the state of buffer requirements is determined completely by the firing sequence of the actors without requiring functional simulation of the actors. Fine-grained buffer allocation incurs high and code pointer overhead while coarse-grained allocation suffers from memory fragmentation. 
Instead, we propose a medium-grained, ``access-contiguous'' buffer allocation scheme that minimizes the total buffer space and pointer overhead. We formulate the buffer allocation problem as 2D tiles that represent the lifetime of the buffers to minimize their memory occupation spatially and temporally. Experimental results show that our scheme uses less data memory than existing techniques by 26\% on average, or up to 57\% in the best case. Our technique retains code modularity for dynamic configuration and, more importantly, enables many more applications that otherwise would not fit if implemented using previous state-of-the-art techniques.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gately:2012:AJO, author = "Matthew B. Gately and Mark B. Yeary and Choon Yik Tang", title = "An Algorithm for Jointly Optimizing Quantization and Multiple Constant Multiplication", journal = j-TODAES, volume = "17", number = "4", pages = "42:1--42:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348846", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a joint framework for quantization and Multiple Constant Multiplication (MCM) optimization, which yields a computationally efficient implementation of multiplierless multiplication in hardware and software. Frameworks of this nature have been developed in the context of Finite Impulse Response (FIR) filters, where frequency response specifications are used to drive the design. 
In this work, we look at a general case, considering as given a vector of ideal, real constants, which may come from any application and do not necessarily represent FIR filter coefficients. We first formulate a joint optimization problem for finding a fixed-point vector and a shift-add network that are optimal in terms of quantization error and MCM complexity. We then describe ways to finitize and prune the search space, leading to an efficient algorithm called JOINT\_SOLVE that solves the problem. Finally, via extensive randomized experiments, we show that our joint framework is notably more computationally efficient than a disjointed one, reducing the MCM cost by 15\%--60\% on moderate size problems.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2012:SAH, author = "Yonghwan Kim and Sanghoon Kwak and Taewhan Kim", title = "Synthesis of Adaptable Hybrid Adders for Area Optimization under Timing Constraint", journal = j-TODAES, volume = "17", number = "4", pages = "43:1--43:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348847", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Satisfying the timing constraint is the utmost concern in the integrated circuit design and it is true that most critical timing paths in a circuit cover one or more arithmetic components such as adder, subtractor, and multiplier of which addition logic is commonly involved. This work addresses the problem of redesigning the addition logic (in a form of hybrid adder) on a critical timing path to meet the timing constraint while minimally allocating the required addition logic. 
Unlike the conventional hybrid adder design schemes in which they assume uniform or specific patterns of input signal arrival times and minimize the latest timing of the output signals, our work extracts the required timing of each output signal as well as the input arrival times directly from the circuit and resynthesizes the addition logic by creating a customized hybrid adder that is best suited, in terms of logic area, for meeting the timing constraint of the circuit. Specifically, we propose a systematic approach of hybrid adder design exploration, basically following the principle of dynamic programming with well-controlled pruning techniques. This work is realistic and practically very useful in that it can be used as a timing optimizer to the computation-intensive circuits with a tight timing budget. We provide a set of diverse experimental data to show how much the proposed hybrid adder scheme is effective in meeting or reducing timing while maintaining the circuit area as minimal as possible.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Backes:2012:SCD, author = "John D. Backes and Marc D. Riedel", title = "The Synthesis of Cyclic Dependencies with {Boolean} Satisfiability", journal = j-TODAES, volume = "17", number = "4", pages = "44:1--44:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348848", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The accepted wisdom is that combinational circuits must have acyclic (i.e., feed-forward) topologies. Yet simple examples suggest that this is incorrect. 
In fact, introducing cycles (i.e., feedback) into combinational designs can lead to significant savings in area and in delay. Prior work described methodologies for synthesizing cyclic circuits with Sum-Of-Product (SOP) and Binary-Decision Diagram (BDD)-based formulations. Recently, techniques for analyzing and mapping cyclic circuits based on Boolean satisfiability (SAT) were proposed. This article presents a SAT-based methodology for synthesizing cyclic dependencies. The strategy is to generate cyclic functional dependencies through a technique called Craig interpolation. Given a choice of different functional dependencies, a branch-and-bound search is performed to pick the best one. Experiments on benchmark circuits demonstrate the effectiveness of the approach.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bild:2012:SNR, author = "David R. Bild and Robert P. Dick and Gregory E. Bok", title = "Static {NBTI} Reduction Using Internal Node Control", journal = j-TODAES, volume = "17", number = "4", pages = "45:1--45:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348849", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Negative Bias Temperature Instability (NBTI) is a significant reliability concern for nanoscale CMOS circuits. Its effects on circuit timing can be especially pronounced for circuits with standby-mode equipped functional units, because these units can be subjected to static NBTI stress for extended periods of time. 
This article describes Internal Node Control (INC), in which the inputs to some individual gates are directly manipulated to prevent this static NBTI fatigue. We prove that the INC selection problem is NP-complete and present a linear-time heuristic that can quickly determine near-optimal placements. This near-optimality is confirmed by comparing results for small benchmarks against optimal solutions from a mixed integer linear programming formulation of our problem. We evaluate the heuristic on the ISCAS85 benchmarks and the Synopsys DesignWare Library. Our heuristic reduces static NBTI-induced delay over a ten year period by 30--60\% and can reduce total path delay by an average 9.4\% when NBTI degradation is severe. The INC placements and sleep signal routing require only a 1.6\% increase in area.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2012:CDA, author = "Nai-Wen Chang and Tzu-Yin Lin and Sun-Yuan Hsieh", title = "Conditional Diagnosability of $k$-Ary $n$-Cubes under the {PMC} Model", journal = j-TODAES, volume = "17", number = "4", pages = "46:1--46:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348850", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Processor fault diagnosis plays an important role in measuring the reliability of multiprocessor systems and the diagnosis of many well-known interconnection networks. 
The conditional diagnosability, which is more general than the classical diagnosability, is to measure the diagnosability of a multiprocessor system under the assumption that all of the neighbors of any node in the system cannot fail at the same time. This study shows that the conditional diagnosability for $k$-ary $n$-cubes under the PMC model is $ 8 n - 7$ for $ k \geq 4$ and $ n \geq 4$.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mondal:2012:SEP, author = "Arijit Mondal and P. P. Chakrabarti and Pallab Dasgupta", title = "Symbolic-Event-Propagation-Based Minimal Test Set Generation for Robust Path Delay Faults", journal = j-TODAES, volume = "17", number = "4", pages = "47:1--47:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348851", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a symbolic-event-propagation-based scheme to generate hazard-free tests for robust path delay faults. This approach identifies all robustly testable paths in a circuit and the corresponding complete set of test vectors. We address the problem of finding a minimal set of test vectors that covers all robustly testable paths. We propose greedy and simulated-annealing-based algorithms to find the same. 
Results on ISCAS89 benchmark circuits show a considerable reduction in test vectors for covering all robustly testable paths.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2012:LST, author = "Shianling Wu and Laung-Terng Wang and Xiaoqing Wen and Wen-Ben Jone and Michael S. Hsiao and Fangfang Li and James Chien-Mo Li and Jiun-Lang Huang", title = "Launch-on-Shift Test Generation for Testing Scan Designs Containing Synchronous and Asynchronous Clock Domains", journal = j-TODAES, volume = "17", number = "4", pages = "48:1--48:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348852", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a hybrid Automatic Test Pattern Generation (ATPG) technique using the staggered Launch-On-Shift (LOS) scheme followed by the one-hot launch-on-shift scheme for testing delay faults in a scan design containing asynchronous clock domains. Typically, the staggered scheme produces small test sets but needs long ATPG runtime, whereas the one-hot scheme takes short ATPG runtime but yields large test sets. The proposed hybrid technique is intended to reduce test pattern count with acceptable ATPG runtime for multimillion-gate scan designs. In case the scan design contains multiple synchronous clock domains, and each group of synchronous clock domains is treated as a clock group and tested using a launch-aligned or a capture-aligned LOS scheme. 
By combining these schemes together, we found the pattern counts for two large industrial designs were reduced by approximately 1.6X to 1.8X, while the ATPG runtime was increased by 40\% to 50\%, when compared to the one-hot clocking scheme alone.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Khatib:2012:MRP, author = "Mohammed G. Khatib", title = "Migration-Resistant Policies for Probe-Wear Leveling in {MEMS} Storage Devices", journal = j-TODAES, volume = "17", number = "4", pages = "49:1--49:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348853", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Probes (read/write heads) in a MEMS storage device are susceptible to wear. We study probe wear, and analyze the causes of uneven wear. We show that under real-world workloads some probes can wear one order of magnitude faster than others. This premature expiry has severe consequences for reliability, timing performance, energy efficiency, and lifetime. Wear leveling precludes premature expiry and is thus necessary. We discuss the fundamental differences between probe wear in MEMS storage and medium wear in Flash, calling for a different treatment. We devise three policies to level probe wear. The policies provide a spectrum between best lifetime and least influence on the response time and energy efficiency of a MEMS storage device. We make the case that data migration can be prevented by augmenting the policies with a simple rule. 
We study the influence of the data layout configuration on the leveling performance of the policies.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lam:2012:EPL, author = "Tak-Kei Lam and Wai-Chung Tang and Xiaoqing Yang and Yu-Liang Wu", title = "{ECR}: a Powerful and Low-Complexity Error Cancellation Rewiring Scheme", journal = j-TODAES, volume = "17", number = "4", pages = "50:1--50:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348854", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Rewiring is known to be a class of logic restructuring technique that is at least equally powerful in flexibility compared to other logic transformation techniques. Especially it is wiring sensitive and is particularly useful for interconnect-based circuit synthesis processes. One of the most well-studied rewiring techniques is the ATPG-based Redundancy Addition and Removal (RAR) technique which adds a redundant alternative wire to make an originally irredundant target wire become redundant and thus removable. In this article, we propose a new Error-Cancellation-based Rewiring scheme (ECR) which can also identify non-RAR-based rewiring operations with high efficiency. In ECR scheme, it is not necessary for alternative wires to be redundant. Based on the notion of error cancellation, we analyze and reformulate the rewiring problem, and a more generalized rewiring scheme is developed to detect more rewiring cases which are not obtainable by existing schemes while it still maintains a low runtime complexity. 
Comparing with the most recent non-RAR rewiring tool IRRA, the total number of alternative wires found by our approach is about doubled (202\%) while the CPU time used is just slightly more (8\%) upon benchmarks preoptimized by ABC's rewriting. Our experimental results also suggest that the ECR engine is more powerful than IRRA in FPGA technology mapping.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shen:2012:FSF, author = "Ruijing Shen and Sheldon X.-D. Tan and Hai Wang and Jinjun Xiong", title = "Fast Statistical Full-Chip Leakage Analysis for Nanometer {VLSI} Systems", journal = j-TODAES, volume = "17", number = "4", pages = "51:1--51:??", month = oct, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2348839.2348855", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 22 10:59:18 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present a new full-chip statistical leakage estimation considering the spatial correlation condition (strong or weak). The new algorithm can deliver linear time, $ O(N) $, time complexity, where $N$ is the number of grids on chip. The proposed algorithm adopts a set of uncorrelated virtual variables over grid cells to represent the original physical random variables and the cell size is determined by the spatial correlation length. In this way, each physical variable is always represented by virtual variables locally. We prove the number of neighbor cells for each grid cell is not related to the condition of spatial correlation (from no correlation to 100\% correlated), which leads to linear time complexity in terms of number of gates. We compute the gate leakage by the orthogonal polynomials-based collocation method.
The total leakage of a whole chip can be computed by simply summing up the coefficients of corresponding orthogonal polynomials in each grid cell. Furthermore, we develop a look-up table to cache statistical information for each type of gate instead of calculating leakage for every single instance of gate on a chip. As a result, a new statistical leakage characterization in Standard Cell Library (SCL) is put forward. Furthermore, an incremental analysis algorithm is proposed to update the chip-level statistical leakage information efficiently after a few changes are made. The proposed method has no restrictions on static leakage models, or types of leakage distributions. The large circuit examples in 45nm CMOS process demonstrate the proposed algorithm is 1000X faster than a recently proposed grid-based method with similar accuracy and many orders of magnitude times speedup over the Monte Carlo method. Experimental results also show the incremental analysis provides about 10X further speedup. 
We expect the incremental analysis could achieve more speedup over the full leakage analysis for larger problem sizes.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Coskun:2012:ISS, author = "Ayse Kivilcim Coskun and Yung-Hsiang Lu and Qinru Qiu", title = "Introduction to the special section on adaptive power management for energy and temperature-aware computing systems", journal = j-TODAES, volume = "18", number = "1", pages = "1:1--1:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390192", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lari:2012:HPM, author = "Vahid Lari and Shravan Muddasani and Srinivas Boppu and Frank Hannig and Moritz Schmid and J{\"u}rgen Teich", title = "Hierarchical power management for adaptive tightly-coupled processor arrays", journal = j-TODAES, volume = "18", number = "1", pages = "2:1--2:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390193", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a self-adaptive hierarchical power management technique for massively parallel processor architectures, supporting a new resource-aware parallel computing paradigm called invasive computing. 
Here, an application can dynamically claim, execute, and release the resources in three phases: resource acquisition (invade), program loading/configuration and execution (infect), and release (retreat). Resource invasion is governed by dedicated decentralized hardware controllers, called invasion controllers (i ctrls), which are integrated into each processing element (PE). Several invasion strategies for claiming linearly connected or rectangular regions of processing resources are implemented. The key idea is to exploit the decentralized resource management inherent to invasive computing for power savings by enabling applications themselves to control the power for processing resources and invasion controllers using a hierarchical power-gating approach. We propose analytical models for estimating various components of energy consumption for faster design space exploration and compare them with the results obtained from a cycle-accurate C++ simulator of the processor array. In order to find optimal design trade-offs, various parameters like (a) energy consumption, (b) hardware cost, and (c) timing overheads are compared for different sizes of power domains. Experimental results show significant energy savings (up to 73\%) for selected characteristical algorithms and different resource utilizations. In addition, we demonstrate the accuracy of our proposed analytical model. Here, estimation errors less than 3.6\% can be reported.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Srivastav:2012:DEE, author = "Meeta Srivastav and M. B. 
Henry and Leyla Nazhandali", title = "Design of energy-efficient, adaptable throughput systems at near\slash sub-threshold voltage", journal = j-TODAES, volume = "18", number = "1", pages = "3:1--3:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390194", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Voltage scaling has been a prevalent method of saving energy for energy-constrained applications. However, current technology trends which shrink transistor sizes exacerbate process variation effects in voltage-scaled systems. Large variations in transistor parameters result in high variation in performance and power across the chip. These effects, if ignored at the design stage, will result in unpredictable behavior when deployed in the field. In this article, we leverage the benefits of voltage scaling methodology for obtaining energy efficiency and compensate for the loss in throughput by exploiting parallelism present in the various DSP designs. We show that such a hybrid method consumes 8\%--77\% less power, compared to simple dynamic voltage scaling over different throughputs. We study this system architecture in two different workload environments: static and dynamic. We show that to achieve the highest level of energy efficiency, the number of cores and the operating voltages vary widely between a BASE design versus a process variation-aware (PVA) design. We further demonstrate that the PVA design enjoys an average of 26.9\% and 51.1\% reduction in energy consumption for the static and dynamic designs, respectively. Since different cores will have a wide range of speeds at operating voltages close to near/sub-thresholds due to process variation, we gather characteristic behavior of each core.
With knowledge of the core speeds, we can further increase the energy efficiency. Furthermore, in this article, we show that of this methodology will be 49.3\% more energy efficient, compared to that building the system with no knowledge about the characteristics of each core.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sun:2012:STD, author = "Jin Sun and Rui Zheng and Jyothi Velamala and Yu Cao and Roman Lysecky and Karthik Shankar and Janet Roveda", title = "A self-tuning design methodology for power-efficient multi-core systems", journal = j-TODAES, volume = "18", number = "1", pages = "4:1--4:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390195", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article aims to achieve computational reliability and energy efficiency through codevelopment of algorithms, device, and circuit designs for application-specific, reconfigurable architectures. The new methodology characterizes aging-switching activity and aging-supply voltage relationships that are applicable for minimizing power consumption and task execution efficiency in order to achieve low bit energy ratio (BER). In addition, a new dynamic management algorithm (DMA) is proposed to alleviate device degradation and to extend system lifespan. In contrast to traditional workload balancing schemes in which cores are regarded as homogeneous, the new algorithm ranks cores as ``highly competitive,'' ``less competitive,'' and ``not competitive'' according to their various competitiveness. 
Core competitiveness is evaluated based upon their reliability, temperature, and timing requirements. Consequently, ``competitive'' cores will take charge of the majority of the tasks at relatively high voltage/frequency without violating power and timing budgets, while ``not competitive'' cores will have light workloads to ensure their reliability. The new approach combines intrinsic device characteristics (aging-switching activity and aging-supply voltage curves) into an integrated framework to achieve high reliability and low energy level with graceful degradation of system performance. Experimental results show that the proposed method has achieved up to 20\% power reduction, with about 4\% performance degradation (in terms of accomplished workload and system throughput), compared with traditional workload balancing methods. The new method also improves system mean-time-to-failure (MTTF) by up to 25\%.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Rodrigues:2012:IPP, author = "Rance Rodrigues and Arunachalam Annamalai and Israel Koren and Sandip Kundu", title = "Improving performance per watt of asymmetric multi-core processors via online program phase classification and adaptive core morphing", journal = j-TODAES, volume = "18", number = "1", pages = "5:1--5:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390196", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Asymmetric multi-core processors (AMPs) have been shown to outperform symmetric ones in terms of performance and performance/watt. 
Improved performance and power efficiency are achieved when the program threads are matched to their most suitable cores. Since the computational needs of a program may change during its execution, the best thread to core assignment will likely change with time. We have, therefore, developed an online program phase classification scheme that allows the swapping of threads when the current needs of the threads justify a change in the assignment. The architectural differences among the cores in an AMP can never match the diversity that exists among different programs and even between different phases of the same program. Consider, for example, a program (or a program phase) that has a high instruction-level parallelism (ILP) and will exhibit high power efficiency if executed on a powerful core. We can not, however, include such powerful cores in the designed AMP, since they will remain underutilized most of the time, and they are not power efficient when the programs do not exhibit a high degree of ILP. Thus, we must expect to see program phases where the designed cores will be unable to support the ILP that the program can exhibit. We, therefore, propose in this article a dynamic morphing scheme. This scheme will allow a core to gain control of a functional unit that is ordinarily under the control of a neighboring core during periods of intense computation with high ILP. This way, we dynamically adjust the hardware resources to the current needs of the application. Our results show that combining online phase classification and dynamic core morphing can significantly improve the performance/watt of most multithreaded workloads.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zanini:2012:OTC, author = "Francesco Zanini and David Atienza and Colin N. 
Jones and Luca Benini and Giovanni {De Micheli}", title = "Online thermal control methods for multiprocessor systems", journal = j-TODAES, volume = "18", number = "1", pages = "6:1--6:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390197", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With technological advances, the number of cores integrated on a chip is increasing. This in turn is leading to thermal constraints and thermal design challenges. Temperature gradients and hotspots not only affect the performance of the system but also lead to unreliable circuit operation and affect the lifetime of the chip. Meeting temperature constraints and reducing hotspots are critical for achieving reliable and efficient operation of complex multi-core systems. In this article, we analyze the use of four of the most promising families of online control techniques for thermal management of multiprocessors system-on-chip (MPSoC). In particular, in our exploration, we aim at achieving an online smooth thermal control action that minimizes the performance loss as well as the computational and hardware overhead of embedding a thermal management system inside the MPSoC. The definition of the optimization problem to tackle in this work considers the thermal profile of the system, its evolution over time, and current time-varying workload requirements. Thus, this problem is formulated as a finite-horizon optimal control problem, and we analyze the control features of different online thermal control approaches. 
In addition, we implemented the policies on an MPSoC hardware simulation platform and performed experiments on a cycle-accurate model of the eight-core Niagara multi-core architecture using benchmarks ranging from Web-accessing to playing multimedia. Results show different trade-offs among the analyzed techniques regarding the thermal profile, the frequency setting, the power consumption, and the implementation complexity.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cochran:2012:TPA, author = "Ryan Cochran and Sherief Reda", title = "Thermal prediction and adaptive control through workload phase detection", journal = j-TODAES, volume = "18", number = "1", pages = "7:1--7:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390198", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Elevated die temperature is a true limiter to the scalability of modern processors. With continued technology scaling in order to meet ever-increasing performance demands, it is no longer cost effective to design cooling systems that handle the worst-case thermal behaviors. Instead, cooling systems are designed to handle typical chip operation, while processors must detect and handle rare thermal emergencies. Most processors rely on measurements from integrated thermal sensors and dynamic thermal management (DTM) techniques in order to manage the trade-off between performance and thermal risk. Optimal management requires advanced knowledge of the thermal trajectory based on the current workload behaviors and operating conditions. 
In this work, we devise novel workload phase classification strategies that automatically discriminate among workload behaviors with respect to the thermal control response. We incorporate workload phase-detection and thermal models into a dynamic voltage and frequency scaling (DVFS) technique that can optimally control temperature during runtime based on thermal predictions. We demonstrate the effectiveness of our proposed techniques in predicting and adaptively controlling the thermal behavior of a real quad-core processor in response to a wide range of workloads. In comparison with state-of-the-art model predictive control (MPC) techniques in previous works on thermal prediction, we demonstrate a 5.8\% improvement in instruction throughput with the same number of thermal violations. In comparison with simple proportional-integral (PI) feedback control techniques, we improve instruction throughput by 3.9\%, while significantly reducing the number of thermal violations.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shi:2012:HND, author = "Liang Shi and Jianhua Li and Chun Jason Xue and Xuehai Zhou", title = "Hybrid nonvolatile disk cache for energy-efficient and high-performance systems", journal = j-TODAES, volume = "18", number = "1", pages = "8:1--8:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390199", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "NAND flash memory has been employed as disk cache in recent years. It has the advantages of high performance, low leakage power, and cost efficiency. 
However, flash memory's performance is limited by the inability of in-place updates, coarse access granularity, and a limited number of write/erase times. In this article, we propose a hybrid nonvolatile disk cache architecture for high-performance and energy-efficient systems, where the disk cache is implemented with a small-size phase change memory (PCM) and a large-size NAND flash memory. Compared with current flash memory-based disk cache, it has the following advantages. (1) System performance is improved as requests are carefully directed between PCM and flash memory; (2) the energy consumption of disk cache is substantially reduced with significant reduction of additional operations, such as garbage collections; (3) the efficiency of flash memory is improved with the reduction of write activities on flash memory; and (4) lifetime of NAND flash memory is increased with most of the write operations assigned to PCM, where PCM's lifetime is guaranteed to be longer than the lifetime of flash memory. 
Simulation results show that the proposed methods can substantially improve the system performance, energy consumption, and lifetime of the hybrid disk cache.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Singh:2012:ATA, author = "Amit Kumar Singh and Akash Kumar and Thambipillai Srikanthan", title = "Accelerating throughput-aware runtime mapping for heterogeneous {MPSoCs}", journal = j-TODAES, volume = "18", number = "1", pages = "9:1--9:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390200", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern embedded systems need to support multiple time-constrained multimedia applications that often employ multiprocessor-systems-on-chip (MPSoCs). Such systems need to be optimized for resource usage and energy consumption. It is well understood that a design-time approach cannot provide timing guarantees for all the applications due to its inability to cater for dynamism in applications. However, a runtime approach consumes large computation requirements at runtime and hence may not lend well to constrained-aware mapping. In this article, we present a hybrid approach for efficient mapping of applications in such systems. For each application to be supported in the system, the approach performs extensive design-space exploration (DSE) at design time to derive multiple design points representing throughput and energy consumption at different resource combinations. One of these points is selected at runtime efficiently, depending upon the desired throughput while optimizing for energy consumption and resource usage. 
While most of the existing DSE strategies consider a fixed multiprocessor platform architecture, our DSE considers a generic architecture, making DSE results applicable to any target platform. All the compute-intensive analysis is performed during DSE, which leaves for minimum computation at runtime. The approach is capable of handling dynamism in applications by considering their runtime aspects and providing timing guarantees. The presented approach is used to carry out a DSE case study for models of real-life multimedia applications: H.263 decoder, H.263 encoder, MPEG-4 decoder, JPEG decoder, sample rate converter, and MP3 decoder. At runtime, the design points are used to map the applications on a heterogeneous MPSoC. Experimental results reveal that the proposed approach provides faster DSE, better design points, and efficient runtime mapping when compared to other approaches. In particular, we show that DSE is faster by 83\% and runtime mapping is accelerated by 93\% for some cases. 
Further, we study the scalability of the approach by considering applications with large numbers of tasks.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Saladi:2012:CAC, author = "Kalyan Saladi and Harikumar Somakumar and Mahadevan Ganapathi", title = "Concurrency-aware compiler optimizations for hardware description languages", journal = j-TODAES, volume = "18", number = "1", pages = "10:1--10:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390201", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we discuss the application of compiler technology for eliminating redundant computation in hardware simulation. We discuss how concurrency in hardware description languages (HDLs) presents opportunities for expression reuse across different threads. While accounting for discrete event simulation semantics, we extend the data flow analysis framework to concurrent threads. In this process, we introduce a rewriting scheme named $ \partial $VF and a graph representation to model sensitivity relationships among threads. An algorithm for identifying common subexpressions as applied to HDLs is presented. 
Related issues, such as scheduling correctness, are also considered.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xydis:2012:CLE, author = "Sotirios Xydis and Kiamal Pekmestzi and Dimitrios Soudris and George Economakos", title = "Compiler-in-the-loop exploration during datapath synthesis for higher quality delay-area trade-offs", journal = j-TODAES, volume = "18", number = "1", pages = "11:1--11:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390202", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Design space exploration during high-level synthesis targets the computation of those design solutions which form optimal trade-off points. This quest for optimal trade-offs has been focused on studying the impact of various architectural-level parameters during high-level synthesis algorithms, silently neglecting the trade-offs produced from the combined impact of behavioral-level together with architectural-level parameters. We propose a novel design space exploration methodology that studies an extended instance of the solution space considering the effects of combining compiler- and architectural-level transformations. It is shown that exploring the design space in a global manner reveals new trade-off points, thus shifting towards higher quality design solutions. We use a combination of upper-bounding conditions together with gradient-based heuristic pruning to efficiently traverse the extended search space.
Our exploration framework delivers significant quality improvements without compromising the optimality (Pareto accuracy) of the discovered solutions, together with significant runtime reductions compared to exploring exhaustively the solution space at every allocation scenario.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kurimoto:2012:VWR, author = "Masanori Kurimoto and Takeshi Yamamoto and Satoshi Nakano and Atsuto Hanami and Hiroyuki Kondo", title = "Verification work reduction methodology in low-power chip implementation", journal = j-TODAES, volume = "18", number = "1", pages = "12:1--12:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390203", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In order to achieve satisfactory verification for complicated low-power demands in green products, we propose a verification work reduction methodology. It consists of three steps, namely virtual, direct actual, and actual model simulations. Virtual low-power simulation inserts low-power cells, such as isolators or level shifters, virtually and simulates logical behavior for design under test (DUT) based on user-defined power mode. Direct actual low-power simulation replaces behavior models without non-logical pins for some of the modules with actual models with non-logical pins, which are Vdd and Gnd, and simulates DUT in mixed level. Actual low-power simulation simulates DUT by using actual models with non-logical pins for all cells and hard macros.
We introduce techniques which classify the type of the bugs on which we focus at each verification step and prevent the concerned bugs from leaking to the latter verification step as much as possible. We applied our methodology to an actual chip and could reduce the total simulation period until tape-out by 38.8\% and the total chip development period by 10\%, compared with the conventional methodology.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jing:2012:SFE, author = "Naifeng Jing and Ju-Yueh Lee and Zhe Feng and Weifeng He and Zhigang Mao and Lei He", title = "{SEU} fault evaluation and characteristics for {SRAM}-based {FPGA} architectures and synthesis algorithms", journal = j-TODAES, volume = "18", number = "1", pages = "13:1--13:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390204", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Reliability has become an increasingly important concern for SRAM-based field programmable gate arrays (FPGAs). Targeting SEU (single event upset) in SRAM-based FPGAs, this article first develops an SEU evaluation framework that can quantify the failure sensitivity for each configuration bit during design time. This framework considers detailed fault behavior and logic masking on a post-layout FPGA application and performs logic simulation on various circuit elements for fault evaluation. Applying this framework on MCNC benchmark circuits, we first characterize SEUs with respect to different FPGA circuits and architectures, for example, bidirectional routing and unidirectional routing. 
We show that in both routing architectures, interconnects not only contribute to the lion's share of the SEU-induced functional failures, but also present higher failure rates per configuration bits than LUTs. Particularly, local interconnect multiplexers in logic blocks have the highest failure rate per configuration bit. Then, we evaluate three recently proposed SEU mitigation algorithms, IPD, IPF, and IPV, which are all logic resynthesis-based with little or no overhead on placement and routing. Different fault mitigating capabilities at the chip level are revealed, and it demonstrates that algorithms with explicit consideration for interconnect significantly mitigate the SEU at the chip level, for example, IPV achieves 61\% failure rate reduction on average against IPF with about 15\%. In addition, the combination of the three algorithms delivers over 70\% failure rate reduction on average at the chip level. The experiments also reveal that in order to improve fault tolerance at the chip level, it is necessary for future fault mitigation algorithms to concern not only LUT or interconnect faults, but also their interactions. We envision that our framework can be used to cast more useful insights for more robust FPGA circuits, architectures, and better synthesis algorithms.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dworak:2012:UIC, author = "Jennifer Dworak and Kundan Nepal and Nuno Alves and Yiwen Shi and Nicholas Imbriglia and R. 
Iris Bahar", title = "Using implications to choose tests through suspect fault identification", journal = j-TODAES, volume = "18", number = "1", pages = "14:1--14:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390205", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As circuits continue to scale to smaller feature sizes, wearout and latent defects are expected to cause an increasing number of errors in the field. Online error detection techniques, including logic implication-based checker hardware, are capable of detecting at least some of these errors as they occur. However, recovery may be expensive, and the underlying problem may lead to multiple failures of a core over time. In this article, we will investigate the diagnostic capability of logic implications to identify possible failure locations when an error is detected online. 
We will then utilize this information to select a highly efficient test set that can be used to effectively test the identified suspect locations in both the failing core and in other identical cores in the system.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mok:2012:DSL, author = "Santiago Mok and John Lee and Puneet Gupta", title = "Discrete sizing for leakage power optimization in physical design: a comparative study", journal = j-TODAES, volume = "18", number = "1", pages = "15:1--15:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390206", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While sizing has been studied for over three decades, the absence of a common framework with which to compare methods has made progress difficult to measure. In this article, we compare popular sizing techniques in which gates are chosen from a discrete standard cell library and slew and interconnect effects are accounted for. The difference between sizing methods reduces from roughly 53\% to 8\% between best and worst case after slew propagation is taken into account. 
In our benchmarks, no one sizing technique consistently outperforms the others.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2012:ECM, author = "John Lee and Puneet Gupta", title = "{ECO} cost measurement and incremental gate sizing for late process changes", journal = j-TODAES, volume = "18", number = "1", pages = "16:1--16:??", month = dec, year = "2012", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2390191.2390207", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jan 12 08:32:04 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Changes in the manufacturing process parameters may create timing violations in a design, making it necessary to perform an engineering change order (ECO) to correct these problems. We present a framework for performing incremental gate sizing for process changes late in the design cycle, and a method for creating initial designs that are robust to late process changes. This includes a method for measuring and estimating ECO cost and for transforming these costs into linear programming optimization problems. In the case of ECOs, the method reduces ECO costs on average, by 89\% in changed area compared to a leading commercial tool. 
Furthermore, the robust initial designs are, on average, 55\% less likely to need redesign in the future.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kornaros:2013:STC, author = "Georgios Kornaros and Dionisios Pnevmatikatos", title = "A survey and taxonomy of on-chip monitoring of multicore systems-on-chip", journal = j-TODAES, volume = "18", number = "2", pages = "17:1--17:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Billion transistor systems-on-chip increasingly require dynamic management of their hardware components and careful coordination of the tasks that they carry out. Diverse real-time monitoring functions assist towards this objective through the collection of important system metrics, such as throughput of processing elements, communication latency, or resource utilization for each application. The online evaluation of these metrics can result in localized or global decisions that attempt to improve aspects of system behavior, system performance, quality-of-service, power and thermal effects under nominal conditions. This work provides a comprehensive categorization of monitoring approaches used in multiprocessor SoCs. As adaptive systems are encountered in many disciplines, it is imperative to present the prominent research efforts in developing online monitoring methods. To this end we offer a taxonomy that groups strongly related techniques that designers increasingly use to produce more efficient and adaptive chips. 
The provided classification helps to understand and compare architectural mechanisms that can be used in systems, while one can envisage the innovations required to build real adaptive and intelligent systems-on-chip.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Backasch:2013:RVM, author = "Rico Backasch and Christian Hochberger and Alexander Weiss and Martin Leucker and Richard Lasslop", title = "Runtime verification for multicore {SoC} with high-quality trace data", journal = j-TODAES, volume = "18", number = "2", pages = "18:1--18:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore System-on-Chip (SoC) implementations of embedded systems are becoming very popular. In these systems it is possible to spread out computations over many cores. On one hand this leads to better energy efficiency if clock frequencies and core voltages are reduced. On the other hand this delivers very high performance to the software developer and thus enables complex software systems to be implemented. Unfortunately, debugging and validation of these systems becomes extremely difficult. Various technological approaches try to solve this dilemma. In this contribution we will show a new approach to observe multi-core SoCs and make their internal operations visible to external analysis tools. Also, we show that runtime verification can be employed to analyze and validate these internal operations while the system operates in its normal environment. 
The combination of these two approaches delivers unprecedented options to the developer to understand and verify system behavior even in complex multicore SoCs.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Costa:2013:CDO, author = "Jos{\'e} C. Costa and Jos{\'e} C. Monteiro", title = "Coverage-directed observability-based validation for embedded software", journal = j-TODAES, volume = "18", number = "2", pages = "19:1--19:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Motivated by the need for validation methodologies for embedded systems we propose a method for embedded software testing that can be integrated with existing hardware methods. Existing coverage-directed validation methods guarantee the execution of a certain percentage of the program code under test. Yet they do not generally verify whether the statements executed have any influence on the program's output. In the proposed method, a program statement is considered covered not simply for belonging to the executed path, but only if its execution has influence in some observable output. The paths are generated by searching the longest path in terms of the number of statements in the path. Given that not all paths are valid, we check their feasibility using a method based on Mixed Integer Linear Programming (MILP). Variable aliasing is accounted for by representing variables by their memory addresses when building this MILP problem. In this manner, for feasible paths, we obtain immediately the input values that allow the execution of the path. 
Using these inputs, we determine the statements actually observed. We repeat this process until a user-specified level of coverage has been achieved. In the generation of each new path, the statement coverage obtained so far and the feasibility of previous paths is taken into account. We present results that demonstrate the effectiveness of this methodology.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2013:DRG, author = "Chun-An Chen and Sun-Yuan Hsieh", title = "$ t / t $-Diagnosability of regular graphs under the {PMC} model", journal = j-TODAES, volume = "18", number = "2", pages = "20:1--20:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A system is $ t / t $-diagnosable if, given any collection of test results, the faulty nodes can be isolated to within a set of at most $t$ nodes provided that the number of faulty nodes does not exceed $t$. Given an {$N$}-vertex graph {$G$} that is regular with the common degree $d$ and has no cycle of three or four vertices, this study shows that {$G$} is $ (2 d - 2) / (2 d - 2) $ diagnosable if {$ N \geq 4 d - 30 > 0 $}. 
Based on this result, the $ t / t $-diagnosabilities of several classes of graphs can be computed efficiently.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2013:SNC, author = "Chen Huang and Bailey Miller and Frank Vahid and Tony Givargis", title = "Synthesis of networks of custom processing elements for real-time physical system emulation", journal = j-TODAES, volume = "18", number = "2", pages = "21:1--21:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Emulating a physical system in real-time or faster has numerous applications in cyber-physical system design and deployment. For example, testing of a cyber-device's software (e.g., a medical ventilator) can be done via interaction with a real-time digital emulation of the target physical system (e.g., a human's respiratory system). Physical system emulation typically involves iteratively solving thousands of ordinary differential equations (ODEs) that model the physical system. We describe an approach that creates custom processing elements (PEs) specialized to the ODEs of a particular model while maintaining some programmability, targeting implementation on field-programmable gate arrays (FPGAs). We detail the PE micro-architecture and accompanying automated compilation and synthesis techniques. Furthermore, we describe our efforts to use a high-level synthesis approach that incorporates regularity extraction techniques as an alternative FPGA-based solution, and also describe an approach using graphics processing units (GPUs). 
We perform experiments with five models: a Weibel lung model, a Lutchen lung model, an atrial heart model, a neuron model, and a wave model; each model consists of several thousand ODEs and targets a Xilinx Virtex 6 FPGA. Results of the experiments show that the custom PE approach achieves 4X-9X speedups (average 6.7X) versus our previous general ODE-solver PE approach, and 7X-10X speedups (average 8.7X) versus high-level synthesis, while using approximately the same or fewer FPGA resources. Furthermore, the approach achieves speedups of 18X-32X (average 26X) versus an Nvidia GTX 460 GPU, and average speedups of more than 100X compared to a six-core TI DSP processor or a four-core ARM processor, and 24X versus an Intel I7 quad core processor running at 3.06 GHz. While an FPGA implementation costs about 3X-5X more than the non-FPGA approaches, a speedup/dollar analysis shows 10X improvement versus the next best approach, with the trend of decreasing FPGA costs improving speedup/dollar in the future.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Forte:2013:RAA, author = "Domenic Forte and Ankur Srivastava", title = "Resource-aware architectures for adaptive particle filter based visual target tracking", journal = j-TODAES, volume = "18", number = "2", pages = "22:1--22:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "There are a growing number of visual tracking applications now being envisioned for mobile devices. 
However, since computer vision algorithms such as particle filtering have large computational demands, they can result in high energy consumption and temperatures in mobile devices. Conventional approaches for distributed target tracking with a camera node and a receiver node are either sender-based (SB) or receiver-based (RB). The SB approach uses little energy and bandwidth, but requires a sender with large computational resources. The RB approach fits applications where computational resources are completely unavailable to the sender, but requires very large energy and bandwidth. In this article, we propose three architectures for distributed particle filtering that (i) reduce particle filtering workload and (ii) allow for dynamic migration of workload between nodes participating in tracking. We also discuss an adaptive particle filtering extension that adapts particle filter computational complexity and can be applied to both the conventional and proposed architectures for improved energy efficiency. 
Results show that the proposed solutions require low additional overhead, improve on tracking system lifetime, balance node temperatures, maintain track of the desired target, and are more effective than conventional approaches in many scenarios.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2013:SRE, author = "Baoxian Zhao and Hakan Aydin and Dakai Zhu", title = "Shared recovery for energy efficiency and reliability enhancements in real-time applications with precedence constraints", journal = j-TODAES, volume = "18", number = "2", pages = "23:1--23:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While Dynamic Voltage Scaling (DVS) remains as a popular energy management technique for modern computing systems, recent research has identified significant and negative impacts of voltage scaling on system reliability. To preserve system reliability under DVS settings, a number of reliability-aware power management (RA-PM) schemes have been recently studied. However, the existing RA-PM schemes normally schedule a separate recovery for each task whose execution is scaled down and are rather conservative. To overcome such conservativeness, we study in this article novel RA-PM schemes based on the shared recovery (SHR) technique. Specifically, we consider a set of frame-based real-time tasks with individual deadlines and a common period where the precedence constraints are represented by a directed acyclic graph (DAG). 
We first show that the earliest deadline first (EDF) algorithm can always yield a schedule where all timing and precedence constraints are met by considering the effective deadlines of tasks derived from as late as possible (ALAP) policy, provided that the task set is feasible. Then, we propose a shared recovery based frequency assignment technique (namely SHR-DAG) and prove its optimality to minimize energy consumption while preserving the system reliability. To exploit additional slack that arises from early completion of tasks, we also study a dynamic extension for SHR-DAG to improve energy efficiency and system reliability at runtime. The results from our extensive simulations show that, compared to the existing RA-PM schemes, SHR-DAG can achieve up to 35\% energy savings, which is very close to the maximum achievable energy savings. More interestingly, our extensive evaluation also indicates that the new schemes offer non-trivial improvements on system reliability over the existing RA-PM schemes as well.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shen:2013:AAP, author = "Hao Shen and Ying Tan and Jun Lu and Qing Wu and Qinru Qiu", title = "Achieving autonomous power management using reinforcement learning", journal = j-TODAES, volume = "18", number = "2", pages = "24:1--24:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "System level power management must consider the uncertainty and variability that come from the environment, the application and the hardware. 
A robust power management technique must be able to learn the optimal decision from past events and improve itself as the environment changes. This article presents a novel on-line power management technique based on model-free constrained reinforcement learning (Q-learning). The proposed learning algorithm requires no prior information of the workload and dynamically adapts to the environment to achieve autonomous power management. We focus on the power management of the peripheral device and the microprocessor, two of the basic components of a computer. Due to their different operating behaviors and performance considerations, these two types of devices require different designs of Q-learning agent. The article discusses system modeling and cost function construction for both types of Q-learning agent. Enhancement techniques are also proposed to speed up the convergence and better maintain the required performance (or power) constraint in a dynamic system with large variations. Compared with the existing machine learning based power management techniques, the Q-learning based power management is more flexible in adapting to different workload and hardware and provides a wider range of power-performance tradeoff.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2013:RIB, author = "Jongwon Lee and Jonghee M. 
Youn and Doosan Cho and Yunheung Paek", title = "Reducing instruction bit-width for low-power {VLIW} architectures", journal = j-TODAES, volume = "18", number = "2", pages = "25:1--25:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "VLIW (very long instruction word) architectures have proven to be useful for embedded applications with abundant instruction level parallelism. But due to the long instruction bus width it often consumes more power and memory space than necessary. One way to lessen this problem is to adopt a reduced bit-width instruction set architecture (ISA) that has a narrower instruction word length. This facilitates a more efficient hardware implementation in terms of area and power by decreasing bus-bandwidth requirements and the power dissipation associated with instruction fetches. In practice, however, it is impossible to convert a given ISA fully into an equivalent reduced bit-width one because the narrow instruction word, due to bit-width restrictions, can encode only a small subset of normal instructions in the original ISA. Consequently, existing processors provide narrow instructions in very limited cases along with severe restrictions on register accessibility. The objective of this work is to explore the possibility of complete conversion, as a case study, of an existing 32-bit VLIW ISA into a 16-bit one that supports effectively all 32-bit instructions. To this objective, we attempt to circumvent the bit-width restrictions by dynamically extending the effective instruction word length of the converted 16-bit operations. 
Further, we will show that our proposed ISA conversion can create a synergy effect with a VLES (variable length execution set) architecture that is adopted in most recent VLIW processors. According to our experiment, the code size becomes significantly smaller after the conversion to 16-bit VLIW code. Also at a slight run time cost, the machine with the 16-bit ISA consumes much less energy than the original machine.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Majzoobi:2013:LPR, author = "Mehrdad Majzoobi and Joonho Kong and Farinaz Koushanfar", title = "Low-power resource binding by postsilicon customization", journal = j-TODAES, volume = "18", number = "2", pages = "26:1--26:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes the first postsilicon customization method for resource binding to achieve power reduction application specific integrated circuits (ASICs) design. Instead of committing to one configuration of resource binding during synthesis, our new synthesis method produces a diverse set of candidate bindings for the design. To ensure diversity of the resource usage patterns, we introduce a binding candidate formation method based on the orthogonal arrays. Additional control components are added to enable post manufacturing selection of one of the binding candidates. The resource binding candidate that minimizes the power consumption is selected by considering the specific power characteristics of each chip. An efficient methodology for embedding several binding candidates in one design is developed. 
Evaluations on benchmark designs show the low overhead and the effectiveness of the proposed methods. As an example, applying our method results in an average of 14.2\% (up to 24.0\%) power savings on benchmark circuits for a variation model in 45nm CMOS technology. The power efficiency of our customized postsilicon binding is expected to improve with scaling of the technology and the likely resulting higher process variations.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2013:LPA, author = "Shih-Hsu Huang and Wen-Pin Tu and Chia-Ming Chang and Song-Bin Pan", title = "Low-power anti-aging zero skew clock gating", journal = j-TODAES, volume = "18", number = "2", pages = "27:1--27:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In advanced CMOS technology, the NBTI (negative bias temperature instability) effect results in delay degradations of PMOS transistors. Further, because of clock gating, PMOS transistors in a clock tree often have different active probabilities, leading to different delay degradations. If the degradation difference is not properly controlled, this clock skew may cause the circuit fails to function at some point later in time. Intuitively, the degradation difference can be eliminated, if we increase the active probability of the low-probability clock gates to ensure the clock gates at the same level always having the same active probability. However, this intuitive method may suffer from large power consumption overhead. 
In this article, we point out, by carefully planning the transistor-level clock signal propagation path, we can have many clock gates whose active probabilities do not affect the degradation difference. Based on that observation, we propose a critical-PMOS-aware clock tree design methodology to eliminate the degradation difference with minimum power consumption overhead. Benchmark data consistently show our approach achieves very good results in terms of both the NBTI-induced clock skew (i.e., the degradation difference) and the power consumption overhead.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2013:CTM, author = "Hai Wang and Sheldon X.-D. Tan and Duo Li and Ashish Gupta and Yuan Yuan", title = "Composable thermal modeling and simulation for architecture-level thermal designs of multicore microprocessors", journal = j-TODAES, volume = "18", number = "2", pages = "28:1--28:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Efficient temperature estimation is vital for designing thermally efficient, lower power and robust integrated circuits in nanometer regime. Thermal simulation based on the detailed thermal structures no longer meets the demanding tasks for efficient design space exploration. The compact and composable model-based simulation provides a viable solution to this difficult problem. However, building such thermal models from detailed thermal structures was not well addressed in the past. In this article, we propose a new compact thermal modeling technique, called ThermComp, standing for thermal modeling with composable modules. 
ThermComp can be used for fast thermal design space exploration for multicore microprocessors. The new approach builds the composable model from detailed structures for each basic module using the finite difference method and reduces the model complexity by the sampling-based model order reduction technique. These composable models are then used to assemble different multicore architecture thermal models and realized into SPICE-like netlists. The resulting thermal models can be simulated by the general circuit simulator SPICE. ThermComp tries to preserve the accuracy of fine-grained models with the speed of coarse-grained models. Experimental results on a number of multicore microprocessor architectures show the new approach can easily build accurate thermal systems from compact composable models for fast architecture thermal analysis and optimization and is much faster than the existing HotSpot method with similar accuracy.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zeng:2013:IPD, author = "Zhiyu Zeng and Suming Lai and Peng Li", title = "{IC} power delivery: Voltage regulation and conversion, system-level cooptimization and technology implications", journal = j-TODAES, volume = "18", number = "2", pages = "29:1--29:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern IC power delivery systems encompass large on-chip passive power grids and active on-chip or off-chip voltage converters and regulators. 
While there exists little work targeting on holistic design of such complex IC subsystems, the optimal system-level design of power delivery is critical for achieving power integrity and power efficiency. In this article, we conduct a systematic design analysis on power delivery networks that incorporate Buck Converters (BCs) and on-chip Low-Dropout voltage regulators (LDOs) for the entire chip power supply. The electrical interactions between active voltage converters, regulators as well as passive power grids and their influence on key system design specifications are analyzed comprehensively. With the derived design insights, the system-level codesign of a complete power delivery network is facilitated by a proposed automatic optimization flow in which key design parameters of buck converters and on-chip LDOs as well as on-chip decoupling capacitance are jointly optimized. The experimental results demonstrate significant performance improvements resulted from the proposed system cooptimization in terms of achievable area overhead, supply noise and power efficiency. Impacts of different decoupling capacitance technologies are also investigated.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2013:SRB, author = "Ren-Jie Lee and Hung-Ming Chen", title = "A study of row-based area-array {I/O} design planning in concurrent chip-package design flow", journal = j-TODAES, volume = "18", number = "2", pages = "30:1--30:??", month = mar, year = "2013", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Apr 5 18:40:42 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "IC-centric design flow has been a common paradigm when designing and optimizing a system. 
Package and board/system designs are usually followed by almost-ready chip designs, which causes long turn-around time communicating with package and system houses. In this article, the realizations of area-array I/O design methodologies are studied. Different from IC-centric flow, we propose a chip-package concurrent design flow to speed up the design time. Along with the flow, we design the I/O-bump (and P/G-bump) tile that combines I/O (and P/G) and bump into a hard macro with the considerations of I/O power connection and electrostatic discharge (ESD) protection. We then employ an I/O-row based scheme to place I/O-bump tiles with existed metal layers. By such a scheme, it reduces efforts in I/O placement legalization and the redistribution layer (RDL) routing. With the emphasis on package design awareness, the proposed methods map package balls onto chip I/Os, thus providing an opportunity to design chip and package in parallel. Due to this early study of I/O and bump planning, faster convergence can be expected with concurrent design flow. The results are encouraging and the merits of this flow are reassuring.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Guthaus:2013:RAP,
  author =       "Matthew R. Guthaus and Gustavo Wilke and Ricardo Reis",
  title =        "Revisiting automated physical synthesis of high-performance clock networks",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "High-performance clock distribution has been a challenge for nearly three decades.
During this time, clock synthesis tools and algorithms have strove to address a myriad of important issues helping designers to create faster, more reliable, and more power efficient chips. This work provides a complete discussion of the high-performance ASIC clock distribution using information gathered from both leading industrial clock designers and previous research publications. While many techniques are only briefly explained, the references summarize the most influential papers on a variety of topics for more in-depth investigation. This article also provides a thorough discussion of current issues in clock synthesis and concludes with insight into future research and design challenges for the community at large.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gester:2013:BAD,
  author =       "Michael Gester and Dirk M{\"u}ller and Tim Nieberg and Christian Panten and Christian Schulte and Jens Vygen",
  title =        "{BonnRoute}: Algorithms and data structures for fast and good {VLSI} routing",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present the core elements of BonnRoute: advanced data structures and algorithms for fast and high-quality routing in modern technologies. Global routing is based on a combinatorial approximation scheme for min-max resource sharing. Detailed routing uses exact shortest path algorithms, based on a shape-based data structure for pin access and a two-level track-based data structure for long-distance connections. All algorithms are very fast.
Compared to an industrial router (on 32 nm and 22 nm chips), BonnRoute is over two times faster, has 5\% less netlength, 20\% less vias, and reduces detours by more than 90\%.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Agarwal:2013:SDS,
  author =       "Amit Agarwal and Jason Cong and Brian Tagiku",
  title =        "The survivability of design-specific spare placement in {FPGA} architectures with high defect rates",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We address the problem of optimizing fault tolerance in FPGA architectures with high defect rates (such as nano-FPGAs) without significantly degrading performance. Our methods address fault tolerance during the placement and reconfiguration stages of FPGA programming. First, we provide several complexity results for both the fault reconfiguration and fault-tolerance placement problems. Then, we propose a placement algorithm which, in the presence of randomly generated faults, optimizes spare placement to maximize the probability that the FPGA can be reconfigured to meet a specified timing constraint. We also give heuristics for reconfiguration after faults have been detected.
Despite the hardness results for both the placement and reconfiguration problems, we show our heuristics perform well in simulation (in one scenario, increasing the probability of successful reconfiguration by as much as 55\% compared to a uniform spare placement).",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Nadakuditi:2013:BAS,
  author =       "Raj Rao Nadakuditi and Igor L. Markov",
  title =        "On bottleneck analysis in stochastic stream processing",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "34:1--34:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491478",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Past improvements in clock frequencies have traditionally been obtained through technology scaling, but most recent technology nodes do not offer such benefits. Instead, parallelism has emerged as the key driver of chip-performance growth. Unfortunately, efficient simultaneous use of on-chip resources is hampered by sequential dependencies, as illustrated by Amdahl's law. Quantifying achievable parallelism in terms of provable mathematical results can help prevent futile programming efforts and guide innovation in computer architecture toward the most significant challenges. To complement Amdahl's law, we focus on stream processing and quantify performance losses due to stochastic runtimes. Using spectral theory of random matrices, we derive new analytical results and validate them by numerical simulations.
These results allow us to explore unique benefits of stochasticity and show how and when they outweigh the costs for software streams.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Abouelella:2013:HEI,
  author =       "Fatma Abouelella and Tom Davidson and Wim Meeus and Karel Bruneel and Dirk Stroobandt",
  title =        "How to efficiently implement dynamic circuit specialization systems",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "35:1--35:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491479",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Dynamic circuit specialization (DCS) is a technique used to implement FPGA applications where some of the input data, called parameters, change slowly compared to other inputs. Each time the parameter values change, the FPGA is reconfigured by a configuration that is specialized for those new parameter values. This specialized configuration is much smaller and faster than a regular configuration. However, the overhead associated with the specialization process should be minimized to achieve the desired benefits of using the DCS technique. This overhead is represented by both the FPGA resources needed to specialize the FPGA at runtime and by the specialization time. The introduction of parameterized configurations [Bruneel and Stroobandt 2008] has improved the efficiency of DCS implementations. However, the specialization overhead still takes a considerable amount of resources and time.
In this article, we explore how to efficiently build DCS systems by presenting a variety of possible solutions for the specialization process and the overhead associated with each of them. We split the specialization process into two main phases: the evaluation and the configuration phase. The PowerPC embedded processor, the MicroBlaze, and a customized processor (CP) are used as alternatives in the evaluation phase. In the configuration phase, the ICAP and a custom configuration interface (SRL configuration) are used as alternatives. Each solution is used to implement a DCS system for three applications: an adaptive finite impulse response (FIR) filter, a ternary content-addressable memory (TCAM), and a regular expression matcher (RegEx). The experiments show that the use of our CP along with the SRL configuration achieves minimum overhead in terms of resources and time. Our CP is 1.8 and 3.5 times smaller than the PowerPC and the area-optimized implementation of the MicroBlaze, respectively. Moreover, the use of the CP enables a more compact representation for the parameterized configuration in comparison to both the PowerPC and the MicroBlaze processors.
For instance, in the FIR, the parameterized configuration compiled for our CP is 6--7 times smaller than that for the embedded processors.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cabodi:2013:TBM,
  author =       "Gianpiero Cabodi and Sergio Nocco and Stefano Quer",
  title =        "Thread-based multi-engine model checking for multicore platforms",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "36:1--36:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491480",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes a multithreaded, portfolio-based approach to model checking, where multiple cores are exploited as the underlying computing framework to support concurrent execution of cooperative engines. We introduce a portfolio-based approach to model checking. Our portfolio is first driven by an approximate runtime predictor that provides a heuristic approximation to a perfect oracle and suggests which engines are more suitable for each verification instance. Scalability and robustness of the overall model-checking effort highly rely on a concurrent, multithreaded model of execution. Following similar approaches in related application fields, we dovetail data partitioning, focused on proving several properties in parallel, and engine partitioning, based on concurrent runs of different model-checking engines competing for completion of the same problem. We investigate concurrency not only to effectively exploit several available engines, which operate independently, but also to show that a cooperative effort is possible.
In this case, we adopt a straightforward, light-weight, model of inter-engine communication and data sharing. We provide a detailed description of the ideas, algorithms, and experimental results obtained on the benchmarks from the Hardware Model Checking Competition suites (HWMCC'10 and HWMCC'11).",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2013:AMP,
  author =       "Sehwan Kim and Pai H. Chou",
  title =        "Analysis and minimization of power-transmission loss in locally daisy-chained systems by local energy buffering",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491481",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power-transmission loss can be a severe problem for low-power embedded systems organized in a daisy-chain topology. The loss can be so high that it can result in failure to power the load in the first place. The first contribution of this article is a recursive algorithm for solving the transmission current on each segment of the daisy chain at a given supply voltage. It enables solving not only the transmission loss but also reports infeasible configurations if the voltage is too low. Using this core algorithm, our second contribution is to find energy-efficient configurations that use local energy buffers (LEBs) to eliminate peak load on the bus without relying on high voltage.
Experimental results confirm that our proposed techniques significantly reduce the total energy consumption and enable the deployed system to operate for significantly longer.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gupta:2013:ECR,
  author =       "Saket Gupta and Sachin S. Sapatnekar",
  title =        "Employing circadian rhythms to enhance power and reliability",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491482",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a novel scheme for saving architectural power by mitigating delay degradations in digital circuits due to bias temperature instability (BTI), inspired by the notion of human circadian rhythms. The method works in two alternating phases. In the first, the compute phase, the circuit is awake and active, operating briskly at a greater-than-nominal supply voltage which causes tasks to complete more quickly. In the second, the idle phase, the circuit is power-gated and put to sleep, enabling BTI recovery. Since the wakeful stage works at an elevated supply voltage, it results in greater aging than operation at the nominal supply voltage, but the sleep state involves a recovery that more than compensates for this differential.
We demonstrate, both at the circuit and the architectural levels, that at about the same performance, this approach can result in appreciable BTI mitigation, thus reducing the guardbands necessary to protect against aging, which results in power savings over the conventional design.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tsai:2013:ROC,
  author =       "Mei-Hsiang Tsai and Po-Yang Hsu and Hung-Yi Li and Yi-Huang Hung and Yi-Yu Liu",
  title =        "Routability optimization for crossbar-switch structured {ASIC} design",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491483",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the routing architecture of a structured application-specific integrated circuit (ASIC), the crossbar is one of the most area-efficient switch blocks. Nevertheless, a dangling wire occurs when there is a routing bend in a crossbar switch. Dangling wires incur longer wire lengths as well as a higher interconnection capacitance. In this article, we tackle dangling wire issues for structured ASIC routability optimization. We first propose a compact graph model for crossbar-switch routing. With our graph model, switch connectivity relations can be removed to keep the 2D structured ASIC routing graph efficient and to speed up the runtime of our routing algorithm. Furthermore, we propose a heuristic dangling-wire-avoidance routing framework containing deferred pin assignment, Steiner point reassignment, and anchor pair insertion in order to minimize dangling wires and channel width.
Finally, in order to take routing bends and channel width into account simultaneously, we propose concurrent and sequential integer linear programming (ILP) formulations and ILP variable/constraint degeneration techniques. The experimental results demonstrate that our proposed heuristic routing framework reduces dangling wires by 19\%, channel width by 38\%, and wire length by 13\% to VPR using the crossbar switch (VPR-C). In addition, our sequential ILP router reduces dangling wires by 38\%, channel width by 40\%, and wire length by 15\% compared to VPR-C. Thus, the runtime efficiency of our sequential ILP router is attractive for crossbar-switch structured ASIC routing.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2013:ABF,
  author =       "Sean Shih-Ying Liu and Wan-Ting Lo and Chieh-Jui Lee and Hung-Ming Chen",
  title =        "Agglomerative-based flip-flop merging and relocation for signal wirelength and clock tree optimization",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491484",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose a flip-flop merging algorithm based on agglomerative clustering. Compared to previous state-of-the-art on flip-flop merging, our proposed algorithm outperforms that of Chang et al. [2010] and Wang et al. [2011] in all aspects, including number of flip-flop reductions, increase in signal wirelength, displacement of flip-flops, and execution time. Our proposed algorithm also has minimal disruption to original placement. In comparison with Jiang et al.
[2011], Wang et al. [2011], and Chang et al. [2010], our proposed algorithm has the least displacement when relocating merged flip-flops. While previous works on flip-flop merging focus on the number of flip-flop reduction, we further evaluate the power consumption of clock tree after flip-flop merging. To further minimize clock tree wirelength, we propose a framework that determines a preferable location for relocated merged flip-flops for clock tree synthesis (CTS). Experimental results show that our CTS-driven flip-flop merging can reduce clock tree wirelength by an average of 7.82\% with minimum clock network power consumption compared to all of the previous works.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2013:EMA,
  author =       "Yu-Min Lee and Pei-Yu Huang",
  title =        "An efficient method for analyzing on-chip thermal reliability considering process variations",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491485",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This work provides an efficient statistical electrothermal simulator for analyzing on-chip thermal reliability under process variations. Using the collocation-based statistical modeling technique, first, the statistical interpolation polynomial for on-chip temperature distribution can be obtained by performing deterministic electrothermal simulation very few times and by utilizing polynomial interpolation.
After that, the proposed simulator not only provides the mean and standard deviation profiles of on-chip temperature distribution, but also innovates the concept of thermal yield profile to statistically characterize the on-chip temperature distribution more precisely, and builds an efficient technique for estimating this figure of merit. Moreover, a mixed-mesh strategy is presented to further enhance the efficiency of the developed statistical electrothermal simulator. Experimental results demonstrate that (1) the developed statistical electrothermal simulator can obtain accurate approximations with orders of magnitude speedup over the Monte Carlo method; (2) comparing with a well-known cumulative distribution function estimation method, APEX [Li et al. 2004], the developed statistical electrothermal simulator can achieve 215$ \times $ speedup with better accuracy; (3) the developed mixed-mesh strategy can achieve an order of magnitude faster over our baseline algorithm and still maintain an acceptable accuracy level.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shi:2013:OSC,
  author =       "Yiyu Shi and Jinjun Xiong and Vladimir Zolotov and Chandu Visweswariah",
  title =        "Order statistics for correlated random variables and its application to at-speed testing",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491486",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Although order statistics have been studied for several decades, most of the results are based on the assumption of independent and identically
distributed (i.i.d.) random variables. In the literature, how to compute the $m$th order statistics of $n$ correlated random variables is still a problem. This article proposes a recursive algorithm based on statistical min/max operations to compute order statistics for general correlated and not necessarily identically distributed random variables. The algorithm has an {$ O(m n) $} time complexity and {$ O(m + n) $} space complexity. A binary tree-based data structure is further developed to allow selective update of the order statistics with {$ O(n m^2) $} time. As a vehicle to demonstrate the algorithm, we apply it to the path selection algorithm in at-speed testing. A novel metric multilayer process space coverage metric is proposed to quantitatively gauge the quality of path selection. We then show that such a metric is directly linked to the order statistics, and our recursive algorithm can thus be applied. By employing a branch-and-bound path selection algorithm with these techniques, this article shows that selecting an optimal set of paths for a multimillion-gate design can be performed efficiently.
Compared to the state of the art, experimental results show both the efficiency of our algorithms and better quality of our path selection.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhao:2013:PSA,
  author =       "Wei Zhao and Junxia Ma and Mohammad Tehranipoor and Sreejit Chakravarty",
  title =        "Power-safe application of {TDF} patterns to flip-chip designs during wafer test",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491487",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to high switching activities in test mode, circuit power consumption is higher than its functional operation. Large switching in the circuit during launch-to-capture cycles not only negatively impacts circuit performance causing overkill, but could also burn tester probes during wafer test due to the excessive current they must drive. It is necessary to develop a quick and effective method for evaluating each pattern, identifying high-power patterns considering functional and tester probes' current limits and making the final pattern set power-safe. Compared with previous low-power methods that deal with scan structure modification or pattern filling techniques, the new proposed method takes into account layout information and resistance in the power distribution network and can identify peak current among C4 power bumps. Post-processing steps replace power-unsafe patterns with low-power ones.
The final pattern set provides considerable peak current reduction while fault coverage is maintained.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xiang:2013:TCS,
  author =       "Dong Xiang and Jianbo Li and Krishnendu Chakrabarty and Xijiang Lin",
  title =        "Test compaction for small-delay defects using an effective path selection scheme",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491488",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Testing for small-delay defects (SDDs) requires fault-effect propagation along the longest testable paths. However, identification of the longest testable paths requires high CPU time, and the sensitization of all such paths leads to large pattern counts. Dynamic test compaction for small-delay defects is therefore necessary to reduce test-data volume. We present a new technique for identifying the longest testable paths through each gate in order to accelerate test generation for SDDs. The resulting test patterns sensitize the longest testable paths that pass through each SDD site. An efficient dynamic test compaction method based on structural analysis is presented to reduce the pattern count substantially, while ensuring that all the longest paths for each SDD are sensitized.
Simulation results for a set of ISCAS 89 and IWLS 05 benchmark circuits demonstrate the effectiveness of this method.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Anonymous:2013:CNE,
  author =       "Anonymous",
  title =        "Call for nominations for {Editor-in-Chief}",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2541012.2541672",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Marculescu:2013:ESS,
  author =       "Diana Marculescu and Chita Das",
  title =        "Editorial to special section on networks on chip: Architecture, tools, and methodologies",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2541012.2541013",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bogdan:2013:DPM,
  author =       "Paul Bogdan and Radu Marculescu and Siddharth Jain",
  title =        "Dynamic power management for multidomain system-on-chip platforms: An optimal control approach",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2504904",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Reducing energy consumption in multiprocessor systems-on-chip (MPSoCs) where communication happens via the network-on-chip (NoC) approach calls for multiple voltage/frequency island (VFI)-based designs. In turn, such multi-VFI architectures need efficient, robust, and accurate runtime control mechanisms that can exploit the workload characteristics in order to save power. Despite being tractable, the linear control models for power management cannot capture some important workload characteristics (e.g., fractality, nonstationarity) observed in heterogeneous NoCs; if ignored, such characteristics lead to inefficient communication and resources allocation, as well as high power dissipation in MPSoCs. To mitigate such limitations, we propose a new paradigm shift from power optimization based on linear models to control approaches based on fractal-state equations. As such, our approach is the first to propose a controller for fractal workloads with precise constraints on state and control variables and specific time bounds.
Our results show that significant power savings can be achieved at runtime while running a variety of benchmark applications.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2013:NMC,
  author =       "Xi Chen and Zheng Xu and Hyungjun Kim and Paul Gratz and Jiang Hu and Michael Kishinevsky and Umit Ogras",
  title =        "In-network monitoring and control policy for {DVFS} of {CMP} networks-on-chip and last level caches",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2504905",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In chip design today and for a foreseeable future, the last-level cache and on-chip interconnect is not only performance critical but also a substantial power consumer. This work focuses on employing dynamic voltage and frequency scaling (DVFS) policies for networks-on-chip (NoC) and shared, distributed last-level caches (LLC). In particular, we consider a practical system architecture where the distributed LLC and the NoC share a voltage/frequency domain that is separate from the core domain. This architecture enables the control of the relative speed between the cores and memory hierarchy without introducing synchronization delays within the NoC. DVFS for this architecture is more complex than individual link/core-based DVFS since it involves spatially distributed monitoring and control. We propose an average memory access time (AMAT)-based monitoring technique and integrate it with DVFS based on PID control theory.
Simulations on PARSEC benchmarks yield a 27\% energy savings with a negligible impact on system performance.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2013:AVC, author = "Jaekyu Lee and Si Li and Hyesoon Kim and Sudhakar Yalamanchili", title = "Adaptive virtual channel partitioning for network-on-chip in heterogeneous architectures", journal = j-TODAES, volume = "18", number = "4", pages = "48:1--48:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2504906", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Current heterogeneous chip-multiprocessors (CMPs) integrate a GPU architecture on a die. However, the heterogeneity of this architecture inevitably exerts different pressures on shared resource management due to differing characteristics of CPU and GPU cores. We consider how to efficiently share on-chip resources between cores within the heterogeneous system, in particular the on-chip network. Heterogeneous architectures use an on-chip interconnection network to access shared resources such as last-level cache tiles and memory controllers, and this type of on-chip network will have a significant impact on performance. In this article, we propose a feedback-directed virtual channel partitioning (VCP) mechanism for on-chip routers to effectively share network bandwidth between CPU and GPU cores in a heterogeneous architecture. VCP dedicates a few virtual channels to CPU and GPU applications with separate injection queues. 
The proposed mechanism balances on-chip network bandwidth for applications running on CPU and GPU cores by adaptively choosing the best partitioning configuration. As a result, our mechanism improves system throughput by 15\% over the baseline across 39 heterogeneous workloads.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Abousamra:2013:OCE, author = "Ahmed Abousamra and Alex K. Jones and Rami Melhem", title = "Ordering circuit establishment in multiplane {NoCs}", journal = j-TODAES, volume = "18", number = "4", pages = "49:1--49:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2500752", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Segregating networks-on-chips (NoCs) into data and control planes yields several opportunities for improving power and performance in chip-multiprocessor systems (CMPs). This article describes a hybrid packet/circuit switched multiplane network optimized to reduce latency in order to improve system performance and/or reduce system energy. Unlike traditional circuit preallocation techniques which require timestamps to reserve circuit resources, this article proposes an order-based preallocation scheme. By enforcing the order in which resources are scheduled and utilized rather than a fixed time, the NoC can take advantage of messages that arrive early while naturally tolerating message delays due to contention. Ordered circuit establishment is presented using two techniques. First, D{\'e}j{\`a} Vu switching preestablishes circuits for data messages once a cache hit is detected and prior to the requested data becoming available. 
Second, using Red Carpet Routing, circuits are proactively reserved for a return data message as a request message traverses the NoC. The reduced communication latency over configured circuits enable system performance improvement or saving NoC energy by reducing voltage and frequency without sacrificing performance. In simulations of 16 and 64 core CMPs, D{\'e}j{\`a} Vu switching enabled average NoC energy savings of 43\% and 53\% respectively. On the other hand, simulations of communication sensitive benchmarks using Red Carpet Routing show speedup in execution time of up to 16\%, with an average of 10\% over a purely packet switched NoC and an average of 8\% over preconfiguring circuits using D{\'e}j{\`a} Vu switching.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2013:DRN, author = "Jinho Lee and Dongwoo Lee and Sunwook Kim and Kiyoung Choi", title = "Deflection routing in {$3$D} network-on-chip with limited vertical bandwidth", journal = j-TODAES, volume = "18", number = "4", pages = "50:1--50:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505011", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a deflection routing for 3D NoC with serialized TSVs for vertical links. Compared to buffered routing, deflection routing provides area- and power-efficient communication and little loss of performance under low to medium traffic load. Under 3D environments, the deflection routing can yield even better performance than buffered routing when key aspects are properly taken into account. 
However, the existing deflection routing technique cannot be directly applied because the serialized TSV links will take longer time to send data than ordinary planar links and cause many problems. A naive deflection through a TSV link can cause significantly longer latency and more energy consumption even for communications through planar links. This article proposes a method to mitigate the effect and also solve arising deadlock and livelock problems. Evaluation of the proposed scheme shows its effectiveness in throughput, latency, and energy consumption.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shojaei:2013:FSM, author = "Hamid Shojaei and Twan Basten and Marc Geilen and Azadeh Davoodi", title = "A fast and scalable multidimensional multiple-choice knapsack heuristic", journal = j-TODAES, volume = "18", number = "4", pages = "51:1--51:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2541012.2541014", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many combinatorial optimization problems in the embedded systems and design automation domains involve decision making in multidimensional spaces. The multidimensional multiple-choice knapsack problem (MMKP) is among the most challenging of the encountered optimization problems. MMKP problem instances appear for example in chip multiprocessor runtime resource management and in global routing of wiring in circuits. Chip multiprocessor resource management requires solving MMKP under real-time constraints, whereas global routing requires scalability of the solution approach to extremely large MMKP instances. 
This article presents a novel MMKP heuristic, CPH (for Compositional Pareto-algebraic Heuristic), which is a parameterized compositional heuristic based on the principles of Pareto algebra. Compositionality allows incremental computation of solutions. The parameterization allows tuning of the heuristic to the problem at hand. These aspects make CPH a very versatile heuristic. When tuning CPH for computation time, MMKP instances can be solved in real time with better results than the fastest MMKP heuristic so far. When tuning CPH for solution quality, it finds several new solutions for standard benchmarks that are not found by any existing heuristic. CPH furthermore scales to extremely large problem instances. We illustrate and evaluate the use of CPH in both chip multiprocessor resource management and in global routing.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yoon:2013:ACC, author = "Jonghee W. Yoon and Jongeun Lee and Sanghyun Park and Yongjoo Kim and Jinyong Lee and Yunheung Paek and Doosan Cho", title = "Architecture customization of on-chip reconfigurable accelerators", journal = j-TODAES, volume = "18", number = "4", pages = "52:1--52:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2493384", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Integrating coarse-grained reconfigurable architectures (CGRAs) into a System-on-a-Chip (SoC) presents many benefits as well as important challenges. One of the challenges is how to customize the architecture for the target applications efficiently and effectively without performing explicit design space exploration. 
In this article we present a novel methodology for incremental interconnect customization of CGRAs that can suggest a new interconnection architecture which is able to maximize the performance for a given set of application kernels while minimizing the hardware cost. In our methodology, we translate the problem of interconnect customization into that of inexact graph matching, and we devised a heuristic for A* search algorithm to efficiently solve the inexact graph matching problem. Our experimental results demonstrate that our customization method can quickly find application-optimized interconnections that exhibit 80\% higher performance on average compared to the base architecture which has mesh interconnections, with little energy and hardware increase in interconnections and muxes.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jeyapaul:2013:EEE, author = "Reiley Jeyapaul and Aviral Shrivastava", title = "Enabling energy efficient reliability in embedded systems through smart cache cleaning", journal = j-TODAES, volume = "18", number = "4", pages = "53:1--53:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505012", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Incessant and rapid technology scaling has brought us to a point where today's, and future transistors are susceptible to transient errors induced by energy carrying particles, called soft errors. Within a processor, the sheer size and nature of data in the caches render it most vulnerable to electrical interference on data stored in the cache. 
Data in the cache is vulnerable to corruption by soft errors, for the time it remains actively unused in the cache. Write-through and early-write-back [Li et al. 2004] cache configurations reduce the time for vulnerable data in the cache, at the cost of increased memory writes and thereby energy. We propose a smart cache cleaning methodology, that enables copying of only specific vulnerable cache blocks into the memory at chosen times, thereby ensuring data cache protection with minimal memory writes. In this work, we first propose a hybrid (software-hardware) methodology. We then propose an improved software solution that utilizes cache write-back functionality available in commodity processors; thereby reducing the hardware overhead required to implement smart cache cleaning for such systems. The parameters involved in the implementation of our Smart Cache Cleaning (SCC) technique enable a means to provide for customizable energy-efficient soft error reduction in the L1 data cache. Given the system requirements of reliability, power-budget and runtime priority of the application, appropriate parameters of the SCC can be customized to trade-off power consumption and L1 data cache reliability. Our experiments over LINPACK and Livermore benchmarks demonstrate 26\% reduced energy-vulnerability product (energy-efficient vulnerability reduction) compared to that of hardware based cache reliability techniques. 
Our software-only solution achieves same levels of reliability with an additional 28\% performance improvement.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kadayif:2013:HSA, author = "Ismail Kadayif and Mahir Turkcan and Seher Kiziltepe and Ozcan Ozturk", title = "Hardware\slash software approaches for reducing the process variation impact on instruction fetches", journal = j-TODAES, volume = "18", number = "4", pages = "54:1--54:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2489778", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As technology moves towards finer process geometries, it is becoming extremely difficult to control critical physical parameters such as channel length, gate oxide thickness, and dopant ion concentration. Variations in these parameters lead to dramatic variations in access latencies in Static Random Access Memory (SRAM) devices. This means that different lines of the same cache may have different access latencies. A simple solution to this problem is to adopt the worst-case latency paradigm. While this egalitarian cache management is simple, it may introduce significant performance overhead during instruction fetches when both address translation (instruction Translation Lookaside Buffer (TLB) access) and instruction cache access take place, making this solution infeasible for future high-performance processors. In this study, we first propose some hardware and software enhancements and then, based on those, investigate several techniques to mitigate the effect of process variation on the instruction fetch pipeline stage in modern processors. 
For address translation, we study an approach that performs the virtual-to-physical page translation once, then stores it in a special register, reusing it as long as the execution remains on the same instruction page. To handle varying access latencies across different instruction cache lines, we annotate the cache access latency of instructions within themselves to give the circuitry a hint about how long to wait for the next instruction to become available.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2013:EWD, author = "Guanying Wu and Xubin He and Ningde Xie and Tong Zhang", title = "Exploiting workload dynamics to improve {SSD} read latency via differentiated error correction codes", journal = j-TODAES, volume = "18", number = "4", pages = "55:1--55:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2489792", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a cross-layer codesign approach to reduce SSD read response latency. The key is to cohesively exploit the NAND flash memory device write speed vs. raw storage reliability trade-off at the physical layer and runtime data access workload dynamics at the system level. Leveraging runtime data access workload variation, we can opportunistically slow down NAND flash memory write speed and hence improve NAND flash memory raw storage reliability. This naturally enables an opportunistic use of weaker error correction schemes that can directly reduce SSD read access latency. 
We develop a disk-level scheduling scheme to effectively smooth the write workload in order to maximize the occurrence of runtime opportunistic NAND flash memory write slowdown. Using 2 bits/cell NAND flash memory with BCH-based error correction as a test vehicle, we carry out extensive simulations over various workloads and demonstrate that this developed cross-layer co-design solution can reduce the average SSD read latency by up to 59.4\% without sacrificing the write throughput performance.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2013:IBM, author = "Po-Chun Huang and Yuan-Hao Chang and Tei-Wei Kuo", title = "An index-based management scheme with adaptive caching for huge-scale low-cost embedded flash storages", journal = j-TODAES, volume = "18", number = "4", pages = "56:1--56:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505013", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to its remarkable access performance, shock resistance, and costs, NAND flash memory is now widely adopted in a variety of computing environments, especially in mobile devices such as smart phones, media players and electronic book readers. For the consideration of costs, low-cost embedded flash storages such as flash memory cards are often employed on such devices. Different from solid-state disks, the RAM buffer equipped on low-cost embedded flash storages are very small, for example, limited under several dozens of kilobytes, despite of the rapidly growing capacity of the storages.
The significance of effectively utilizing the very limited on-device RAM buffers of embedded flash storages is therefore highlighted, and a novel design of scalable flash management schemes is needed to tackle the new access constraints of MLC NAND flash memory. In this work, a highly scalable design of the flash translation layer is presented with the considerations of the on-device RAM size, user access patterns, address-mapping-information caching and MLC access constraints. Through a series of experiments, it is verified that, with appropriate settings of cache sizes, the proposed management scheme provides comparable performance results to prior arts with much lower requirements on the on-device RAM. In other words, the proposed scheme suggests a strategy to make better use of the on-device RAM, and is suitable for embedded flash storages.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2013:CSL, author = "Bo Zhao and Jun Yang and Youtao Zhang and Yiran Chen and Hai Li", title = "Common-source-line array: an area efficient memory architecture for bipolar nonvolatile devices", journal = j-TODAES, volume = "18", number = "4", pages = "57:1--57:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2500459", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditional array organization of bipolar nonvolatile memories such as STT-MRAM and memristor utilizes two bitlines for cell manipulations. With technology scaling, such bitline pair will soon become the bottleneck for further density improvement. 
In this article we propose a novel common-source-line array architecture, which uses a shared source-line along the row, leaving only one bitline per column. We elaborate the array design to ensure reliability, and demonstrate its effectiveness on STT-MRAM and memristor memory arrays. Our study results show that with comparable latency and energy, the proposed common-source-line array can save 34\% and 33\% area for Memristor-RAM and STT-MRAM respectively, compared with corresponding dual-bitline arrays.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{DaRolt:2013:NDS, author = "Jean {Da Rolt} and Giorgio {Di Natale} and Marie-Lise Flottes and Bruno Rouzeyre", title = "A novel differential scan attack on advanced {DFT} structures", journal = j-TODAES, volume = "18", number = "4", pages = "58:1--58:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505014", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scan chains insertion is the most common technique to ensure the testability of digital cores, providing high fault coverage. However, for ICs dealing with secret information, scan chains can be used as back doors for accessing secret data thus becoming a threat to system security. So far, advanced test structures used to reduce test costs (e.g., response compaction) and achieve high fault coverage (e.g., X's masking decoder) have been considered as intrinsic countermeasures against these threats. 
This work proposes a new generic scan-based attack demonstrating that these test structures are not sufficiently effective to prevent leakage through the test infrastructure. This generic attack can be easily adapted to several cryptographic implementations for both symmetric and public key algorithms. The proposed attack is demonstrated on several ciphers.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2013:PDS, author = "Yao-Lin Chang and I-Lun Tseng", title = "A parallel dual-scanline algorithm for partitioning parameterized 45-degree polygons", journal = j-TODAES, volume = "18", number = "4", pages = "59:1--59:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505015", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In order to use rectangular corner stitching data structures in storing parameterized orthogonal layouts, parameterized polygons in the layouts must be partitioned into rectangles. Likewise, in order to use trapezoidal corner stitching data structures in storing parameterized 45-degree layouts, parameterized polygons in the layouts have to be partitioned into trapezoids. In this article, a parallel polygon partitioning algorithm is proposed; the algorithm is capable of partitioning parameterized orthogonal polygons into parameterized rectangles as well as partitioning parameterized 45-degree polygons into parameterized trapezoids. Additionally, the algorithm can be used to partition fixed-coordinate polygons. 
By adopting the dual-scanline technique, which involves using two scanlines to concurrently sweep an input polygon, the parallel partitioning algorithm can process vertices and edges of the input polygon efficiently. The parallel polygon partitioning algorithm has been implemented in C++ with the use of OpenMP. Compared with a sequential partitioning program which uses a single scanline, our parallel partitioning program can achieve 20\% to 30\% speedup while partitioning large parameterized polygons or partitioning parameterized polygons with complex constraints.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ramanujam:2013:DBC, author = "Rohit Sunkam Ramanujam and Bill Lin", title = "Destination-based congestion awareness for adaptive routing in {$2$D} mesh networks", journal = j-TODAES, volume = "18", number = "4", pages = "60:1--60:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505055", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The choice of routing algorithm plays a vital role in the performance of on-chip interconnection networks. Adaptive routing is appealing because it offers better latency and throughput than oblivious routing, especially under nonuniform and bursty traffic. The performance of an adaptive routing algorithm is determined by its ability to accurately estimate congestion in the network. In this regard, maintaining global congestion state using a separate monitoring network offers better congestion visibility into distant parts of the network compared to solutions relying only on local congestion. 
However, the main challenge in designing such routing schemes is to keep the logic and bandwidth overhead as low as possible to fit into the tight power, area, and delay budgets of on-chip routers. In this article, we propose a minimal destination-based adaptive routing strategy (DAR), where every node estimates the delay to every other node in the network, and routing decisions are based on these per-destination delay estimates. DAR outperforms Regional Congestion Awareness (RCA), the best previously known adaptive routing algorithm that uses nonlocal congestion state. The performance improvement is brought about by maintaining fine-grained per-destination delay estimates in DAR that are more accurate than regional congestion metrics measured in RCA. The increased accuracy is a consequence of the fact that the per-destination delay estimates are not corrupted by congestion on links outside the admissible routing paths to the destination. A scalable version of DAR, referred to as SDAR, is also proposed for minimizing the overheads associated with DAR in large network topologies. We show that DAR outperforms local adaptive routing by up to 79\% and RCA by up to 58\% in terms of latency on SPLASH-2 benchmarks. DAR and SDAR also outperform existing adaptive and oblivious routing algorithms in latency and throughput under synthetic traffic patterns on 8$ \times $8 and 16$ \times $16 mesh topologies, respectively.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2013:RAG, author = "Tan Yan and Qiang Ma and Scott Chilstedt and Martin D. F.
Wong and Deming Chen", title = "A routing algorithm for graphene nanoribbon circuit", journal = j-TODAES, volume = "18", number = "4", pages = "61:1--61:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505056", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Conventional CMOS devices are facing an increasing number of challenges as their feature sizes scale down. Graphene nanoribbon (GNR) based devices are shown to be a promising replacement of traditional CMOS at future technology nodes. However, all previous works on GNRs focus at the device level. In order to integrate these devices into electronic systems, routing becomes a key issue. In this article, the GNR routing problem is studied for the first time. We formulate the GNR routing problem as a minimum hybrid-cost shortest path problem on triangular mesh (``hybrid'' means that we need to consider both the length and the bending of the routing path). We show that by graph expansion, this minimum hybrid-cost shortest path problem can be solved by applying the conventional shortest path algorithm on the expanded graph. 
Experimental results show that our GNR routing algorithm effectively handles the hybrid cost.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ayoub:2013:CCM, author = "Raid Ayoub and Rajib Nath and Tajana Simunic Rosing", title = "{CoMETC}: Coordinated management of energy\slash thermal\slash cooling in servers", journal = j-TODAES, volume = "19", number = "1", pages = "1:1--1:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534381", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We introduce a Coordinated Management of Energy, Thermal, and Cooling (CoMETC) technique to minimize cooling and memory energy of server machines. State-of-the-art solutions decouple the optimization of cooling energy costs and energy consumption of CPU and memory subsystems. This results in suboptimal solutions due to thermal dependencies between CPU and memory and the nonlinearity in energy costs of cooling. In contrast, we develop a unified solution that integrates energy, thermal, and cooling management for CPU and memory subsystems to maximize energy savings. CoMETC reduces the operational energy of the memory by clustering active memory pages to a subset of memory modules while accounting for thermal and cooling aspects. At the same time, CoMETC removes hotspots between and within the CPU sockets and reduces the effects of thermal coupling with memory in order to minimize cooling energy costs. We design CoMETC using a control-theoretic approach to guarantee meeting these objectives. We introduce a formal thermal and cooling model to be used for online decisions inside CoMETC. 
Our experimental results show that CoMETC achieves average cooling and memory energy savings of 58\% compared to state-of-the-art techniques at a performance overhead of less than 0.3\%.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Al-Dujaily:2013:DPB, author = "Ra'ed Al-Dujaily and Nizar Dahir and Terrence Mak and Fei Xia and Alex Yakovlev", title = "Dynamic programming-based runtime thermal management {(DPRTM)}: an online thermal control strategy for {$3$D-NoC} systems", journal = j-TODAES, volume = "19", number = "1", pages = "2:1--2:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534382", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Complex thermal behavior inhibits the advancement of three-dimensional (3D) very-large-scale-integration (VLSI) system designs, as it could lead to ultra-high temperature hotspots and permanent silicon device damage. This article introduces a new runtime thermal management strategy to effectively diffuse and manage heat throughout 3D chip geometry for a better throughput performance in networks on chip (NoC). This strategy employs a dynamic programming-based runtime thermal management (DPRTM) policy to provide online thermal regulation. Reactive and proactive adaptive schemes are integrated to optimize the routing pathways depending on the critical temperature thresholds and traffic developments. Also, when the critical system thermal limit is violated, an urgent throttling will take place. 
The proposed DPRTM is rigorously evaluated through cycle-accurate simulations, and results show that the proposed approach outperforms conventional approaches in terms of computational efficiency and thermal stability. For example, the system throughput using the DPRTM approach can be improved by 33\% when compared to other adaptive routing strategies for a given thermal constraint. Moreover, the DPRTM implementation presented in this article demonstrates that the hardware overhead is insignificant. This work opens a new avenue for exploring the on-chip adaptability and thermal regulation for future large-scale and 3D many-core integrations.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2013:IPP, author = "Yen-Jen Chang and Hsiang-Yu Lu", title = "Improving the performance of port range check for network packet filtering", journal = j-TODAES, volume = "19", number = "1", pages = "3:1--3:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2523069", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article introduces a high-performance packet filter design in which we propose the partial parallel range check (PPRC) technique for speeding up port range check. Unlike the conventional serial design that uses cascading cells to perform the serial check, PPRC divides the single path into several segments. All PPRC segments perform the range compare simultaneously, that is, parallel check, and then the results of each segment are serialized to generate the final check result. 
Besides theoretical analyses, we also use UMC 90nm CMOS process to implement the PPRC design and verify its effect on the check performance. Compared to state-of-the-art range check techniques, the results show that the PPRC design with the best configuration can improve check performance by 28\%, at least. In addition, the PPRC design is more stable and energy efficient than related designs, even though it requires more transistors to implement the peripheral circuitry. The range of energy improvement achieved by the PPRC design is about 35\%--70\%.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kritikakou:2013:NOS, author = "Angeliki Kritikakou and Francky Catthoor and Vasilios Kelefouras and Costas Goutis", title = "Near-optimal and scalable intrasignal in-place optimization for non-overlapping and irregular access schemes", journal = j-TODAES, volume = "19", number = "1", pages = "4:1--4:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534383", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Storage-size management techniques aim to reduce the resources required to store elements and to concurrently provide efficient addressing during element accessing. Existing techniques are less appropriate for large iteration spaces with increased numbers of irregularly spread holes. They either have to approximate the accessed regions, leading to overestimation of the final resources, or they require prohibited exploration time to find the storage size. 
In this work, we present a near-optimal and scalable methodology for storage-size, intrasignal, in-place optimization, that is, to compute the minimum amount of resources required to store the elements of a group (array), for irregular complex access schemes in the target domain of non-overlapping store and load accesses.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2013:LEV, author = "Jianhua Li and Liang Shi and Qingan Li and Chun Jason Xue and Yiran Chen and Yinlong Xu and Wei Wang", title = "Low-energy volatile {STT--RAM} cache design using cache-coherence-enabled adaptive refresh", journal = j-TODAES, volume = "19", number = "1", pages = "5:1--5:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534393", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Spin-Torque Transfer RAM (STT-RAM) is a promising candidate for SRAM replacement because of its excellent features, such as fast read access, high density, low leakage power, and CMOS technology compatibility. However, wide adoption of STT-RAM as cache memories is impeded by its long write latency and high write power. Recent work proposed improving the write performance through relaxing the retention time of STT-RAM cells. The resultant volatile STT-RAM needs to be periodically refreshed to prevent data loss. When volatile STT-RAM is applied as the last-level cache (LLC) in chip multiprocessor (CMP) systems, frequent refresh operations could dissipate significant extra energy. In addition, refresh operations could severely conflict with normal read/write operations to degrade overall system performance. 
Therefore, minimizing the performance impact caused by refresh operations is crucial for the adoption of volatile STT-RAM. In this article, we propose Cache-Coherence-Enabled Adaptive Refresh (CCear) to minimize the number of refresh operations for volatile STT-RAM, adopted as the LLC for CMP systems. Specifically, CCear interacts with cache coherence protocol and cache management policy to minimize the number of refresh operations on volatile STT-RAM caches. Full-system simulation results show that CCear performs close to an ideal refresh policy with low overhead. Compared with state-of-the-art refresh policies, CCear simultaneously improves the system performance and reduces the energy consumption. Moreover, the performance of CCear could be further enhanced using small filter caches to accommodate the not-refreshed private STT-RAM blocks.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2013:PBA, author = "Xue-Xin Liu and Sheldon X.-D. Tan and Adolfo Adair Palma-Rodriguez and Esteban Tlelo-Cuautle and Guoyong Shi", title = "Performance bound analysis of analog circuits in frequency- and time-domain considering process variations", journal = j-TODAES, volume = "19", number = "1", pages = "6:1--6:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534395", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a new performance bound analysis of analog circuits considering process variations. We model the variations of component values as intervals measured from tested chips and manufacture processes. 
The new method first applies a graph-based analysis approach to generate the symbolic transfer function of a linear(ized) analog circuit. Then the frequency response bounds (maximum and minimum) are obtained by performing nonlinear constrained optimization in which magnitude or phase of the transfer function is the objective function to be optimized subject to the ranges of process variational parameters. The response bounds given by the optimization-based method are very accurate and do not have the over-conservativeness issues of existing methods. Based on the frequency-domain bounds, we further develop a method to calculate the time-domain response bounds for any arbitrary input stimulus. Experimental results from several analog benchmark circuits show that the proposed method gives the correct bounds verified by Monte Carlo analysis while it delivers one order of magnitude speedup over Monte Carlo for both frequency-domain and time-domain bound analyses. We also show analog circuit yield analysis as an application of the frequency-domain variational bound analysis.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2013:OCC, author = "Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and Pei-Wen Luo", title = "Optimal common-centroid-based unit capacitor placements for yield enhancement of switched-capacitor circuits", journal = j-TODAES, volume = "19", number = "1", pages = "7:1--7:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534394", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Yield is defined as the probability that the circuit under consideration meets with the design 
specification within the tolerance. Placement with higher correlation coefficients has fewer mismatches and lower variation of capacitor ratio, thus achieving higher yield performance. This study presents a new optimization criterion that quickly determines if the placement is optimal. The optimization criterion leads to the development of the concepts of C-entries and partitioned subarrays which can significantly reduce the searching space for finding the optimal/near-optimal placements on a sufficiently large array size.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2013:BGM, author = "Irith Pomeranz", title = "Built-in generation of multicycle functional broadside tests with observation points", journal = j-TODAES, volume = "19", number = "1", pages = "8:1--8:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534396", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Functional broadside tests allow overtesting to be avoided as part of a scheme that considers both test generation and the analysis of output responses, by ensuring that delay faults are detected under functional operation conditions. Compared with two-cycle tests, multicycle tests allow more faults to be detected with each test, thus reducing the number of tests that need to be applied. They also provide an opportunity for nonfunctional electrical effects, which are caused by switching between modes of operation, to subside before the clock cycles where delay faults are detected. Built-in test generation facilitates at-speed testing and reduces the test data volume. 
Motivated by these observations, this article describes the modification of a built-in test generation method for two-cycle functional broadside tests so as to generate multicycle functional broadside tests. The size of the hardware is not increased by the modification. The article investigates the following issues related to this method: (1) the effect of using multicycle tests on the number of tests that need to be applied; (2) fault simulation for tailoring the test generation hardware to a circuit that takes into account, to different extents, the need to allow nonfunctional electrical effects to subside; (3) the insertion of observation points in order to increase the transition fault coverage.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tong:2013:TCT, author = "Jason G. Tong and Marc Boul{\'e} and Zeljko Zilic", title = "Test compaction techniques for assertion-based test generation", journal = j-TODAES, volume = "19", number = "1", pages = "9:1--9:??", month = dec, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2534397", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Dec 17 17:21:29 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Assertions are now widely used in verification as a means to help convey designer intent and also to simplify the detection of erroneous conditions by the firing of assertions. With this expressive modeling power, assertions could also be used for tasks such as helping to assess test coverage and even as a source for test generation. Our work deals with this last aspect, namely, assertion-based test generation. In this article, we present our compacted test generation scheme based on assertions. 
Novel compaction techniques are presented based on assertion clustering, test-path overlap detection and parallel-path removal. Our compaction approach is experimentally evaluated using nearly 300 assertions to show the amount of reduction that can be obtained in the size of the test sets. This ultimately has a positive impact on verification time in the quest for bug-free designs.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tu:2014:PPP, author = "Chia-Heng Tu and Hui-Hsin Hsu and Jen-Hao Chen and Chun-Han Chen and Shih-Hao Hung", title = "Performance and power profiling for emulated {Android} systems", journal = j-TODAES, volume = "19", number = "2", pages = "10:1--10:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566660", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Simulation is a common approach for assisting system design and optimization. For system-wide optimization, energy and computational resources are often the two most critical issues. Monitoring the energy state of each hardware component and measuring the time spent in each state is needed for accurate energy and performance prediction. For software optimization, it is important to profile the energy and the time consumed by each software construct in a realistic operating environment with a proper workload. However, the conventional approaches of simulation often fail to produce satisfying data. 
First, building a cycle-accurate simulation environment for a complex system, such as an Android smartphone, is difficult and can take a long time. Second, a slow simulation can significantly alter the behavior of multithreaded, I/O-intensive applications and can affect the accuracy of profiles. Third, existing software-based profilers generally do not work on simulators, which makes it difficult for performance analysis of complicated software, for example, Java applications executed by the Dalvik VM in an Android system. To address these aforementioned problems, we proposed and prototyped a framework, called virtual performance analyzer (VPA). VPA takes advantage of an existing emulator or virtual machine monitor to reduce the complexity of building a simulator. VPA allows the user to selectively and incrementally integrate timing models and power models into the emulator with our carefully designed performance/power monitors, tracing facility, and profiling tools to evaluate and analyze the emulated system. The emulated system can perform at different levels of speed to help verify if the profile data are impacted by the emulation speed. Finally, VPA supports existing software-based profiles and enables non-intrusive tracing/profiling by minimizing the probe effect. 
Our experimental results show that the VPA framework allows users to quickly establish a performance/power evaluation environment and gather useful information to support system design and software optimization for Android smartphones.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ganeshpure:2014:PDD, author = "Kunal Ganeshpure and Sandip Kundu", title = "Performance-driven dynamic thermal management of {MPSoC} based on task rescheduling", journal = j-TODAES, volume = "19", number = "2", pages = "11:1--11:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566661", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High level of integration has led to the advent of Multiprocessor System-on-Chip (MPSoC) which consists of multiple processor cores and accelerators on the same die. A MPSoC programming model is based on a task graph where tasks are assigned to cores to maximize performance. To address thermal hotspots in MPSoCs, coarse-grain power management techniques based on Dynamic Frequency Scaling (DFS) are widely used. DFS is reactive in nature and has detrimental effects on performance. We propose an alternative solution based on dynamic task rescheduling where a temperature prediction scheme is built into the scheduler. The temperature look-ahead scheme is used for task reassignment or delay insertion in scheduling. Since temperature prediction and task assignment are done at runtime, both must be simple and extremely fast. 
To that end, we propose a heuristic solution based on a limited branch-and-bound search and compare results against an optimal Integer Linear Programming (ILP)-based solution. The proposed approach is shown to be superior to frequency scaling, and the resulting schedule length is within 5\% to 10\% of the optimal solution as obtained from ILP formulation.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Meyer:2014:CEL, author = "Brett H. Meyer and Adam S. Hartman and Donald E. Thomas", title = "Cost-effective lifetime and yield optimization for {NoC-based} {MPSoCs}", journal = j-TODAES, volume = "19", number = "2", pages = "12:1--12:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2535575", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As manufacturing processes scale, designers are increasingly dependent on techniques to mitigate manufacturing defect and permanent failure. In embedded systems-on-chip, system lifetime and yield can be increased using slack---under-utilization in execution and storage resources---so that when components are defective, data and tasks can be remapped and rescheduled. For any given system, the design space of possible slack allocations is both large and complex, consisting of every possible way to replace each component in the initial system with another from the component library. Based on the observation that useful slack is often quantized, we have developed Critical Quantity Slack Allocation (CQSA), an approach that effectively and efficiently allocates execution and storage slack to jointly optimize system yield and cost. 
While exploring less than 1.4\% of the slack allocation design space, our approach consistently outperforms alternative slack allocation techniques to find sets of designs within 1.4\% of the lifetime-cost Pareto-optimal front. When applied to yield-cost optimization, our approach again outperforms alternative techniques, exploring less than 1.62\% of the design space to find sets of designs within 4.27\% of the yield-cost Pareto-optimal front. One advantage of managing failure at the system level is that the same techniques that improve lifetime often also improve yield. As a result, with little modification, CQSA is further able to perform effective joint optimization of lifetime and yield, finding designs within 1.6\% of the Pareto-optimal front.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2014:CRM, author = "Jongeun Lee and Seongseok Seo and Jongkyung Paek and Kiyoung Choi", title = "Configurable range memory for effective data reuse on programmable accelerators", journal = j-TODAES, volume = "19", number = "2", pages = "13:1--13:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566662", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While programmable accelerators such as application-specific processors and reconfigurable architectures can dramatically speed up compute-intensive kernels of an application, application performance can still be severely limited by the communication between processors. To minimize the communication overhead, a shared memory such as a scratchpad memory may be employed between the main processor and the accelerator coprocessor. 
However, this setup poses a significant challenge to the main processor, which now must manage data on the scratchpad explicitly, resulting in superfluous data copying due to the inflexibility of a scratchpad. In this article, we present an enhancement of a scratchpad, Configurable Range Memory (CRM), whose address range can be reprogrammed to minimize unnecessary data copying between processors and therefore promote data reuse on the accelerator, and also present a software management algorithm for the CRM. Our experimental results involving detailed simulation of full multimedia applications demonstrate that our CRM architecture can reduce the communication overhead quite effectively, reducing the kernel execution time by up to 28\% and the application runtime by up to 12.8\%, in addition to considerable system energy reduction, compared to the conventional architecture based on a scratchpad.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hung:2014:AFD, author = "Eddie Hung and Steven J. E. Wilton", title = "Accelerating {FPGA} debug: Increasing visibility using a runtime reconfigurable observation and triggering network", journal = j-TODAES, volume = "19", number = "2", pages = "14:1--14:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566668", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "FPGA technology is commonly used to prototype new digital designs before entering fabrication. Whilst these physical prototypes can operate many orders of magnitude faster than through a logic simulator, a fundamental limitation is their lack of on-chip visibility when debugging. 
To counter this, trace-buffer-based instrumentation can be installed into the prototype, allowing designers to capture a predetermined window of signal data during live operation for offline analysis. However, instead of requiring the designer to recompile their entire circuit every time the window is modified, this article proposes that an overlay network is constructed using only spare FPGA routing multiplexers to connect all circuit signals through to the trace instruments. Thus, during debugging, designers would only need to reconfigure this network instead of finding a new place-and-route solution. Furthermore, we describe how this network can deliver signals to both the trigger and trace units of these instruments, which are implemented simultaneously using dual-port RAMs. Our results show that new network configurations connecting any subset of signals to 80--90\% of the available RAM capacity can be computed in less than 70 seconds, for a 100,000 LUT circuit, as many times as necessary. Our tool---QuickTrace---is available for download.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Panerati:2014:CEM, author = "Jacopo Panerati and Giovanni Beltrame", title = "A comparative evaluation of multi-objective exploration algorithms for high-level design", journal = j-TODAES, volume = "19", number = "2", pages = "15:1--15:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566669", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a detailed overview and the experimental comparison of 15 multi-objective design-space exploration (DSE) algorithms for high-level design. 
These algorithms are collected from recent literature and include heuristic, evolutionary, and statistical methods. To provide a fair comparison, the algorithms are classified according to the approach used and examined against a large set of metrics. In particular, the effectiveness of each algorithm was evaluated for the optimization of a multiprocessor platform, considering initial setup effort, rate of convergence, scalability, and quality of the resulting optimization. Our experiments are performed with statistical rigor, using a set of very diverse benchmark applications (a video converter, a parallel compression algorithm, and a fast Fourier transformation algorithm) to take a large spectrum of realistic workloads into account. Our results provide insights on the effort required to apply each algorithm to a target design space, the number of simulations it requires, its accuracy, and its precision. These insights are used to draw guidelines for the choice of DSE algorithms according to the type and size of design space to be optimized.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2014:CPA, author = "Seokhyun Lee and Kiyoung Choi", title = "Critical-path-aware high-level synthesis with distributed controller for fast timing closure", journal = j-TODAES, volume = "19", number = "2", pages = "16:1--16:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566670", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Centralized controllers commonly used in high-level synthesis often require long wires and cause high load capacitance, and that is why critical paths typically occur on 
paths from controllers to data registers instead of paths from data registers to data registers. However, conventional high-level synthesis has focused on delays within a datapath, making it difficult to solve the timing closure problem during physical synthesis. This article presents hardware architecture with a distributed controller, which makes the timing closure problem much easier. A novel critical-path-aware high-level synthesis flow is also presented for synthesizing such hardware through datapath partitioning, register binding, and controller optimization. We explore the design space related to the number of partitions, which is an important design parameter for target architecture. According to our experiments, the proposed approach reduces the critical path delay excluding FUs by 29.3\% and that including FUs by 10.0\%, with 2.2\% area overhead on average compared to centralized controller architecture.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wei:2014:TSE, author = "Yaoguang Wei and Cliff Sze and Natarajan Viswanathan and Zhuo Li and Charles J. Alpert and Lakshmi Reddy and Andrew D. Huber and Gustavo E. Tellez and Douglas Keller and Sachin S. Sapatnekar", title = "Techniques for scalable and effective routability evaluation", journal = j-TODAES, volume = "19", number = "2", pages = "17:1--17:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566663", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routing congestion has become a critical layout challenge in nanoscale circuits since it is a critical factor in determining the routability of a design. 
An unroutable design is not useful even though it closes on all other design metrics. Fast design closure can only be achieved by accurately evaluating whether a design is routable or not early in the design cycle. Lately, it has become common to use a ``light mode'' version of a global router to quickly evaluate the routability of a given placement. This approach suffers from three weaknesses: (i) it does not adequately model local routing resources, which can cause incorrect routability predictions that are only detected late, during detailed routing; (ii) the congestion maps obtained by it tend to have isolated hotspots surrounded by noncongested spots, called ``noisy hotspots'', which further affects the accuracy in routability evaluation; and (iii) the metrics used to represent congestion may yield numbers that do not provide sufficient intuition to the designer, and moreover, they may often fail to predict the routability accurately. This article presents solutions to these issues. First, we propose three approaches to model local routing resources. Second, we propose a smoothing technique to reduce the number of noisy hotspots and obtain a more accurate routability evaluation result. Finally, we develop a new metric which represents congestion maps with higher fidelity. 
We apply the proposed techniques to several industrial circuits and demonstrate that one can better predict and evaluate design routability and that congestion mitigation tools can perform much better to improve the design routability.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2014:LPS, author = "Irith Pomeranz", title = "Low-power skewed-load tests based on functional broadside tests", journal = j-TODAES, volume = "19", number = "2", pages = "18:1--18:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566664", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article studies the generation of low-power skewed-load tests such that the signal transitions (and line values) they create during their fast functional clock cycles match those of functional broadside tests. Functional broadside tests create functional operation conditions during their fast functional clock cycles. As a result, the signal transitions that occur during these clock cycles can also occur during functional operation. The procedure described in this article matches these signal-transitions on a line-by-line basis when generating low-power skewed-load tests. The procedure accepts a functional broadside test set for transition faults. In one of its basic steps, the procedure modifies a functional broadside test into a skewed-load test. This allows it to retain many of the signal transitions (and line values) of the functional broadside test in the skewed-load test. 
Experimental results for benchmark circuits demonstrate the extent to which it is possible to match the signal-transitions of skewed-load tests with those of functional broadside tests while achieving the high transition fault coverage that is typical of skewed-load tests.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2014:DTM, author = "Irith Pomeranz", title = "Design-for-testability for multi-cycle broadside tests by holding of state variables", journal = j-TODAES, volume = "19", number = "2", pages = "19:1--19:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566665", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article describes a design-for-testability approach for increasing the transition fault coverage of multi-cycle broadside tests. Earlier methods addressed two-cycle tests. The importance of multi-cycle tests results from the ability to produce more compact test sets than possible with two-cycle tests, from the fact that when multi-cycle tests are applied at-speed, they can detect defects that are not detected by two-cycle tests and from their ability to avoid overtesting of delay faults. The approach described in this article is based on holding the values of selected state variables constant during the functional clock cycles of a multi-cycle broadside test. This allows new tests to be produced, which are different from broadside tests, without relying on nonfunctional toggling of state variables as in earlier methods for two-cycle tests. 
Experimental results show significant improvements in transition fault coverage using a fixed set of hold configurations for two types of multi-cycle broadside test sets: (1) test sets that are stored and applied from an external tester, and (2) functional broadside test sets that are generated using on-chip hardware.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Biswas:2014:RTC, author = "Sounil Biswas and Hongfei Wang and R. D. (Shawn) Blanton", title = "Reducing test cost of integrated, heterogeneous systems using pass-fail test data analysis", journal = j-TODAES, volume = "19", number = "2", pages = "20:1--20:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2566666", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Stringent quality requirements for integrated, heterogeneous systems have led designers and test engineers to mandate large sets of tests to be applied to these systems, which, in turn, have resulted in increased test cost. However, many of these tests are unnecessary (i.e., redundant), since their outcomes can be reliably predicted using results from other applied tests. A methodology for identifying the redundant tests of an integrated, heterogeneous system that has only binary pass-fail test data is described. This methodology uses decision trees, Boolean minimization, and satisfiability as core components. Feasibility is empirically demonstrated using test data from two commercially fabricated systems, namely, a high-speed serializer/deserializer (HSS) and a phase-locked loop (PLL). 
Our analysis of test data from {$>$} 38,000 HSS and {$>$} 22,000 PLL circuits show that 14 out of 40 HSS tests and 11 out of 36 PLL tests are redundant.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2014:BBL, author = "Da-Wei Chang and Hsin-Hung Chen and Dau-Jieu Yang and Hsung-Pin Chang", title = "{BLAS}: Block-level adaptive striping for solid-state drives", journal = j-TODAES, volume = "19", number = "2", pages = "21:1--21:??", month = mar, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2555616", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 21 18:21:14 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Increasing the degree of parallelism and reducing the overhead of garbage collection (GC overhead) are the two keys to enhancing the performance of solid-state drives (SSDs). SSDs employ multichannel architectures, and a data placement scheme in an SSD determines how the data are striped to the channels. Without considering the data access pattern, existing fixed and device-level data placement schemes may have either high GC overhead or poor I/O parallelism, resulting in degraded performance. In this article, an adaptive block-level data placement scheme called BLAS is proposed to maximize the I/O parallelism while simultaneously minimizing the GC overhead. In contrast to existing device-level schemes, BLAS allows different data placement policies for blocks with different access patterns. Pages in read-intensive blocks are scattered over various channels to maximize the degree of read parallelism, while pages in each of the remaining blocks are attempted to be gathered in the same physical block to minimize the GC overhead. 
Moreover, BLAS allows the placement policy for a logical block to be changed dynamically according to the access pattern changes of that block. Finally, a parallelism-aware write buffer management approach is adopted in BLAS to maximize the degree of write parallelism. Performance results show that BLAS yields a significant improvement in the SSD response time when compared to existing device-level schemes. In particular, BLAS outperforms device-level page striping and device-level block striping by factors of up to 8.75 and 7.41, respectively. Moreover, BLAS achieves low GC overhead and is effective in adapting to workload changes.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bathen:2014:STS, author = "Luis Angel D. Bathen and Nikil D. Dutt", title = "{SPMCloud}: Towards the Single-Chip Embedded {ScratchPad} Memory-Based Storage Cloud", journal = j-TODAES, volume = "19", number = "3", pages = "22:1--22:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611755", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The era of cloud computing on-a-chip is enabled by the aggressive move towards many-core platforms and the rapid adoption of Network-on-Chips. As a result, there is a need for large-scale distributed on-chip shared memories that are reliable, low power, and seamlessly manageable. In this work, we propose SPMCloud, a novel scratchpad-memory-based cloud-inspired volatile storage subsystem designed to meet the needs of future-generation many-core platforms. 
SPMCloud is composed of several concepts, including: (1) a highly scalable data-center-like memory subsystem that exploits two enterprise-network-inspired memory configurations, namely, embedded Network Attached Storage (eNAS) and embedded Storage Area Network (eSAN), and (2) on-demand allocation of reliable memory space through memory virtualization and the use of embedded RAIDs. Our experimental results on Mediabench/CHStone benchmarks show that the SPMCloud's fully distributed reliable memory subsystems can achieve 48\% energy savings and 70\% latency reduction on average over state-of-the-art NoC memory reliability techniques. We then evaluate the scalability of the SPMCloud and compare it with traditional SPM allocation policies. The SPMCloud's dynamic allocator outperforms the best competition by an average 60\% (eNAS) and 46\% (eSAN) when the platform runs at 250 MHz and by an average 80\% (eNAS) and 40\% when running at 1 GHz. Moreover, the SPMCloud achieves an average 83\% energy savings across all configurations (number of cores) with respect to the best competitors when running at 250 MHz and 1 GHz. We then studied the SPM hit ratio across the various allocation policies discussed in this article and showed that on average the SPMCloud's priority-driven dynamic allocation policy achieves 93.5\% SPM hit ratio, 0.6\% higher hit ratio than the closest allocation policy. We then showed that the eNAS and eSAN achieve an average of 67.9\% and 29\% reduction in execution time, respectively, over the best competitor. Similarly, the eNAS and eSAN achieve an average of 82.7\% and 82.3\% energy savings, respectively, over the best competitor. 
Furthermore, we evaluated the scalability of the SPMCloud and its performance/energy efficiency when providing support for some of the heavier E-RAID levels, and showed that the eNAS / eSAN configurations with SECDED achieve an average of 51.5\% and 34.9\% reduction in execution time, respectively, over the best competitor with SECDED. Similarly, the eNAS / eSAN configurations with E-RAID Level 1, + SECDED achieve an average of 82.3\% and 75.6\% energy savings, respectively, over the best competitor.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Rosales:2014:MHA, author = "Rafael Rosales and Michael Glass and J{\"u}rgen Teich and Bo Wang and Yang Xu and Ralph Hasholzner", title = "{MAESTRO} --- Holistic Actor-Oriented Modeling of Nonfunctional Properties and Firmware Behavior for {MPSoCs}", journal = j-TODAES, volume = "19", number = "3", pages = "23:1--23:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2594481", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modeling and evaluating nonfunctional properties such as performance, power, and reliability of embedded systems are tasks of utmost importance. In this article, we introduce MAESTRO, a methodology for the modeling and evaluation of nonfunctional properties and embedded firmware of MPSoC architecture components at the Electronic System Level (ESL). 
In contrast to existing design flows that provide predefined performance models, MAESTRO defines a flexible approach that allows to define virtual prototypes that can be easily customized and extended to evaluate multiple nonfunctional properties of interest at different levels of abstraction. In MAESTRO, a design is composed purely from actor-oriented models. This enables typical ESL features such as automatic design space exploration and synthesizability of HW and SW components, typically missing in very general design flows. Unique to MAESTRO is the separation and coordination of the interaction between application functionality, firmware, and performance models for the evaluation of nonfunctional properties, and their complex interactions within a single Model-of-Computation (MoC). The main advantages of MAESTRO are: (I) Extensible modeling of interdependent nonfunctional properties of heterogeneous MPSoC components; (II) high flexibility to investigate the appropriate trade-off between modeling effort and accuracy of nonfunctional property evaluators; (III) a holistic approach for modeling application functionality as well as firmware affecting the evaluation of nonfunctional properties. Regarding (II), we present a mobile baseband processor platform use-case, executing a GSM paging application. 
To demonstrate (I) and (III), we present the modeling of a complex ESL processor virtual prototype, running a soft real-time application and equipped with both a power and reliability manager.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2014:ICP, author = "Libo Huang and Zhiying Wang and Nong Xiao and Yongwen Wang and Qiang Dou", title = "Integrated Coherence Prediction: Towards Efficient Cache Coherence on {NoC}-Based Multicore Architectures", journal = j-TODAES, volume = "19", number = "3", pages = "24:1--24:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611756", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore architectures with Network-on-Chips (NoCs) have been widely recognized as the de facto design for the efficient utilization of the continuously increasing density of transistors on a chip. A key challenge in designing such an NoC-based multicore processor is maintaining cache coherence in an efficient manner. Directory-based protocols avoid the bandwidth overhead of snoop-based protocols, therefore scaling to a large number of cores. However, conventional directory structures add significant indirection delay to cache-to-cache accesses in larger multicore processor. In this article we propose a novel hardware coherence technique, called integrated coherence prediction (ICP). This approach adopts a prediction technique for managing shared data to reduce or eliminate the cache-to-cache delay in coherence accesses. ICP has two unique features that differ from previous coherence prediction techniques. 
First, ICP introduces a new integrated prediction scheme that combines two kinds of predictors: owner predictor, which predicts the data writers and avoids the indirection through directory, and data predictor, which predicts the access address and prefetches data from remote nodes directly. Second, ICP uses a request replication method to reduce the negative effect of wrong owner prediction operations, thus facilitating overall performance improvement. We present the design and implementation details of the ICP approach. Using detailed full-system simulations, we conclude that the ICP provides a cost-effective solution for designing high-performance multicore processors.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2014:GCM, author = "Po-Chun Huang and Yuan-Hao Chang and Kam-Yiu Lam and Jian-Tao Wang and Chien-Chin Huang", title = "Garbage Collection for Multiversion Index in Flash-Based Embedded Databases", journal = j-TODAES, volume = "19", number = "3", pages = "25:1--25:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611757", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recently, flash-based embedded databases have gained their momentum in various control and monitoring systems, such as cyber-physical systems (CPSes). To support the functionality to access the historical data, a multiversion index is adopted to simultaneously maintain multiple versions of data items, as well as their index information. 
However, maintaining a multiversion index on flash memory incurs considerable performance overheads on garbage collection, which is to reclaim the spaces occupied by the outdated/invalid data items and their index information on flash memory. In this work, we propose an efficient garbage collection strategy to solve the garbage collection issues of flash-based multiversion databases. In particular, a version-tracking method is proposed to accelerate the performance on the process on identifying/reclaiming the space of invalid data and their indexes, and a pre-summary method is also designed to solve the cascading update problem that is caused by the write-once nature of flash memory and is worsened when more versions refer to the same data item. The capability of the proposed strategy is then verified by analytical and experimental studies.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lim:2014:PMG, author = "Jieun Lim and Nagesh B. Lakshminarayana and Hyesoon Kim and William Song and Sudhakar Yalamanchili and Wonyong Sung", title = "Power Modeling for {GPU} Architectures Using {McPAT}", journal = j-TODAES, volume = "19", number = "3", pages = "26:1--26:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611758", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Graphics Processing Units (GPUs) are very popular for both graphics and general-purpose applications. Since GPUs operate many processing units and manage multiple levels of memory hierarchy, they consume a significant amount of power. 
Although several power models for CPUs are available, the power consumption of GPUs has not been studied much yet. In this article we develop a new power model for GPUs by utilizing McPAT, a CPU power tool. We generate initial power model data from McPAT with a detailed GPU configuration, and then adjust the models by comparing them with empirical data. We use the NVIDIA's Fermi architecture for building the power model, and our model estimates the GPU power consumption with an average error of 7.7\% and 12.8\% for the microbenchmarks and Merge benchmarks, respectively.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2014:DCC, author = "Chia-Wei Lee and Sun-Yuan Hsieh", title = "Diagnosability of Component-Composition Graphs in the {MM*} Model", journal = j-TODAES, volume = "19", number = "3", pages = "27:1--27:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611759", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Diagnosability is an important metric for measuring the reliability of multiprocessor systems. This article adopts the MM* model and outlines the common properties of a wide class of interconnection networks, called component-composition graphs (CCGs), to determine their diagnosability by using their obtained properties. 
By applying the results to multiprocessor systems, the diagnosability of hypercube-like networks (including hypercubes, crossed cubes, M{\"o}bius cubes, twisted cubes, locally twisted cubes, generalized twisted cubes, and recursive circulants), star graphs, pancake graphs, bubble-sort graphs, and burnt pancake graphs, all of which belong to the class of CCGs, can also be computed.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Erb:2014:ELF, author = "Dominik Erb and Michael A. Kochte and Matthias Sauer and Stefan Hillebrecht and Tobias Schubert and Hans-Joachim Wunderlich and Bernd Becker", title = "Exact Logic and Fault Simulation in Presence of Unknowns", journal = j-TODAES, volume = "19", number = "3", pages = "28:1--28:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611760", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Logic and fault simulation are essential techniques in electronic design automation. The accuracy of standard simulation algorithms is compromised by unknown or X-values. This results in a pessimistic overestimation of X-valued signals in the circuit and a pessimistic underestimation of fault coverage. This work proposes efficient algorithms for combinational and sequential logic as well as for stuck-at and transition-delay fault simulation that are free of any simulation pessimism in presence of unknowns. The SAT-based algorithms exactly classify all signal states. During fault simulation, each fault is accurately classified as either undetected, definitely detected, or possibly detected. 
The pessimism with respect to unknowns present in classic algorithms is thoroughly investigated in the experimental results on benchmark circuits. The applicability of the proposed algorithms is demonstrated on larger industrial circuits. The results show that, by accurate analysis, the number of detected faults can be significantly increased without increasing the test-set size.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2014:EFG, author = "Jackey Z. Yan and Natarajan Viswanathan and Chris Chu", title = "An Effective Floorplan-Guided Placement Algorithm for Large-Scale Mixed-Size Designs", journal = j-TODAES, volume = "19", number = "3", pages = "29:1--29:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611761", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article we propose an effective algorithm flow to handle modern large-scale mixed-size placement, both with and without geometry constraints. The basic idea is to use floorplanning to guide the placement of objects at the global level. The flow consists of four steps: (1) The objects in the original netlist are clustered into blocks; (2) floorplanning is performed on the blocks; (3) the blocks are shifted within the chip region to further optimize the wirelength; (4) with large macro-locations fixed, incremental placement is applied to place the remaining objects. There are several advantages to handling placement at the global level with a floorplanning technique. First, the problem size can be significantly reduced. Second, exact Half-Perimeter WireLength (HPWL) can be minimized. 
Third, better object distribution can be achieved so that legalization only needs to handle minor overlaps among small objects in a block. Fourth, macro-rotation and various geometry constraints can be handled. To demonstrate the effectiveness of this new flow, we implement a high-quality and efficient floorplan-guided placer called FLOP. We also construct the Modern Mixed-Size (MMS) placement benchmarks that can effectively represent the complexities of modern mixed-size designs and the challenges faced by modern mixed-size placers. Compared with most state-of-the-art mixed-size placers and leading macroplacers, experimental results show that FLOP achieves the best HPWL and easily obtains legal solutions on all circuits with all geometry constraints satisfied.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kang:2014:IRA, author = "Minseok Kang and Taewhan Kim", title = "Integrated Resource Allocation and Binding in Clock Mesh Synthesis", journal = j-TODAES, volume = "19", number = "3", pages = "30:1--30:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611762", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The clock distribution network in a synchronous digital circuit delivers a clock signal to every storage element, that is, clock sink in the circuit. However, since the continued technology scaling increases PVT (process-voltage-temperature) variation, the increase of clock-skew variation is highly likely to cause performance degradation or system failure at runtime. 
Recently, to mitigate the clock-skew variation, many researchers have taken a profound interest in the clock mesh network. However, though the structure of the clock mesh network is excellent in tolerating timing variations, it demands significantly high power consumption due to the use of excessive mesh wire and buffer resources. Thus, optimizing the resources required in the mesh clock synthesis while maintaining the variation tolerance is crucially important. The three major tasks that greatly affect the cost of the resulting clock mesh are: (1) mesh segment allocation, (2) mesh buffer allocation and sizing, and (3) clock sink binding to mesh segments. Previous clock mesh optimization approaches solve the three tasks sequentially, one by one at a time, to manage the runtime complexity of the tasks at the expense of losing the quality of results. However, since the three tasks are tightly interrelated, simultaneously optimizing all three tasks is essential, if the runtime is ever permitted, to synthesize an economical clock mesh network. In this work, we propose an approach that is able to tackle the problem in an integrated fashion by combining the three tasks into an iterative framework of incremental updates and solving them simultaneously to find a globally optimal allocation of mesh resources while taking into account the clock-skew tolerance constraints. The core parts of this work are a precise analysis on the relation among the resource optimization tasks and an establishment of a mechanism for effective and efficient integration of the tasks. In particular, to handle the runtime problem, we propose a set of speedup techniques, that is, modeling the RC circuit for eliminating redundant matrix multiplications, exploiting a sliding-window scheme, and quickly estimating the buffer sizing effect, which are fitted into our context of fast clock-skew estimation in mesh resource optimization as well as an invention of early decision policies. 
Through extensive experiments with benchmark circuits, it is shown that our proposed clock mesh synthesizer is able to reduce the worst-case clock skew, total mesh wirelength, total size of mesh driving buffers, and total clock mesh power consumption including short-circuit power by 25.0\%, 13.2\%, 10.9\%, and 11.0\% on average compared to that produced by the best-known clock mesh synthesis method (MeshWorks), respectively.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Boghrati:2014:IAP, author = "Baktash Boghrati and Sachin S. Sapatnekar", title = "Incremental Analysis of Power Grids Using Backward Random Walks", journal = j-TODAES, volume = "19", number = "3", pages = "31:1--31:??", month = jun, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2611763", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jun 21 07:58:42 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power grid design and analysis is a critical part of modern VLSI chip design and demands tools for accurate modeling and efficient analysis. The process of power grid design is inherently iterative, during which numerous small changes are made to an initial design, either to enhance the design or to fix design constraint violations. Due to the large sizes of power grids in modern chips, updating the solution for these perturbations can be a computationally intensive task. In this work, we first introduce an accurate modeling methodology for power grids that, contrary to conventional models, can result in asymmetrical equations. Next, we propose an efficient and accurate incremental solver that utilizes the backward random walks to identify the region of influence of the perturbation. 
The solution of the network is then updated for this significantly smaller region only. The proposed algorithm is capable of handling both symmetrical and asymmetrical power grid equations. Moreover, it can handle consecutive perturbations without any degradation in the quality of the solution. Experimental results show speedups of up to 13$ \times $ for our incremental solver, as compared to a full resolve of the power grid.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Schneider:2014:QNE, author = "Reinhard Schneider and Dip Goswami and Samarjit Chakraborty and Unmesh Bordoloi and Petru Eles and Zebo Peng", title = "Quantifying Notions of Extensibility in {FlexRay} Schedule Synthesis", journal = j-TODAES, volume = "19", number = "4", pages = "32:1--32:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647954", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "FlexRay has now become a well-established in-vehicle communication bus at most original equipment manufacturers (OEMs) such as BMW, Audi, and GM. Given the increasing cost of verification and the high degree of crosslinking between components in automotive architectures, an incremental design process is commonly followed. In order to incorporate FlexRay-based designs in such a process, the resulting schedules must be extensible, that is: (i) when messages are added in later iterations, they must preserve deadline guarantees of already scheduled messages, and (ii) they must accommodate as many new messages as possible without changes to existing schedules. 
Apart from extensible scheduling having not received much attention so far, traditional metrics used for quantifying them cannot be trivially adapted to FlexRay schedules. This is because they do not exploit specific properties of the FlexRay protocol. In this article we, for the first time, introduce new notions of extensibility for FlexRay that capture all the protocol-specific properties. In particular, we focus on the dynamic segment of FlexRay and we present a number of metrics to quantify extensible schedules. Based on the introduced metrics, we propose strategies to synthesize extensible schedules and compare the results of different scheduling algorithms. We demonstrate the applicability of the results with industrial-size case studies and also show that the proposed metrics may also be visually represented, thereby allowing for easy interpretation.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pan:2014:SPM, author = "Gung-Yu Pan and Jing-Yang Jou and Bo-Cheng Lai", title = "Scalable Power Management Using Multilevel Reinforcement Learning for Multiprocessors", journal = j-TODAES, volume = "19", number = "4", pages = "33:1--33:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2629486", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Dynamic power management has become an imperative design factor to attain the energy efficiency in modern systems. Among various power management schemes, learning-based policies that are adaptive to different environments and applications have demonstrated superior performance to other approaches. 
However, they suffer the scalability problem for multiprocessors due to the increasing number of cores in a system. In this article, we propose a scalable and effective online policy called MultiLevel Reinforcement Learning (MLRL). By exploiting the hierarchical paradigm, the time complexity of MLRL is $ O(n \lg n) $ for $n$ cores and the convergence rate is greatly raised by compressing redundant searching space. Some advanced techniques, such as the function approximation and the action selection scheme, are included to enhance the generality and stability of the proposed policy. By simulating on the SPLASH-2 benchmarks, MLRL runs 53\% faster and outperforms the state-of-the-art work with 13.6\% energy saving and 2.7\% latency penalty on average. The generality and the scalability of MLRL are also validated through extensive simulations.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2014:WLL, author = "Yoon Seok Yang and Reeshav Kumar and Gwan Choi and Paul V. Gratz", title = "{WaveSync}: Low-Latency Source-Synchronous Bypass Network-on-Chip Architecture", journal = j-TODAES, volume = "19", number = "4", pages = "34:1--34:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647950", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "WaveSync is a network-on-chip architecture for a globally asynchronous locally-synchronous (GALS) design. The WaveSync design facilitates low-latency communication leveraging the source-synchronous clock sent along with the data to time components in the datapath of a downstream router, reducing the number of synchronizations needed. 
WaveSync accomplishes this by partitioning the router components at each node into different clock domains, each synchronized with one of the orthogonal incoming source-synchronous clocks in a GALS 2D mesh network. The data and clock subsequently propagate through each node/router synchronously until the destination is reached, regardless of the number of hops this may take. As long as the data travels in the path of clock propagation and no congestion is encountered, it will be propagated without latching as if in a long combinatorial path, with both the clock and the data accruing delay at the same rate. The result is that the need for synchronization between the mesochronous nodes and/or the asynchronous control associated with the typical GALS network is completely eliminated. To further reduce the latency overhead of synchronization, for those occasions when synchronization is still required (when a flit takes a turn or arrives at the destination), we propose a novel less-than-one-cycle synchronizer. 
The proposed WaveSync network outperforms conventional GALS networks by 87--90\% in average latency, synthesized using a 45nm CMOS library.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jose:2014:IAH, author = "John Jose and Madhu Mutyam", title = "Implementation and Analysis of History-Based Output Channel Selection Strategies for Adaptive Routers in Mesh {NoCs}", journal = j-TODAES, volume = "19", number = "4", pages = "35:1--35:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647952", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The efficiency and effectiveness of an adaptive router in an NoC-based multicore system is evaluated by the performance it achieves under varying inter-core communication traffic. A well-designed selection strategy plays an important role in an adaptive router to act upon dynamic traffic variations. The effectiveness of a selection strategy depends on what metric is used to represent congestion, how precisely this metric captures the actual congestion, and how much cost is involved in capturing the congestion on a real-time scale. Congestion is formed over a period of time due to cumulative and chain reaction effects. We propose novel history-based selection strategies that could be used with any adaptive, deadlock-free, minimal routing in mesh NoCs. Buffer occupancy time and rate of flit flow across reachable ports of neighboring routers in the recent past are captured, propagated, and maintained in a cost-effective way to compute the selection metric. 
Experimental results on real and synthetic workloads show that our proposed selection strategies significantly outperform state-of-the-art techniques.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tsai:2014:PAE, author = "Kun-Lin Tsai and Hao-Tse Chen and Yo-An Lin", title = "Power and Area Efficiency {NoC} Router Design for Application-Specific {SoC} by Using Buffer Merging and Resource Sharing", journal = j-TODAES, volume = "19", number = "4", pages = "36:1--36:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2633604", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Network-on-Chip (NoC) is an efficient on-chip communication architecture specifically for System-on-a-Chip (SoC) design. However, the input buffers of a NoC router often take a significant portion of the silicon area and power consumption. Besides, the performance of a NoC is also greatly affected by the buffer size. In this article, a static buffer merging and resource sharing method is proposed for the application-specific SoC minimizing the NoC buffer. When given an application-specific task graph and the dataflow distribution, the proposed method statically merges rarely used buffers and generates the suitable number of input buffers for each router at design timely. The merged buffer is shared by several input directions. The experimental result shows that the buffer can be utilized more effectively after the resource sharing. 
Based on the synthesized design with TSMC 90nm technology, the proposed method reduces an average of 42.23\% area and 35.13\% power while providing similar performance.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hatami:2014:MSN, author = "Nadereh Hatami and Rafal Baranowski and Paolo Prinetto and Hans-Joachim Wunderlich", title = "Multilevel Simulation of Nonfunctional Properties by Piecewise Evaluation", journal = j-TODAES, volume = "19", number = "4", pages = "37:1--37:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647955", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the technology shrinks, nonfunctional properties (NFPs) such as reliability, vulnerability, power consumption, or heat dissipation become as important as system functionality. As NFPs often influence each other, depend on the application and workload of a system, and exhibit nonlinear behavior, NFP simulation over long periods of system operation is computationally expensive, if feasible at all. This article presents a piecewise evaluation method for efficient NFP simulation. Simulation time is divided into intervals called evaluation windows, within which the NFP models are partially linearized. High-speed functional system simulation is achieved by parallel execution of models at different levels of abstraction. A trade-off between simulation speed and accuracy is met by adjusting the size of the evaluation window. 
As an example, the piecewise evaluation technique is applied to analyze aging caused by two mechanisms, namely Negative Bias Temperature Instability (NBTI) and Hot Carrier Injection (HCI), in order to identify reliability hotspots. Experiments show that the proposed technique yields considerable simulation speedup at a marginal loss of accuracy.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ravi:2014:HLT, author = "Srivaths Ravi and Michael Joseph", title = "High-Level Test Synthesis: a Survey from Synthesis Process Flow Perspective", journal = j-TODAES, volume = "19", number = "4", pages = "38:1--38:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2627754", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High-level test synthesis is a special class of high-level synthesis having testability as one of the important components. This article presents a detailed survey on recent developments in high-level test synthesis from a synthesis process flow perspective. 
It also presents a survey on controller synthesis techniques for testability.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Juan:2014:SPT, author = "Da-Cheng Juan and Siddharth Garg and Diana Marculescu", title = "Statistical Peak Temperature Prediction and Thermal Yield Improvement for {$3$D} Chip Multiprocessors", journal = j-TODAES, volume = "19", number = "4", pages = "39:1--39:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2633606", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Thermal issues have become critical roadblocks for achieving highly reliable three-dimensional (3D) integrated circuits (ICs). The presence of process variations further exacerbates these problems. In this article, we propose techniques for the efficient evaluation and mitigation of the impact of leakage power variations on the temperature profile of 3D Chip Multiprocessors (CMPs). Experimental results demonstrate that, due to the impact of process variations, a 4-tier 3D implementation can be more than 40$^\circ$C hotter and 23\% leakier than its 2D counterpart. To determine the maximum temperature of each fabricated 3D IC, we propose an accurate learning-based model for peak temperature prediction. Based on the learning model, we then propose two post-fabrication techniques to increase the thermal yield of 3D CMPs: (1) tier restacking and (2) thermally-aware die matching. 
Experimental results show that: (1) the proposed prediction model achieves more than 98\% accuracy, and (2) the proposed thermally-aware, post-fabrication optimization techniques significantly improve the thermal yield from only 51\% to 99\% for 3D CMPs.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Livramento:2014:HTD, author = "Vinicius S. Livramento and Chrystian Guth and Jos{\'e} Lu{\'\i}s G{\"u}ntzel and Marcelo O. Johann", title = "A Hybrid Technique for Discrete Gate Sizing Based on {Lagrangian} Relaxation", journal = j-TODAES, volume = "19", number = "4", pages = "40:1--40:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647956", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Discrete gate sizing has attracted a lot of attention recently as the EDA industry faces the challenge of optimizing large standard cell-based circuits. The discrete nature of the problem, along with complex timing models, stringent design constraints, and ever-increasing circuit sizes, make the problem very difficult to tackle. Lagrangian Relaxation (LR) is an effective technique to handle complex constrained optimization problems and therefore has been successfully applied to solve the gate sizing problem. This article proposes an improved Lagrangian relaxation formulation for discrete gate sizing that relaxes timing, maximum gate input slew, and maximum gate output capacitance constraints. Based on such formulation, we propose a hybrid technique composed of three steps. First, a topological greedy heuristic solves the LR formulation. 
Such a heuristic is applied assuming a slightly increased target clock period (backoff factor) to better explore the solution space. Second, a delay recovery heuristic reestablishes the original target clock with small power overhead. Third, a power recovery heuristic explores the remaining slacks to further reduce power. Experiments on the ISPD 2012 Contest benchmarks show that our hybrid technique provides less leakage power than the state-of-the-art work for every circuit from the ISPD 2012 Contest infrastructure, achieving up to 24\% less leakage. In addition, our technique achieves a much better compromise between leakage reduction and runtime, obtaining, on average, 9\% less leakage power while running 8.8 times faster.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ho:2014:USS, author = "Yenpo Ho and Garng M. Huang and Peng Li", title = "Understanding {SRAM} Stability via Bifurcation Analysis: Analytical Models and Scaling Trends", journal = j-TODAES, volume = "19", number = "4", pages = "41:1--41:??", month = aug, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2647957", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Aug 25 19:03:51 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the past decades, aggressive scaling of transistor feature size has been a primary force driving higher Static Random Access Memory (SRAM) integration density. Due to technology scaling, nanometer SRAM designs become increasingly vulnerable to stability challenges. The traditional way of analyzing stability is through the use of Static Noise Margins (SNMs). 
SNMs are not capable of capturing the key nonlinear dynamics associated with memory operations, leading to imprecise characterization of stability. This work rigorously develops dynamic stability concepts and, more importantly, captures them in physically based analytical models. By leveraging nonlinear stability theory, we develop analytical models that characterize the minimum required amplitude and duration of injected current noises that can flip the SRAM state. These models, which are parameterized in key design, technology, and operating condition parameters, provide important design insights and offer a basis for predicting scaling trends of SRAM dynamic stability.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2014:EBT, author = "Naehyuck Chang and David Z. Pan and Yuan Xie", title = "Editorial: {{\booktitle{ACM Transactions on Design Automation of Electronics Systems}}} and Beyond", journal = j-TODAES, volume = "20", number = "1", pages = "1:1--1:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2676865", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hu:2014:GLI, author = "Wei Hu and Dejun Mu and Jason Oberg and Baolei Mao and Mohit Tiwari and Timothy Sherwood and Ryan Kastner", title = "Gate-Level Information Flow Tracking for Security Lattices", journal = j-TODAES, volume = "20", number = "1", pages = "2:1--2:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = 
"https://doi.org/10.1145/2676548", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High-assurance systems found in safety-critical infrastructures are facing steadily increasing cyber threats. These critical systems require rigorous guarantees in information flow security to prevent confidential information from leaking to an unclassified domain and the root of trust from being violated by an untrusted party. To enforce bit-tight information flow control, gate-level information flow tracking (GLIFT) has recently been proposed to precisely measure and manage all digital information flows in the underlying hardware, including implicit flows through hardware-specific timing channels. However, existing work in this realm either restricts to two-level security labels or essentially targets two-input primitive gates and several simple multilevel security lattices. This article provides a general way to expand the GLIFT method for multilevel security. Specifically, it formalizes tracking logic for an arbitrary Boolean gate under finite security lattices, presents a precise tracking logic generation method for eliminating false positives in GLIFT logic created in a constructive manner, and illustrates application scenarios of GLIFT for enforcing multilevel information flow security. Experimental results show various trade-offs in precision and performance of GLIFT logic created using different methods. 
It also reveals the area and performance overheads that should be expected when expanding GLIFT for multilevel security.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2014:CTS, author = "Chun-Kai Wang and Yeh-Chi Chang and Hung-Ming Chen and Ching-Yu Chin", title = "Clock Tree Synthesis Considering Slew Effect on Supply Voltage Variation", journal = j-TODAES, volume = "20", number = "1", pages = "3:1--3:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2651401", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work tackles a problem of clock power minimization within a skew constraint under supply voltage variation. This problem is defined in the ISPD 2010 benchmark. Unlike mesh and cross link that reduce clock skew uncertainty by multiple driving paths, our focus is on controlling skew uncertainty in the structure of the tree. We observe that slow slew amplifies supply voltage variation, which induces larger path delay variation and skew uncertainty. To obtain the optimality, we formulate a symmetric clock tree synthesis as a mathematical programming problem in which the slew effect is considered by an NLDM-like cell delay variation model. A symmetry-to-asymmetry tree transformation is proposed to further reduce wire loading. Experimental results show that the proposed four methods save up to 20\% of clock tree capacitance loading. 
Beyond controlling slew to suppress supply-voltage-variation-induced skew, we also discuss the strategies of clock tree synthesis under variant variation scenarios and the limitations of the ISPD 2010 benchmark.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2014:SIS, author = "Lingyi Liu and Shobha Vasudevan", title = "Scaling Input Stimulus Generation through Hybrid Static and Dynamic Analysis of {RTL}", journal = j-TODAES, volume = "20", number = "1", pages = "4:1--4:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2676549", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We enhance STAR, an automatic technique for functional input vector generation for design validation. STAR statically analyzes the source code of the Register-Transfer Level (RTL) design. The STAR approach is a hybrid between RTL symbolic execution and concrete simulation that offsets the disadvantages of both. The symbolic execution, which follows the concrete simulation path, extracts constraints for that path. The guard in the path constraints is then mutated and passed to an SMT solver. A satisfiable assignment generates a valid input vector. However, STAR suffers the problem of path explosion during symbolic execution. In this article, we present an explored symbolic state caching method to attack path explosion. Explored symbolic states are states starting from which all subpaths have been explored. Each explored symbolic state is stored in the form of bitmap encoding of branches to ease comparison. 
When the explored symbolic state is reached again in the following symbolic execution, all subpaths can be pruned. In addition, we use two types of optimizations: (a) dynamic UD chain slicing; and (b) local conflict resolution to improve the running efficiency of STAR. We demonstrate that the results of the enhanced STAR are promising in showing high coverage on benchmark RTL designs, and the runtime of the test generation process is reduced from several hours to less than 20 minutes.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sinha:2014:DGP, author = "Sharad Sinha and Thambipillai Srikanthan", title = "Dataflow Graph Partitioning for Area-Efficient High-Level Synthesis with Systems Perspective", journal = j-TODAES, volume = "20", number = "1", pages = "5:1--5:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2660769", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Area efficiency in datapath synthesis is a widely accepted goal of high-level synthesis. Applications represented by their dataflow graphs are synthesized using resource sharing principles to reduce the area. However, existing resource sharing algorithms focus on absolute area reduction and maximal resource sharing. This kind of a design approach leads to constraints on how often, in terms of number of clock cycles, a new set of input data can be fed to an application. It also leads to very large multiplexers in case of very big dataflow graphs with hundreds of nodes. 
An adaptive dataflow graph partitioning algorithm is proposed that partitions a graph taking into account a user-defined constraint on how often a new set of input data (generally referred to as data initiation interval) is available. At the same time, a resource sharing algorithm is applied to such partitions in order to reduce area. Multiple design points are generated for a given dataflow graph with different area and time measures to enable a designer to make decisions. We demonstrate our graph partitioning algorithm using synthetically generated large dataflow graphs and on some benchmark applications.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gange:2014:SOS, author = "Graeme Gange and Harald S{\o}ndergaard and Peter J. Stuckey", title = "Synthesizing Optimal Switching Lattices", journal = j-TODAES, volume = "20", number = "1", pages = "6:1--6:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2661632", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The use of nanoscale technologies to create electronic devices has revived interest in the use of regular structures for defining complex logic functions. One such structure is the switching lattice, a two-dimensional lattice of four-terminal switches. We show how to directly construct switching lattices of polynomial size from arbitrary logic functions; we also show how to synthesize minimal-sized lattices by translating the problem to the satisfiability problem for a restricted class of quantified Boolean formulas. The synthesis method is an anytime algorithm that uses modern SAT solving technology and dichotomic search. 
It improves considerably on an earlier proposal for creating switching lattices for arbitrary logic functions.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cheng:2014:ECD, author = "An-Che Cheng and Chia-Chih (Jack) Yen and Celina G. Val and Sam Bayless and Alan J. Hu and Iris Hui-Ru Jiang and Jing-Yang Jou", title = "Efficient Coverage-Driven Stimulus Generation Using Simultaneous {SAT} Solving, with Application to {SystemVerilog}", journal = j-TODAES, volume = "20", number = "1", pages = "7:1--7:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2651400", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "SystemVerilog provides powerful language constructs for verification, and one of them is the covergroup functional coverage model. This model is designed as a complement to assertion verification, that is, it has the advantage of defining cross-coverage over multiple coverage points. In this article, a coverage-driven verification (CDV) approach is formulated as a simultaneous Boolean satisfiability (SAT) problem that is based on covergroups. The coverage bins defined by the functional model are converted into Conjunction Normal Form (CNF) and then solved together by our proposed simultaneous SAT algorithm PLNSAT to generate stimuli for improving coverage. The basic PLNSAT algorithm is then extended in our second proposed algorithm GPLNSAT, which exploits additional information gleaned from the structure of SystemVerilog covergroups. 
Compared to generating stimuli separately, the simultaneous SAT approaches can share learned knowledge across each coverage target, thus reducing the overall solving time drastically. Experimental results on a UART circuit and the largest ITC benchmark circuits show that the proposed algorithms can achieve 10.8x speedup on average and outperform state-of-the-art techniques in most of the benchmarks.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2014:SUM, author = "Xueliang Li and Guihai Yan and Yinhe Han and Xiaowei Li", title = "{SmartCap}: Using Machine Learning for Power Adaptation of {Smartphone}'s Application Processor", journal = j-TODAES, volume = "20", number = "1", pages = "8:1--8:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2651402", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power efficiency is increasingly critical to battery-powered smartphones. Given that the using experience is most valued by the user, we propose that the power optimization should directly respect the user experience. We conduct a statistical sample survey and study the correlation among the user experience, system runtime activities, and computational performance of an application processor. We find that there exists a minimal frequency requirement, called ``saturated frequency''. Above this frequency, the device consumes more power but provides little improvements in user experience. This study motivates an intelligent self-adaptive scheme, SmartCap, that automatically identifies the most power-efficient state of the application processor. 
Compared to prior Linux power adaptation schemes, SmartCap can help save power from 11\% to 84\%, depending on applications, with little decline in user experience.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shih:2014:COR, author = "Wen-Li Shih and Yi-Ping You and Chung-Wen Huang and Jenq Kuen Lee", title = "Compiler Optimization for Reducing Leakage Power in Multithread {BSP} Programs", journal = j-TODAES, volume = "20", number = "1", pages = "9:1--9:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2668119", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multithread programming is widely adopted in novel embedded system applications due to its high performance and flexibility. This article addresses compiler optimization for reducing the power consumption of multithread programs. A traditional compiler employs energy management techniques that analyze component usage in control-flow graphs with a focus on single-thread programs. In this environment the leakage power can be controlled by inserting on and off instructions based on component usage information generated by flow equations. However, these methods cannot be directly extended to a multithread environment due to concurrent execution issues. This article presents a multithread power-gating framework composed of multithread power-gating analysis (MTPGA) and predicated power-gating (PPG) energy management mechanisms for reducing the leakage power when executing multithread programs on simultaneous multithreading (SMT) machines. Our multithread programming model is based on hierarchical bulk-synchronous parallel (BSP) models. 
Based on a multithread component analysis with dataflow equations, our MTPGA framework estimates the energy usage of multithread programs and inserts PPG operations as power controls for energy management. We performed experiments by incorporating our power optimization framework into SUIF compiler tools and by simulating the energy consumption with a post-estimated SMT simulator based on Wattch toolkits. The experimental results show that the total energy consumption of a system with PPG support and our power optimization method is reduced by an average of 10.09\% for BSP programs relative to a system without a power-gating mechanism on leakage contribution set to 30\%; and the total energy consumption is reduced by an average of 4.27\% on leakage contribution set to 10\%. The results demonstrate our mechanisms are effective in reducing the leakage energy of BSP multithread programs.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Maric:2014:HCD, author = "Bojan Maric and Jaume Abella and Francisco J. Cazorla and Mateo Valero", title = "Hybrid Cache Designs for Reliable Hybrid High and Ultra-Low Voltage Operation", journal = j-TODAES, volume = "20", number = "1", pages = "10:1--10:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2658988", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Geometry scaling of semiconductor devices enables the design of ultra-low-cost (e.g., below 1 USD) battery-powered resource-constrained ubiquitous devices for environment, urban life, and body monitoring. 
These sensor-based devices require high performance to react in front of infrequent particular events as well as extreme energy efficiency in order to extend battery lifetime during most of the time when low performance is required. In addition, they require real-time guarantees. The most suitable technological solution for these devices consists of using hybrid processors able to operate at: (i) high voltage to provide high performance and (ii) near-/subthreshold voltage to provide ultra-low energy consumption. However, the most efficient SRAM memories for each voltage level differ and trading off different SRAM designs is mandatory. This is particularly true for cache memories, which occupy most of the processor's area. In this article, we propose new, simple, single-Vcc-domain hybrid L1 cache architectures suitable for reliable hybrid high and ultra-low voltage operation. In particular, the cache is designed by combining heterogeneous SRAM cell types: some of the cache ways are optimized to satisfy high-performance requirements during high voltage operation, whereas the rest of the ways provide ultra-low energy consumption and reliability during near-/subthreshold voltage operation. We analyze the performance, energy, and power impact of the proposed cache designs when using them to implement L1 caches in a processor. 
Experimental results show that our hybrid caches can efficiently and reliably operate across a wide range of voltages, consuming little energy at near-/subthreshold voltage as well as providing high performance at high voltage without decreasing reliability levels to provide strong performance guarantees, as required for our target market.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Baek:2014:DHD, author = "Seungcheol Baek and Hyung Gyu Lee and Chrysostomos Nicopoulos and Jongman Kim", title = "Designing Hybrid {DRAM\slash PCM} Main Memory Systems Utilizing Dual-Phase Compression", journal = j-TODAES, volume = "20", number = "1", pages = "11:1--11:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2658989", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The last few years have witnessed the emergence of a promising new memory technology, namely Phase-Change Memory (PCM). Due to its inherent ability to scale deeply into the nanoscale regime and its low power consumption, PCM is increasingly viewed as an attractive alternative for the memory subsystem of future microprocessor architectures. However, PCM is marred by a duo of potentially show-stopping deficiencies, that is, poor write performance (especially when compared to the prevalent and ubiquitous DRAM technology) and limited durability. These weaknesses have urged designers to develop various supporting architectural techniques to aid and complement the operation of the PCM while mitigating its innate flaws. 
One promising such solution is the deployment of hybridized memory architectures that fuse DRAM and PCM, in order to combine the best attributes of each technology. In this article, we introduce a novel Dual-Phase Compression (DPC) scheme and its architectural design aimed at DRAM/PCM hybrids, which caters to the limitations of PCM technology while optimizing memory performance. The DPC technique is specifically optimized for PCM-based environments and is transparent to the operation of the remaining components of the memory subsystem. Furthermore, the proposed architecture is imbued with a multifaceted wear-leveling technique to enhance the durability and prolong the lifetime of the PCM. Extensive simulations with traces from real applications running on a full-system simulator demonstrate 20.4\% performance improvement and 46.9\% energy reduction, on average, as compared to a baseline DRAM/PCM hybrid implementation. Additionally, the multifaceted wear-leveling technique is shown to significantly prolong the lifetime of the PCM.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kuo:2014:RCS, author = "Hsien-Kai Kuo and Bo-Cheng Charles Lai and Jing-Yang Jou", title = "Reducing Contention in Shared Last-Level Cache for Throughput Processors", journal = j-TODAES, volume = "20", number = "1", pages = "12:1--12:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2676550", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Deploying the Shared Last-Level Cache (SLLC) is an effective way to alleviate the memory bottleneck in modern throughput processors, such as GPGPUs. 
A commonly used scheduling policy of throughput processors is to render the maximum possible thread-level parallelism. However, this greedy policy usually causes serious cache contention on the SLLC and significantly degrades the system performance. It is therefore a critical performance factor that the thread scheduling of a throughput processor performs a careful trade-off between the thread-level parallelism and cache contention. This article characterizes and analyzes the performance impact of cache contention in the SLLC of throughput processors. Based on the analyses and findings of cache contention and its performance pitfalls, this article formally formulates the aggregate working-set-size-constrained thread scheduling problem that constrains the aggregate working-set size on concurrent threads. With a proof to be NP-hard, this article has integrated a series of algorithms to minimize the cache contention and enhance the overall system performance on GPGPUs. The simulation results on NVIDIA's Fermi architecture have shown that the proposed thread scheduling scheme achieves up to 61.6\% execution time enhancement over a widely used thread clustering scheme. When compared to the state-of-the-art technique that exploits the data reuse of applications, the improvement on execution time can reach 47.4\%. Notably, the execution time improvement of the proposed thread scheduling scheme is only 2.6\% from an exhaustive searching scheme.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sinha:2014:FAI, author = "Roopak Sinha and Alain Girault and Gregor Goessler and Partha S. 
Roop", title = "A Formal Approach to Incremental Converter Synthesis for System-on-Chip Design", journal = j-TODAES, volume = "20", number = "1", pages = "13:1--13:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2663344", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A system-on-chip (SoC) contains numerous intellectual property blocks, or IPs. Protocol mismatches between IPs may affect the system-level functionality of the SoC. Mismatches are addressed by introducing converters to control inter-IP interactions. Current approaches towards converter generation find limited practical application as they use restrictive models, lack formal rigour, handle a small subset of commonly encountered mismatches, and/or are not scalable. We propose a formal technique for SoC design using incremental converter synthesis. The proposed formulation provides precise models for protocols and requirements, and provides a scalable algorithm that allows adding multiple components and requirements to an SoC incrementally. We prove that the technique is sound and complete. 
Experimental results obtained using real-life AMBA benchmarks show the scalability and wide range of mismatches handled by our approach.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Aksoy:2014:MDF, author = "Levent Aksoy and Paulo Flores and Jose Monteiro", title = "Multiplierless Design of Folded {DSP} Blocks", journal = j-TODAES, volume = "20", number = "1", pages = "14:1--14:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2663343", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article addresses the problem of minimizing the implementation cost of the time-multiplexed constant multiplication (TMCM) operation that realizes the multiplication of an input variable by a single constant selected from a set of multiple constants at a time. It presents an efficient algorithm, called orpheus, that finds a multiplierless TMCM design by sharing logic operators, namely adders, subtractors, adders/subtractors, and multiplexors (MUXes). Moreover, this article introduces folded design architectures for the digital signal processing (DSP) blocks, such as finite impulse response (FIR) filters and linear DSP transforms, and describes how these folded DSP blocks can be efficiently realized using TMCM operations optimized by orpheus. Experimental results indicate that orpheus can find better solutions than existing TMCM algorithms, yielding TMCM designs requiring less area. 
They also show that the folded architectures lead to alternative designs with significantly less area, but incurring an increase in latency and energy consumption, compared to the parallel architecture.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{BasiriM:2014:EHB, author = "Mohamed Asan {Basiri M.} and Noor Mahammad Sk", title = "An Efficient Hardware-Based Higher Radix Floating Point {MAC} Design", journal = j-TODAES, volume = "20", number = "1", pages = "15:1--15:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2667224", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes an effective way of implementing a multiply accumulate circuit (MAC) for high-speed floating point arithmetic operations. The real-world applications related to digital signal processing and the like demand high-performance computation with greater accuracy. In general, digital signals are represented as a sequence of signed/unsigned fixed/floating point numbers. The final result of a MAC operation can be computed by feeding the mantissa of the previous MAC result as one of the partial products to a Wallace tree multiplier or Braun multiplier. Thus, the separate accumulation circuit can be avoided by keeping the circuit depth still within the bounds of the Wallace tree multiplier, namely $ O (\log_2 n) $, or Braun multiplier, namely $ O (n) $. In this article, three kinds of floating point MACs are proposed. 
The experimental results show 48.54\% of improvement in worst path delay achieved by the proposed floating point MAC using a radix-2 Wallace structure compared with a conventional floating point MAC without a pipeline using a 45nm technology library. The same proposed design gives 39.92\% of improvement in worst path delay without a pipeline using a radix-4 Braun structure as compared with a conventional design. In this article, a radix-32 $ Q_{32.32}$-format-based floating point MAC is proposed using a Wallace tree/Braun multiplier. Also this article discusses the msb prediction problem and its solution in floating point arithmetic that is not available in modern fused multiply-add designs. The performance results show comparisons between the proposed floating point MAC with various floating point MAC designs for radix-2, -4, -8, and -16. The proposed design has lesser depth than a conventional floating point MAC as well as a lower area requirement than other ways of floating point MAC implementation, both with/without a pipeline.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bolchini:2014:DHE, author = "Cristiana Bolchini and Chiara Sandionigi", title = "Design of Hardened Embedded Systems on Multi-{FPGA} Platforms", journal = j-TODAES, volume = "20", number = "1", pages = "16:1--16:??", month = nov, year = "2014", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2676551", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Nov 19 11:18:40 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The aim of this article is the definition of a reliability-aware methodology for the design of embedded systems on multi-FPGA platforms. 
The designed system must be able to detect the occurrence of faults globally and autonomously, in order to recover or to mitigate their effects. Two categories of faults are identified, based on their impact on the device elements, namely (i) recoverable faults, transient problems that can be fixed without causing a lasting effect, and (ii) nonrecoverable faults, those that cause a permanent problem, making the portion of the fabric unusable. While some aspects can be taken from previous solutions available in literature, several open issues exist. In fact, no complete design methodology handling all the peculiar issues of the considered scenario has been proposed yet, a gap we aim at filling with our work. The final system exposes reliability properties and increases its overall lifetime and availability.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lu:2015:EEB, author = "Jingwei Lu and Pengwen Chen and Chin-Chih Chang and Lu Sha and Dennis Jen-Hsin Huang and Chin-Chi Teng and Chung-Kuan Cheng", title = "{ePlace}: Electrostatics-Based Placement Using {Fast Fourier Transform} and {Nesterov}'s Method", journal = j-TODAES, volume = "20", number = "2", pages = "17:1--17:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699873", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We develop a flat, analytic, and nonlinear placement algorithm, ePlace, which is more effective, generalized, simpler, and faster than previous works.
Based on the analogy between placement instance and electrostatic system, we develop a novel placement density function eDensity, which models every object as positive charge and the density cost as the potential energy of the electrostatic system. The electric potential and field distribution are coupled with density using a well-defined Poisson's equation, which is numerically solved by spectral methods based on fast Fourier transform (FFT). Instead of using the conjugate gradient (CG) nonlinear solver in previous placers, we propose to use Nesterov's method which achieves faster convergence. The efficiency bottleneck on line search is resolved by predicting the steplength using a closed-form equation of Lipschitz constant. The placement performance is validated through experiments on the ISPD 2005 and ISPD 2006 benchmark suites, where ePlace outperforms all state-of-the-art placers (Capo10.5, FastPlace3.0, RQL, MAPLE, ComPLx, BonnPlace, POLAR, APlace3, NTUPlace3, mPL6) with much shorter wirelength and shorter or comparable runtime. 
On average, of all the ISPD 2005 benchmarks, ePlace outperforms the leading placer BonnPlace with 2.83\% shorter wirelength and runs 3.05$ \times $ faster; and on average, of all the ISPD 2006 benchmarks, ePlace outperforms the leading placer MAPLE with 4.59\% shorter wirelength and runs 2.84$ \times $ faster.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Guo:2015:RDS, author = "Qi Guo and Tianshi Chen and Zhi-Hua Zhou and Olivier Temam and Ling Li and Depei Qian and Yunji Chen", title = "Robust Design Space Modeling", journal = j-TODAES, volume = "20", number = "2", pages = "18:1--18:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2668118", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Architectural design spaces of microprocessors are often exponentially large with respect to the pending processor parameters. To avoid simulating all configurations in the design space, machine learning and statistical techniques have been utilized to build regression models for characterizing the relationship between architectural configurations and responses (e.g., performance or power consumption). However, this article shows that the accuracy variability of many learning techniques over different design spaces and benchmarks can be significant enough to mislead the decision-making. This clearly indicates a high risk of applying techniques that work well on previous modeling tasks (each involving a design space, benchmark, and design objective) to a new task, due to which the powerful tools might be impractical. 
Inspired by ensemble learning in the machine learning domain, we propose a robust framework called ELSE to reduce the accuracy variability of design space modeling. Rather than employing a single learning technique as in previous investigations, ELSE employs distinct learning techniques to build multiple base regression models for each modeling task. This is not a trivial combination of different techniques (e.g., always trusting the regression model with the smallest error). Instead, ELSE carefully maintains the diversity of base regression models and constructs a metamodel from the base models that can provide accurate predictions even when the base models are far from accurate. Consequently, we are able to reduce the number of cases in which the final prediction errors are unacceptably large. Experimental results validate the robustness of ELSE: compared with the widely used artificial neural network over 52 distinct modeling tasks, ELSE reduces the accuracy variability by about 62\%. 
Moreover, ELSE reduces the average prediction error by 27\% and 85\% for the investigated MIPS and POWER design spaces, respectively.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Taouil:2015:YIW, author = "Mottaqiallah Taouil and Said Hamdioui and Erik Jan Marinissen", title = "Yield Improvement for {$3$D} Wafer-to-Wafer Stacked {ICs} Using Wafer Matching", journal = j-TODAES, volume = "20", number = "2", pages = "19:1--19:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699832", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Three-Dimensional Stacked IC (3D-SIC) using Through-Silicon Vias (TSVs) is an emerging technology that provides heterogeneous integration, higher performance, and lower power consumption compared to traditional ICs. Stacking 3D-SICs using Wafer-to-Wafer (W2W) has several advantages such as high stacking throughput, high TSV density, and the ability to handle thin wafers and small dies. However, it suffers from low-compound yield as the stacking of good dies on bad dies and vice versa cannot be prevented. This article investigates wafer matching as a means for yield improvement. It first defines a complete wafer matching framework consisting of different scenarios, each a combination of a matching process (defines the order of wafer selection), a matching criterion (defines whether good or bad dies are matched), wafer rotation (defines either wafers are rotated or not), and a repository type.
The repository type specifies whether either the repository is filled immediately after each wafer selection (i.e., running repository) or after all wafers are matched (i.e., static repository). A mapping of prior work on the framework shows that existing research has mainly explored scenarios based on static repositories. Therefore, the article analyzes scenarios based on running repositories. Simulation results show that scenarios based on running repositories improve the compound yield with up to 13.4\% relative to random W2W stacking; the improvement strongly depends on the number of stacked dies, die yield, repository size, as well as on the used matching process. Moreover, the results reveal that scenarios based on running repositories outperform those of static repositories in terms of yield improvement at significant runtime reduction (three orders of magnitude) and lower memory complexity (from exponential to linear in terms of stack size).", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2015:CDC, author = "Naiwen Chang and Eddie Cheng and Sunyuan Hsieh", title = "Conditional Diagnosability of {Cayley} Graphs Generated by Transposition Trees under the {PMC} Model", journal = j-TODAES, volume = "20", number = "2", pages = "20:1--20:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699854", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Processor fault diagnosis has played an essential role in measuring the reliability of a multiprocessor system. The diagnosability of many well-known multiprocessor systems has been widely investigated. 
Conditional diagnosability is a novel measure of diagnosability by adding a further condition that any fault set cannot contain all the neighbors of every node in the system. Several known structural properties of Cayley graphs are exhibited. Based on these properties, we investigate the conditional diagnosability of Cayley graphs generated by transposition trees under the PMC model and show that it is $ 4 n - 11 $ for $ n \geq 4 $ except for the $n$-dimensional star graph for which it has been shown to be $ 8 n - 21 $ for $ n \geq 5 $ (refer to Chang and Hsieh [2014]).", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Duan:2015:DDO, author = "Qing Duan and Jun Zeng and Krishnendu Chakrabarty and Gary Dispoto", title = "Data-Driven Optimization of Order Admission Policies in a Digital Print Factory", journal = j-TODAES, volume = "20", number = "2", pages = "21:1--21:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699836", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "On-demand digital print service is an example of a real-time embedded enterprise system. It offers mass customization and exemplifies personalized manufacturing services. Once a print order is submitted to the print factory by a client, the print service provider (PSP) needs to make a real-time decision on whether to accept or refuse this order. Based on the print factory's current capacity and the order's properties and requirements, an order is refused if its acceptance is not profitable for the PSP. The order is accepted with the most appropriate due date in order to maximize the profit that can result from this order.
We have developed an automated learning-based order admission framework that can be embedded into an enterprise environment to provide real-time admission decisions for new orders. The framework consists of three classifiers: Support Vector Machine (SVM), Decision Tree (DT), and Bayesian Probabilistic Model (BPM). The classifiers are trained by history orders and used to predict completion status for new orders. A decision integration technique is implemented to combine the results of the classifiers and predict due dates. Experimental results derived using real factory data from a leading print service provider and Weka open-source software show that the order completion status prediction accuracy is significantly improved by the decision integration strategy. The proposed multiclassifier model also outperforms a standalone regression model.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2015:DES, author = "Cheng-Yen Lin and Chung-Wen Huang and Chi-Bang Kuan and Shi-Yu Huang and Jenq-Kuen Lee", title = "The Design and Experiments of a {SID}-Based Power-Aware Simulator for Embedded Multicore Systems", journal = j-TODAES, volume = "20", number = "2", pages = "22:1--22:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699834", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Embedded multicore systems are playing increasingly important roles in the design of consumer electronics. The objective of such systems is to optimize both performance and power characteristics of mobile devices. 
However, currently there are no power metrics supporting popular application design platforms (such as SID) that application developers use to develop their applications. This hinders the ability of application developers to optimize power consumption. In this article we present the design and experiments of a SID-based power-aware simulation framework for embedded multicore systems. The proposed power estimation flow includes two phases: IP-level power modeling and power-aware system simulation. The first phase employs PowerMixer$^{IP}$ to construct the power model for the processor IP and other major IPs, while the second phase involves a power abstract interpretation method for summarizing the simulation trace, then, with a CPE module, estimating the power consumption based on the summarized trace information and the input of IP power models. In addition, a Manager component is devised to map each digital signal processor (DSP) component to a host thread and maintain the access to shared resources. The aim is to maintain the simulation performance as the number of simulated DSP components increases. A power-profiling API is also supported that developers of embedded software can use to tune the granularity of power-profiling for a specific code section of the target application. We demonstrate via case studies and experiments how application developers can use our SID-based power simulator for optimizing the power consumption of their applications. We characterize the power consumption of DSP applications with the DSPstone benchmark and discuss how compiler optimization levels with SIMD intrinsics influence the performance and power consumption. 
A histogram application and an augmented-reality application based on human-face-based RMS (recognition, mining, and synthesis) application are deployed as running examples on multicore systems to demonstrate how our power simulator can be used by developers in the optimization process to illustrate different views of power dissipations of applications.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Asadinia:2015:PLP, author = "Marjan Asadinia and Mohammad Arjomand and Hamid Sarbazi Azad", title = "Prolonging Lifetime of {PCM}-Based Main Memories through On-Demand Page Pairing", journal = j-TODAES, volume = "20", number = "2", pages = "23:1--23:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699867", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With current memory scalability challenges, Phase-Change Memory (PCM) is viewed as an attractive replacement to DRAM. The preliminary concern for PCM applicability is its limited write endurance that results in fast wear-out of memory cells. Worse, process variation in the deep-nanometer regime increases the variation in cell lifetime, resulting in an early and sudden reduction in main memory capacity due to the wear-out of a few cells. Recent studies have proposed redirection or correction schemes to alleviate this problem, but all suffer poor throughput or latency. 
In this article, we show that one of the inefficiency sources in current schemes, even when wear-leveling algorithms are used, is the nonuniform write endurance limit incurred by process variation, that is, when some memory pages have reached their endurance limit, other pages may be far from their limit. In this line, we present a technique that aims to displace a faulty page to a healthy page. This technique, called On-Demand Page Paired PCM (OD3P, for short), when applied at page level, can improve PCM time-to-failure by 20\% on average for different multithreaded and multiprogrammed workloads while also improving IPC by 14\% on average compared to previous page-level techniques. The comparison between line-level OD3P and previous line-level techniques reveals about 2$ \times $ improvement of lifetime and performance.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2015:OAA, author = "Xing Huang and Genggeng Liu and Wenzhong Guo and Yuzhen Niu and Guolong Chen", title = "Obstacle-Avoiding Algorithm in {X}-Architecture Based on Discrete Particle Swarm Optimization for {VLSI} Design", journal = j-TODAES, volume = "20", number = "2", pages = "24:1--24:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699862", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Obstacle-avoiding Steiner minimal tree (OASMT) construction has become a focus problem in the physical design of modern very large-scale integration (VLSI) chips. In this article, an effective algorithm is presented to construct an OASMT based on X-architecture for a given set of pins and obstacles. 
First, a kind of special particle swarm optimization (PSO) algorithm is proposed that successfully combines the classic genetic algorithm (GA), and greatly improves its own search capability. Second, a pretreatment strategy is put forward to deal with obstacles and pins, which can provide a fast information inquiry for the whole algorithm by generating a precomputed lookup table. Third, we present an efficient adjustment method, which enables particles to avoid all the obstacles by introducing some corner points of obstacles. Finally, an excellent refinement method is discussed to further enhance the quality of the final routing tree, which can improve the quality of the solution by 7.93\% on average. To our best knowledge, this is the first time to specially solve the single-layer obstacle-avoiding problem in X-architecture. Experimental results show that the proposed algorithm can further shorten wirelength in the presence of obstacles. And it achieves the best solution quality in a reasonable runtime among the existing algorithms.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2015:MBW, author = "Hung-Sheng Chang and Yuan-Hao Chang and Pi-Cheng Hsiu and Tei-Wei Kuo and Hsiang-Pang Li", title = "Marching-Based Wear-Leveling for {PCM}-Based Storage Systems", journal = j-TODAES, volume = "20", number = "2", pages = "25:1--25:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699831", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Improving the performance of storage systems without losing the reliability and sanity/integrity of file systems is a major issue in storage system 
designs. In contrast to existing storage architectures, we consider a PCM-based storage architecture to enhance the reliability of storage systems. In PCM-based storage systems, the major challenge falls on how to prevent the frequently updated (meta)data from wearing out their residing PCM cells without excessively searching and moving metadata around the PCM space and without extensively updating the index structures of file systems. In this work, we propose an adaptive wear-leveling mechanism to prevent any PCM cell from being worn out prematurely by selecting appropriate data for swapping with constant search/sort cost. Meanwhile, the concept of indirect pointers is designed in the proposed mechanism to swap data without any modification to the file system's indexes. Experiments were conducted based on well-known benchmarks and realistic workloads to evaluate the effectiveness of the proposed design, for which the results are encouraging.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2015:APB, author = "Gang Chen and Kai Huang and Christian Buckl and Alois Knoll", title = "Applying Pay-Burst-Only-Once Principle for Periodic Power Management in Hard Real-Time Pipelined Multiprocessor Systems", journal = j-TODAES, volume = "20", number = "2", pages = "26:1--26:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699865", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Pipelined computing is a promising paradigm for embedded system design. Designing a power management policy to reduce the power consumption of a pipelined system with nondeterministic workload is, however, nontrivial. 
In this article, we study the problem of energy minimization for coarse-grained pipelined systems under hard real-time constraints and propose new approaches based on an inverse use of the pay-burst-only-once principle. We formulate the problem by means of the resource demands of individual pipeline stages and propose two new approaches, a quadratic programming-based approach and fast heuristic, to solve the problem. In the quadratic programming approach, the problem is transformed into a standard quadratic programming with box constraint and then solved by a standard quadratic programming solver. Observing the problem is NP-hard, the fast heuristic is designed to solve the problem more efficiently. Our approach is scalable with respect to the numbers of pipeline stages. Simulation results using real-life applications are presented to demonstrate the effectiveness of our methods.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yonga:2015:ABE, author = "Franck Yonga and Michael Mefenza and Christophe Bobda", title = "{ASP}-Based Encoding Model of Architecture Synthesis for Smart Cameras in Distributed Networks", journal = j-TODAES, volume = "20", number = "2", pages = "27:1--27:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2701419", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A synthesis approach based on Answer Set Programming (ASP) for heterogeneous system-on-chips to be used in distributed camera networks is presented. In such networks, the tight resource limitations represent a major challenge for application development. 
Starting with a high-level description of applications, the physical constraints of the target devices, and the specification of network configuration, our goal is to produce optimal computing infrastructures made of a combination of hardware and software components for each node of the network. Optimization aims at maximizing speed while minimizing chip area and power consumption. Additionally, by performing the architecture synthesis simultaneously for all cameras in the network, we are able to minimize the overall utilization of communication resources and consequently reduce power consumption. Because of its reconfiguration capabilities, a Field Programmable Gate Array (FPGA) has been chosen as the target device, which enhances the exploration of several design alternatives. We present several realistic network scenarios to evaluate and validate the proposed synthesis approach.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2015:AIP, author = "Lok-Won Kim and Dong-U Lee and John Villasenor", title = "Automated Iterative Pipelining for {ASIC} Design", journal = j-TODAES, volume = "20", number = "2", pages = "28:1--28:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2660768", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We describe an automated pipelining approach for optimally balanced pipeline implementation that achieves low area cost as well as meeting timing requirements. 
Most previous automatic pipelining methods have focused on Instruction Set Architecture (ISA)-based designs and the main goal of such methods generally has been maximizing performance as measured in terms of instructions per clock (IPC). By contrast, we focus on datapath-oriented designs (e.g., DSP filters for image or communication processing applications) in ASIC design flows. The goal of the proposed pipelining approach is to find the optimally pipelined design that not only meets the user-specified target clock frequency, but also seeks to minimize area cost of a given design. Unlike most previous approaches, the proposed methods incorporate the use of accurate area and timing information (iteratively achieved by synthesizing every interim pipelined design) to achieve higher accuracy during design exploration. When compared with exhaustive design exploration that considers all possible pipeline patterns, the two heuristic pipelining methods presented here involve only a small area penalty (typically under 5\%) while offering dramatically reduced computational complexity. 
Experimental validation is performed with commercial ASIC design tools and described for applications including polynomial function evaluation, FIR filters, matrix multiplication, and discrete wavelet transform filter designs with a 90nm standard cell library.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2015:GDU, author = "Irith Pomeranz", title = "A Generalized Definition of Unnecessary Test Vectors in Functional Test Sequences", journal = j-TODAES, volume = "20", number = "2", pages = "29:1--29:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699853", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A class of static test compaction procedures for functional test sequences is based on the omission of unnecessary test vectors. According to the definition used by these procedures, a test vector is unnecessary if all the target faults continue to be detected after it is omitted. This article introduces a more general definition of unnecessary test vectors that allows additional ones to be omitted. According to this definition, a test vector is unnecessary if every target fault can be detected by a sequence that is obtained after omitting the vector, and possibly other vectors. 
The article develops a procedure for omitting test vectors based on this definition and discusses its effects on the storage requirements and test application time.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Baranowski:2015:RSN, author = "Rafal Baranowski and Michael A. Kochte and Hans-Joachim Wunderlich", title = "Reconfigurable Scan Networks: Modeling, Verification, and Optimal Pattern Generation", journal = j-TODAES, volume = "20", number = "2", pages = "30:1--30:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699863", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Efficient access to on-chip instrumentation is a key requirement for post-silicon validation, test, debug, bringup, and diagnosis. Reconfigurable scan networks, as proposed by, for example, IEEE Std 1687-2014 and IEEE Std 1149.1-2013, emerge as an effective and affordable means to cope with the increasing complexity of on-chip infrastructure. Reconfigurable scan networks are often hierarchical and may have complex structural and functional dependencies. Common approaches for scan verification based on static structural analysis and functional simulation are not sufficient to ensure correct operation of these types of architectures. To access an instrument in a reconfigurable scan network, a scan-in bit sequence must be generated according to the current state and structure of the network. Due to sequential and combinational dependencies, the access pattern generation process (pattern retargeting) poses a complex decision and optimization problem. 
This article presents the first generalized formal model that considers structural and functional dependencies of reconfigurable scan networks and is directly applicable to 1687-2014-based and 1149.1-2013-based scan architectures. This model enables efficient formal verification of complex scan networks, as well as automatic generation of access patterns. The proposed pattern generation method supports concurrent access to multiple target scan registers (access merging) and generates short scan-in sequences.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Beznia:2015:TAR, author = "Kamel Beznia and Ahcene Bounceur and Reinhardt Euler and Salvador Mir", title = "A Tool for Analog\slash {RF BIST} Evaluation Using Statistical Models of Circuit Parameters", journal = j-TODAES, volume = "20", number = "2", pages = "31:1--31:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699837", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Testing analog integrated circuits is expensive in terms of both test equipment and time. To reduce the cost, Design-For-Test techniques (DFT) such as Built-In Self-Test (BIST) have been developed. For a given Circuit Under Test (CUT), the choice of a suitable technique should be made at the design stage as a result of the analysis of test metrics such as test escapes and yield loss. However, it is very hard to carry out this estimation for analog/RF circuits by using fault simulation techniques. Instead, the estimation of parametric test metrics is made possible by Monte Carlo circuit-level simulations and the construction of statistical models. 
These models represent the output parameter space of the CUT in which the test metrics are defined. In addition, models of the input parameter space may be required to accelerate the simulations and obtain higher confidence in the DFT choices. In this work, we describe a methodological flow for the selection of most adequate statistical models and several techniques that can be used for obtaining these models. Some of these techniques have been integrated into a Computer-Aided Test (CAT) tool for the automation of the process of test metrics estimation. This estimation is illustrated for the case of a BIST solution for CMOS imager pixels that requires the use of advanced statistical modeling techniques.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gupte:2015:FAT, author = "Adwait Gupte and Sudhanshu Vyas and Phillip H. Jones", title = "A Fault-Aware Toolchain Approach for {FPGA} Fault Tolerance", journal = j-TODAES, volume = "20", number = "2", pages = "32:1--32:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699838", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the size and density of silicon chips continue to increase, maintaining acceptable manufacturing yields has become increasingly difficult. Recent works suggest that lithography techniques are reaching their limits with respect to enabling high yield fabrication of small-scale devices, thus there is an increasing need for techniques that can tolerate fabrication time defects. One candidate technology to help combat these defects is reconfigurable hardware. 
The flexible nature of reconfigurable devices, such as Field Programmable Gate Arrays (FPGAs), makes it possible for them to route around defective areas of a chip after the device has been packaged and deployed into the field. This work presents a technique that aims to increase the effective yield of FPGA manufacturing by re-claiming a portion of chips that would be ordinarily classified as unusable. In brief, we propose a modification to existing commercial toolchain flows to make them fault aware. A phase is added to identify faults within the chip. The locations of these faults are then used by the toolchain to avoid faults during the placement and routing phase. Specifically, we have applied our approach to the Xilinx commercial toolchain flow and evaluated its tolerance to both logic and routing resource faults. Our findings show that, at a cost of 5--10\% in device frequency performance, the modified toolchain flow can tolerate up to 30\% of logic resources being faulty and, depending on the nature of the target application, can tolerate 1--30\% of the device's routing resources being faulty. 
These results provide strong evidence that commercial toolchains not designed for the purpose of tolerating faults can still be greatly leveraged in the presence of faults to place and route circuits in an efficient manner.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2015:RBA, author = "Jiliang Zhang and Yaping Lin and Gang Qu", title = "Reconfigurable Binding against {FPGA} Replay Attacks", journal = j-TODAES, volume = "20", number = "2", pages = "33:1--33:??", month = feb, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699833", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 3 14:46:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The FPGA replay attack, where an attacker downgrades an FPGA-based system to the previous version with known vulnerabilities, has become a serious security and privacy concern for FPGA design. Current FPGA intellectual property (IP) protection mechanisms target the protection of FPGA configuration bitstreams by watermarking or encryption or binding. However, these mechanisms fail to prevent replay attacks. In this article, based on a recently reported PUF-FSM binding method that protects the usage of configuration bitstreams, we propose to reconfigure both the physical unclonable functions (PUFs) and the locking scheme of the finite state machine (FSM) in order to defeat the replay attack. We analyze the proposed scheme and demonstrate how replay attack would fail in attacking systems protected by the reconfigurable binding method. We implement two ways to build reconfigurable PUFs and propose two practical methods to reconfigure the locking scheme. 
Experimental results show that the two reconfigurable PUFs can generate significantly distinct responses with average reconfigurability of more than 40\%. The reconfigurable locking schemes only incur a timing overhead less than 1\%.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Srivastav:2015:DUL, author = "Meeta Srivastav and Mohammed Ehteshamuddin and Kyle Stegner and Leyla Nazhandali", title = "Design of Ultra-Low Power Scalable-Throughput Many-Core {DSP} Applications", journal = j-TODAES, volume = "20", number = "3", pages = "34:1--34:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2720018", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a system-level solution in designing process variation aware (PVA) scalable-throughput many-core systems for energy constrained applications. In our proposed methodology, we leverage the benefits of voltage scaling for obtaining energy efficiency while compensating for the loss in throughput by exploiting parallelism present in various DSP designs. We demonstrate that such a hybrid method consumes 6.27\%--28.15\% less power as compared to simple dynamic voltage scaling over different workload environments. Design details of a prototype chip fabricated on 90 nm technology node and its findings are presented. Chip consists of 8 homogeneous FIR cores, which are capable of running from near-threshold to nominal voltages. In our 20 chip population, we observe 7\% variation in speed among the cores at nominal voltage (0.9V) and 26\% at near threshold voltage (0.55V). We also observe 54\% variation in power consumption of the cores. 
For any desired throughput, the optimum number of cores and their optimum operating voltage is chosen based on the speed and power characteristics of the cores present inside the chip. We will also present analysis on energy-efficiency of such systems based on changes in ambient temperature.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jafari:2015:LUD, author = "Fahimeh Jafari and Zhonghai Lu and Axel Jantsch", title = "Least Upper Delay Bound for {VBR} Flows in Networks-on-Chip with Virtual Channels", journal = j-TODAES, volume = "20", number = "3", pages = "35:1--35:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2733374", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Real-time applications such as multimedia and gaming require stringent performance guarantees, usually enforced by a tight upper bound on the maximum end-to-end delay. For FIFO multiplexed on-chip packet switched networks we consider worst-case delay bounds for Variable Bit-Rate (VBR) flows with aggregate scheduling, which schedules multiple flows as an aggregate flow. VBR Flows are characterized by a maximum transfer size ($L$), peak rate ($p$), burstiness ($ \sigma $), and average sustainable rate ($ \rho $). Based on network calculus, we present and prove theorems to derive per-flow end-to-end Equivalent Service Curves (ESC), which are in turn used for computing Least Upper Delay Bounds (LUDBs) of individual flows. In a realistic case study we find that the end-to-end delay bound is up to 46.9\% more accurate than the case without considering the traffic peak behavior. 
Likewise, results also show similar improvements for synthetic traffic patterns. The proposed methodology is implemented in C++ and has low run-time complexity, enabling quick evaluation for large and complex SoCs.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bombieri:2015:MRR, author = "Nicola Bombieri and Franco Fummi and Sara Vinco", title = "A Methodology to Recover {RTL IP} Functionality for Automatic Generation of {SW} Applications", journal = j-TODAES, volume = "20", number = "3", pages = "36:1--36:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2720019", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the advent of heterogeneous multiprocessor system-on-chips (MPSoCs), hardware/software partitioning is again on the rise both in research and in product development. In this new scenario, implementing intellectual-property (IP) blocks as SW applications rather than dedicated HW is an increasing trend to fully exploit the computation power provided by the MPSoC CPUs. On the other hand, whole libraries of IP blocks are available as RTL descriptions, most of them without a corresponding high-level SW implementation. In this context, this article presents a methodology to automatically generate SW applications in C++, by starting from existing RTL IPs implemented in hardware description language (HDL). The methodology exploits an abstraction algorithm to eliminate implementation details typical of HW descriptions (such as cycle-accurate functionality and data types) to guarantee relevant performance of the generated code. 
The experimental results show that, in many cases, the C++ code automatically generated in a few seconds with the proposed methodology is as efficient as the corresponding code manually implemented from scratch.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Holst:2015:HTL, author = "Stefan Holst and Michael E. Imhof and Hans-Joachim Wunderlich", title = "High-Throughput Logic Timing Simulation on {GPGPUs}", journal = j-TODAES, volume = "20", number = "3", pages = "37:1--37:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2714564", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many EDA tasks such as test set characterization or the precise estimation of power consumption, power droop and temperature development, require a very large number of time-aware gate-level logic simulations. Until now, such characterizations have been feasible only for rather small designs or with reduced precision due to the high computational demands. The new simulation system presented here is able to accelerate such tasks by more than two orders of magnitude and provides for the first time fast and comprehensive timing simulations for industrial-sized designs. Hazards, pulse-filtering, and pin-to-pin delay are supported for the first time in a GPGPU accelerated simulator, and the system can easily be extended to even more realistic delay models and further applications. A sophisticated mapping with efficient memory utilization and access patterns as well as minimal synchronizations and control flow divergence is able to use the full potential of GPGPU architectures. 
To provide such a mapping, we combine for the first time the versatility of event-based timing simulation and multi-dimensional parallelism used in GPU-based gate-level simulators. The result is a throughput-optimized timing simulation algorithm, which runs many simulation instances in parallel and at the same time fully exploits gate-parallelism within the circuit.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xu:2015:DCD, author = "Tong Xu and Peng Li and Savithri Sundareswaran", title = "Decoupling Capacitance Design Strategies for Power Delivery Networks with Power Gating", journal = j-TODAES, volume = "20", number = "3", pages = "38:1--38:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2700825", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power gating is a widely used leakage power saving strategy in modern chip designs. However, power gating introduces unique power integrity issues and trade-offs between switching and rush current (wake-up) supply noises. At the same time, the amount of power saving intrinsically trades off with power integrity. In addition, these trade-offs significantly vary with supply voltage. In this article, we propose systemic decoupling capacitors (decaps) optimization strategies that optimally trade-off between power integrity and leakage saving. Specially, new global decap and reroutable decap design concepts are proposed to relax the tight interaction between power integrity and leakage saving of power gated PDNs with a single supply voltage level. 
Furthermore, we propose a flexible decap allocation technique to deal with the design trade-offs under multiple supply voltage levels. The proposed strategies are implemented in an automatic design flow for choosing the optimal amount of local decaps, global decaps and reroutable decaps. The conducted experiments demonstrate that leakage saving can be increased significantly compared with the conventional PDN design approach with a single supply voltage level using the proposed techniques without jeopardizing power integrity. For PDN designs operating at two supply voltage levels, the optimal performance is achieved at each voltage level.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Firouzi:2015:AVA, author = "Farshad Firouzi and Fangming Ye and Krishnendu Chakrabarty and Mehdi B. Tahoori", title = "Aging- and Variation-Aware Delay Monitoring Using Representative Critical Path Selection", journal = j-TODAES, volume = "20", number = "3", pages = "39:1--39:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2746237", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Process together with runtime variations in temperature and voltage, as well as transistor aging, degrade path delay and may eventually induce circuit failure due to timing variations. Therefore, in-field tracking of path delays is essential, and to respond to this need, several delay sensor designs have been proposed in the literature. However, due to the significant overhead of these sensors and the large number of critical paths in today's IC, it is infeasible to monitor the delay of every critical path in silicon. 
We present an aging- and variation-aware representative path selection technique based on machine learning that allows to measure the delay of a small set of paths and infer the delay of a larger pool of paths that are likely to fail due to delay variations. Simulation results for benchmark circuits highlight the accuracy of the proposed approach for predicting critical-path delay based on the selected representative paths.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Park:2015:SGA, author = "Heejong Park and Avinash Malik and Zoran Salcic", title = "Scheduling Globally Asynchronous Locally Synchronous Programs for Guaranteed Response Times", journal = j-TODAES, volume = "20", number = "3", pages = "40:1--40:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2740961", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Safety-critical software systems need to guarantee functional correctness and bounded response times to external input events. Programs designed using reactive programming languages, based on formal mathematical semantics, can be automatically verified for functional correctness guarantees. Real-time guarantees on the other hand are much harder to achieve. In this article we provide a static analysis framework for guaranteeing response times for reactive programs developed using the Globally Asynchronous Locally Synchronous (GALS) model of computation. The proposed approach is applicable to scheduling of GALS programs for different target architectures with single or multiple processors or cores. 
A Satisfiability Modulo Theory (SMT) formulation in the quantifier free linear real arithmetic (QF\_LRA) logic is used for scheduling. A novel technique to encode rendezvous used in synchronization of globally asynchronous processes in the presence of locally synchronous parallelism and arbitrary preemption into QF\_LRA logic is presented. Finally, our SMT formulation is shown to produce schedules in reasonable time.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yi:2015:ESF, author = "Qiuping Yi and Zijiang Yang and Jian Liu and Chen Zhao and Chao Wang", title = "Explaining Software Failures by Cascade Fault Localization", journal = j-TODAES, volume = "20", number = "3", pages = "41:1--41:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2738038", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "During software debugging, a significant amount of effort is required for programmers to identify the root cause of a manifested failure. In this article, we propose a cascade fault localization method to help speed up this labor-intensive process via a combination of weakest precondition computation and constraint solving. Our approach produces a cause tree, where each node is a potential cause of the failure and each edge represents a causal relationship between two causes. There are two main contributions of this article that differentiate our approach from existing methods.
First, our method systematically computes all potential causes of a failure and augments each cause with a proper context for ease of comprehension by the user. Second, our method organizes the potential causes in a tree structure to enable on-the-fly pruning based on domain knowledge and feedback from the user. We have implemented our new method in a software tool called CaFL, which builds upon the LLVM compiler and KLEE symbolic virtual machine. We have conducted experiments on a large set of public benchmarks, including real applications from GNU Coreutils and Busybox. Our results show that in most cases the user has to examine only a small fraction of the execution trace before identifying the root cause of the failure.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2015:SLO, author = "Jong Chul Lee and Roman Lysecky", title = "System-Level Observation Framework for Non-Intrusive Runtime Monitoring of Embedded Systems", journal = j-TODAES, volume = "20", number = "3", pages = "42:1--42:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2717310", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the complexity of embedded systems rapidly increases, the use of traditional analysis and debug methods encounters significant challenges in monitoring, analyzing, and debugging the complex interactions of various software and hardware components. This situation is further exacerbated for in-situ debugging and verification in which traditional debug and trace interfaces that require physical access are unavailable, infeasible, or cost prohibitive. 
In this article, we present a system-level observation framework that provides minimally intrusive methods for dynamically monitoring and analyzing deeply integrated hardware and software components within embedded systems. The system-level observation framework monitors hardware and software events by inserting additional logic for detecting designer-specified events within hardware cores to observe complex interaction across hardware and software boundaries at runtime, and provides visibility for monitoring complex execution behavior of software applications without affecting the system execution.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2015:LRR, author = "Qi Zhang and Xuandong Li and Linzhang Wang and Tian Zhang and Yi Wang and Zili Shao", title = "{Lazy-RTGC}: a Real-Time Lazy Garbage Collection Mechanism with Jointly Optimizing Average and Worst Performance for {NAND} Flash Memory Storage Systems", journal = j-TODAES, volume = "20", number = "3", pages = "43:1--43:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2746236", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to many attractive and unique properties, NAND flash memory has been widely adopted in mission-critical hard real-time systems and some soft real-time systems. However, the nondeterministic garbage collection operation in NAND flash memory makes it difficult to predict the system response time of each data request. This article presents Lazy-RTGC, a real-time lazy garbage collection mechanism for NAND flash memory storage systems. 
Lazy-RTGC adopts two design optimization techniques: on-demand page-level address mappings, and partial garbage collection. On-demand page-level address mappings can achieve high performance of address translation and can effectively manage the flash space with the minimum RAM cost. On the other hand, partial garbage collection can provide the guaranteed system response time. By adopting these techniques, Lazy-RTGC jointly optimizes both the average and the worst system response time, and provides a lower bound of reclaimed free space. Lazy-RTGC is implemented in FlashSim and compared with representative real-time NAND flash memory management schemes. Experimental results show that our technique can significantly improve both the average and worst system performance with very low extra flash-space requirements.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sharma:2015:AIE, author = "Namita Sharma and Preeti Ranjan Panda and Francky Catthoor and Praveen Raghavan and Tom {Vander Aa}", title = "Array Interleaving --- An Energy-Efficient Data Layout Transformation", journal = j-TODAES, volume = "20", number = "3", pages = "44:1--44:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2747875", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Optimizations related to memory accesses and data storage make a significant difference to the performance and energy of a wide range of data-intensive applications. These techniques need to evolve with modern architectures supporting wide memory accesses. 
We investigate array interleaving, a data layout transformation technique that achieves energy efficiency by combining the storage of data elements from multiple arrays in contiguous locations, in an attempt to exploit spatial locality. The transformation reduces the number of memory accesses by loading the right set of data into vector registers, thereby minimizing redundant memory fetches. We perform a global analysis of array accesses, and account for possibly different array behavior in different loop nests that might ultimately lead to changes in data layout decisions for the same array across program regions. Our technique relies on detailed estimates of the savings due to interleaving, and also the cost of performing the actual data layout modifications. We also account for the vector register widths and the possibility of choosing the appropriate granularity for interleaving. Experiments on several benchmarks show a 6--34\% reduction in memory energy due to the strategy.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Roy:2015:LAM, author = "Sudip Roy and Partha P. Chakrabarti and Srijan Kumar and Krishnendu Chakrabarty and Bhargab B. 
Bhattacharya", title = "Layout-Aware Mixture Preparation of Biochemical Fluids on Application-Specific Digital Microfluidic Biochips", journal = j-TODAES, volume = "20", number = "3", pages = "45:1--45:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2714562", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The recent proliferation of digital microfluidic (DMF) biochips has enabled rapid on-chip implementation of many biochemical laboratory assays or protocols. Sample preprocessing, which includes dilution and mixing of reagents, plays an important role in the preparation of assays. The automation of sample preparation on a digital microfluidic platform often mandates the execution of a mixing algorithm, which determines a sequence of droplet mix-split steps (usually represented as a mixing graph). However, the overall cost and performance of on-chip mixture preparation not only depends on the mixing graph but also on the resource allocation and scheduling strategy, for instance, the placement of boundary reservoirs or dispensers, mixer modules, storage units, and physical design of droplet-routing pathways. In this article, we first present a new mixing algorithm based on a number-partitioning technique that determines a layout-aware mixing tree corresponding to a given target ratio of a number of fluids. The mixing graph produced by the proposed method can be implemented on a chip with a fewer number of crossovers among droplet-routing paths as well as with a reduced reservoir-to-mixer transportation distance. Second, we propose a routing-aware resource-allocation scheme that can be used to improve the performance of a given mixing algorithm on a chip layout. 
The design methodology is evaluated on various test cases to demonstrate its effectiveness in mixture preparation with the help of two representative mixing algorithms. Simulation results show that on average, the proposed scheme can reduce the number of crossovers among droplet-routing paths by 89.7\% when used in conjunction with the new mixing algorithm, and by 75.4\% when an earlier algorithm [Thies et al. 2008] is used.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Suresh:2015:AGU, author = "Chandra K. H. Suresh and Sule Ozev and Ozgur Sinanoglu", title = "Adaptive Generation of Unique {IDs} for Digital Chips through Analog Excitation", journal = j-TODAES, volume = "20", number = "3", pages = "46:1--46:??", month = jun, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2732408", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Aug 7 08:47:44 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Globalization of the integrated circuit design and manufacturing flow has successfully ameliorated design complexity and fabrication cost challenges, and helped deliver cost-effective products while meeting stringent time-to-market requirements. On the flip side, it has resulted in various forms of security vulnerabilities in the supply chain that involves designers, fabs, test facilities, and distributors until the end-product reaches customers. One of the biggest threats to semiconductor industry today is the entry of aged, reject, or cloned parts, that is, counterfeit chips, into the supply chain, leading to annual revenue losses in the order of billions of dollars. 
While traceability of chips between trusted parties can help monitor the supply chain at various points in the flow, existing solutions are in the form of integrating costly hardware units on chip, or utilizing easy-to-circumvent inspection-based detection techniques. In this article, we propose a technique for adaptive unique ID generation that leverages process variations, enabling chip traceability. The proposed method stimulates digital chips with an analog signal from the supply lines, which serve as primary inputs to each gate in the signal path. Using a sinusoidal signal that exercises the transistors as gain components, we create a chip-specific response that can be post-processed into a digital ID. The proposed technique enables quick and cost-effective authenticity validation that requires no on-chip hardware support. Our simulation and experimentation on actual chips show that the proposed technique is capable of generating unique IDs even in the presence of environmental noise.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2015:MBF, author = "Hai-Bao Chen and Ying-Chi Li and Sheldon X.-D. 
Tan and Xin Huang and Hai Wang and Ngai Wong", title = "{$H$}-Matrix-Based Finite-Element-Based Thermal Analysis for {$3$D} {ICs}", journal = j-TODAES, volume = "20", number = "4", pages = "47:1--47:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2714563", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose an efficient finite-element-based (FE-based) method for both steady and transient thermal analyses of high-performance integrated circuits based on the hierarchical matrix ($H$-matrix) representation. $H$-matrix has been shown to provide a data-sparse way to approximate the matrices and their inverses with almost linear-space and time complexities. In this work, we apply the $H$-matrix concept for solving heating diffusion problems modeled by parabolic partial differential equations (PDEs) based on the finite element method. We show that the matrix from a FE-based steady and transient thermal analysis can be represented by $H$-matrix without any approximation, and its inverse and Cholesky factors can be evaluated by $H$-matrix with controlled accuracy. We then show and prove that the memory and time complexities of the solver are bounded by $O(k_1 N \log N)$ and $O(k_1^2 N \log^2 N)$, respectively, where $k_1$ is a small quantity determined by accuracy requirements and $N$ is the number of unknowns in the system. The comparison with existing product-quality LU solvers, CSPARSE and UMFPACK, on a number of 3D IC thermal matrices, shows that the new method is much more memory efficient than these methods, which however prevents CPU time comparison with those methods on large examples.
But the proposed method can solve all the given thermal circuits with decent scalabilities, which shows good agreement with the predicted theoretical results.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Heyse:2015:TTM, author = "Karel Heyse and Brahim {Al Farisi} and Karel Bruneel and Dirk Stroobandt", title = "{TCONMAP}: Technology Mapping for Parameterised {FPGA} Configurations", journal = j-TODAES, volume = "20", number = "4", pages = "48:1--48:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2751558", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Parameterised configurations are FPGA configuration bitstreams in which the bits are defined as functions of user-defined parameters. From a parameterised configuration, it is possible to quickly and efficiently derive specialised, regular configuration bitstreams by evaluating these functions. The specialised bitstreams have different properties and functionality depending on the chosen values of the parameters. The most important application of parameterised configurations is the generation of specialised configuration bitstreams for Dynamic Circuit Specialisation, a technique for optimising circuits at runtime using partial reconfiguration of the FPGA. Generating and using parameterised configurations requires a new FPGA tool flow. In this article, we present a new technology mapping algorithm for parameterised designs, called TCONMAP, that can be used to produce parameterised configurations in which both the configuration of the logic blocks and routing is a function of the parameters. 
In our experiments, we demonstrate that in using TCONMAP, the depth and area of the mapped circuit is close to the minimal depth and area attainable. Both Dynamic Circuit Specialisation and fine-grained modular reconfiguration are extracted by TCONMAP from the HDL description of the design requiring only simple parameter annotations.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Peter:2015:CBS, author = "Steffen Peter and Tony Givargis", title = "Component-Based Synthesis of Embedded Systems Using Satisfiability Modulo Theories", journal = j-TODAES, volume = "20", number = "4", pages = "49:1--49:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2746235", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Constraint programming solvers, such as Satisfiability Modulo Theory (SMT) solvers, are capable tools in finding preferable configurations for embedded systems from large design spaces. However, constructing SMT constraint programs is not trivial, in particular for complex systems that exhibit multiple viewpoints and models. In this article we propose CoDeL: a component-based description language that allows system designers to express components as reusable building blocks of the system with their parameterizable properties, models, and interconnectivity. Systems are synthesized by allocating, connecting, and parameterizing the components to satisfy the requirements of an application. 
We present an algorithm that transforms component-based design spaces, expressible in CoDeL, to an SMT program, which, solved by state-of-the-art SMT solvers, determines the satisfiability of the synthesis problem, and delivers a correct-by-construction system configuration. Evaluation results for use cases in the domain of scheduling and mapping of distributed real-time processes confirm, first, the performance gain of SMT compared to traditional design space exploration approaches, second, the usability gains by expressing design problems in CoDeL, and third, the capability of the CoDeL/SMT approach to support the design of embedded systems.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mirtar:2015:AAA, author = "Ali Mirtar and Sujit Dey and Anand Raghunathan", title = "An Application Adaptation Approach to Mitigate the Impact of Dynamic Thermal Management on Video Encoding", journal = j-TODAES, volume = "20", number = "4", pages = "50:1--50:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2753758", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to limitations of cooling methods such as using fan and heat sink, dynamic thermal management (DTM) is being widely adopted to manage the temperature of computing systems. However, application of DTM can reduce the system performance and thereby affect the quality of real-time applications. Real-time video encoding, which has high computational need and hard deadlines, is a commonly used application that can be severely affected by the usage of DTM. 
We study the effect of DTM on a widely used H.264 video encoder and formulate a multidimensional optimization problem to maximize video quality and minimize bit rate while ensuring that the video encoder can run in real time in spite of DTM effects. We model the effects of adapting encoding parameters on video quality, bit rate, and encoder speed. We propose a dynamic application adaptation method to efficiently solve the optimization problem by optimally adapting the encoding parameters in response to DTM effects. In addition, we show that the proposed dynamic application adaptation method would reduce the need for cooling methods such as forced convection cooling. We implement the proposed approach on an Intel\reg{} Core(TM) 2 Duo platform where dynamic voltage and frequency scaling (DVFS) is used for DTM. Our measurements with several videos reveal that when DTM is applied, the video quality is affected significantly. However, using the proposed adaptation algorithm, the encoder can run in real time, and the quality loss is minimized with only a marginal increase in the bit rate.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2015:VPI, author = "Da-Wei Chang and Hsin-Hung Chen and Wei-Jian Su", title = "{VSSD}: Performance Isolation in a Solid-State Drive", journal = j-TODAES, volume = "20", number = "4", pages = "51:1--51:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2755560", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Performance isolation is critical in shared storage systems, a popular storage solution.
In a shared storage system, interference between requests from different users can affect the accuracy of I/O cost accounting, resulting in poor performance isolation. Recently, NAND flash-memory-based solid-state drives (SSDs) have been increasingly used in shared storage systems. However, interference in SSD-based shared storage systems has not been addressed. In this article, two types of interference, namely, queuing delay (QD) interference and garbage collection (GC) interference, are identified in a shared SSD. Additionally, a framework called VSSD is proposed to address these types of interference. VSSD is composed of two components: the FACO credit-based I/O scheduler designed to address QD interference and the ViSA flash translation layer designed to address GC interference. The VSSD framework aims to be implemented in the firmware running on an SSD controller. With VSSD, interference in an SSD can be eliminated and performance isolation can be ensured. Both synthetic and application workloads are used to evaluate the effectiveness of the proposed VSSD framework. The performance results show the following. First, QD and GC interference exists and can result in poor performance isolation between users on SSD-based shared storage systems. Second, VSSD is effective in eliminating the interference and achieving performance isolation between users. 
Third, the overhead of VSSD is insignificant.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Duan:2015:AAP, author = "Qing Duan and Abhishek Koneru and Jun Zeng and Krishnendu Chakrabarty and Gary Dispoto", title = "Accurate Analysis and Prediction of Enterprise Service-Level Performance", journal = j-TODAES, volume = "20", number = "4", pages = "52:1--52:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2757279", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "An enterprise service-level performance time series is a sequence of data points that quantify demand, throughput, average order-delivery time, quality of service, or end-to-end cost. Analytical and predictive models of such time series can be embedded into an enterprise information system (EIS) in order to provide meaningful insights into potential business problems and generate guidance for appropriate solutions. Time-series analysis includes periodicity detection, decomposition, and correlation analysis. Time-series prediction can be modeled as a regression problem to forecast a sequence of future time-series datapoints based on the given time series. The state-of-the-art (baseline) methods employed in time-series prediction generally apply advanced machine-learning algorithms. In this article, we propose a new univariate method for dealing with midterm time-series prediction. The proposed method first analyzes the hierarchical periodic structure in one time series and decomposes it into trend, season, and noise components. 
By discarding the noise component, the proposed method only focuses on predicting repetitive season and smoothed trend components. As a result, this method significantly improves upon the performance of baseline methods in midterm time-series prediction. Moreover, we propose a new multivariate method for dealing with short-term time-series prediction. The proposed method utilizes cross-correlation information derived from multiple time series. The amount of data taken from each time series for training the regression model is determined by results from hierarchical cross-correlation analysis. Such a data-filtering strategy leads to improved algorithm efficiency and prediction accuracy. By combining statistical methods with advanced machine-learning algorithms, we have achieved a significantly superior performance in both short-term and midterm time-series predictions compared to state-of-the-art (baseline) methods.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Heo:2015:IAS, author = "Ingoo Heo and Minsu Kim and Yongje Lee and Changho Choi and Jinyong Lee and Brent Byunghoon Kang and Yunheung Paek", title = "Implementing an Application-Specific Instruction-Set Processor for System-Level Dynamic Program Analysis Engines", journal = j-TODAES, volume = "20", number = "4", pages = "53:1--53:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2746238", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In recent years, dynamic program analysis (DPA) has been widely used in various fields such as profiling, finding bugs, and security. However, existing solutions have their own weaknesses. 
Software solutions provide flexibility in DPA but they suffer from tremendous performance overhead. In contrast, core-level hardware engines rely on specialized integrated logics and attain extremely fast computation, but they have a limited functional extensibility because the logics are tightly coupled with the host processor. To mend this, a prior system-level approach utilizes an existing channel to integrate their hardware without necessitating the host architecture modification and introduced great potential in performance. Nevertheless, the prior work does not address the detailed design and implementation of the engine, which is quite essential to leverage the deployment on real systems. To address this, in this article, we propose an implementation of programmable DPA hardware engine, called program analysis unit (PAU). PAU is an application-specific instruction-set processor (ASIP) whose instruction set is customized to reflect common features of various DPA methods. With the specialized architecture and programmability of software, our PAU aims at fast computation and sufficient flexibility. In our case studies on several DPA techniques, we show that our ASIP approach can be successfully applicable to complex DPA schemes while providing hardware-backed power in performance and software-based flexibility in analysis. 
Recent experiments on our FPGA prototype revealed that the performance of PAU is 4.7--13.6 times faster than pure software DPA, and the power/area consumption is also acceptably small compared to today's mobile processors.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jiang:2015:CLF, author = "Lei Jiang and Bo Zhao and Jun Yang and Youtao Zhang", title = "Constructing Large and Fast On-Chip Cache for Mobile Processors with Multilevel Cell {STT--MRAM} Technology", journal = j-TODAES, volume = "20", number = "4", pages = "54:1--54:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2764903", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern mobile processors integrating an increasing number of cores into one single chip demand large-capacity, on-chip, last-level caches (LLCs) in order to achieve scalable performance improvements. However, adopting traditional memory technologies such as SRAM and embedded DRAM (eDRAM) introduces leakage and scalability problems. Spin-transfer torque magnetic RAM (STT-MRAM) is a novel nonvolatile memory technology that has emerged as a promising alternative for constructing on-chip caches in high-end mobile processors. STT-MRAM has many advantages, such as short read latency, zero leakage from the memory cell, and better scalability than eDRAM and SRAM. Multilevel cell (MLC) STT-MRAM further enlarges capacity and reduces per-bit cost by storing more bits in one cell. However, MLC STT-MRAM has long write latency which limits the effectiveness of MLC STT-MRAM-based LLCs. 
In this article, we address this limitation with three novel designs: line pairing (LP), line swapping (LS), and dynamic LP/LS enabler (DLE). LP forms fast cache lines by reorganizing MLC soft bits which are faster to write. LS dynamically stores frequently-written data into these fast cache lines. We then propose a dynamic LP/LS enabler (DLE) to enable LP and LS only if they help to improve the overall cache performance. Our experimental results show that the proposed designs improve system performance by 9--15\% and reduce energy consumption by 14--21\% for various types of mobile processors.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Samavatian:2015:ALL, author = "Mohammad Hossein Samavatian and Mohammad Arjomand and Ramin Bashizade and Hamid Sarbazi-Azad", title = "Architecting the Last-Level Cache for {GPUs} using {STT}-{RAM} Technology", journal = j-TODAES, volume = "20", number = "4", pages = "55:1--55:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2764905", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Future GPUs should have larger L2 caches based on the current trends in VLSI technology and GPU architectures toward increase of processing core count. Larger L2 caches inevitably have proportionally larger power consumption. In this article, having investigated the behavior of GPGPU applications, we present an efficient L2 cache architecture for GPUs based on STT-RAM technology. Due to its high-density and low-power characteristics, STT-RAM technology can be utilized in GPUs where numerous cores leave a limited area for on-chip memory banks. 
They have, however, two important issues, high energy and latency of write operations, that have to be addressed. Low retention time STT-RAMs can reduce the energy and delay of write operations. Nevertheless, employing STT-RAMs with low retention time in GPUs requires a thorough study on the behavior of GPGPU applications. Based on this investigation, we have architectured a two-part STT-RAM-based L2 cache with low-retention (LR) and high-retention (HR) parts. The proposed two-part L2 cache exploits a dynamic threshold regulator (DTR) to efficiently regulate the write threshold for migration of the data blocks from HR to LR, based on the behavior of the applications. Also, a Data and Access type Aware Cache Search mechanism (DAACS) is hired for handling the search of the requested data blocks in two parts of the cache. The STT-RAM L2 cache architecture proposed in this article can improve IPC by up to 171\% (20\% on average), and reduce the average consumed power by 28.9\% compared to a conventional L2 cache architecture with equal on-chip area.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Indrusiak:2015:FSN, author = "Leandro Soares Indrusiak and James Harbin and Osmar {Marchi Dos Santos}", title = "Fast Simulation of Networks-on-Chip with Priority-Preemptive Arbitration", journal = j-TODAES, volume = "20", number = "4", pages = "56:1--56:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2755559", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "An increasingly time-consuming part of the design flow of on-chip multiprocessors is the simulation of the interconnect architecture. 
The accurate simulation of state-of-the-art network-on-chip interconnects can take hours, and this process is repeated for each design iteration because it provides valuable insights on communication latencies that can greatly affect the overall performance of the system. In this article, we identify a time-predictable network-on-chip architecture and show that its timing behaviour can be predicted using models which are far less complex than the architecture itself. We then explore such a feature to produce simplified and lightweight simulation models that can produce latency figures with more than 90\% accuracy and simulate more than 1,000 times faster when compared to a cycle-accurate model of the same interconnect.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2015:FES, author = "Irith Pomeranz", title = "{FOLD}: Extreme Static Test Compaction by Folding of Functional Test Sequences", journal = j-TODAES, volume = "20", number = "4", pages = "57:1--57:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2764455", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article introduces a new approach to extreme static test compaction for functional test sequences that modifies the sequence in order to enhance the ability to omit test vectors from it and thus compact it. In the new approach, modification of the sequence and omission of test vectors from it are tightly coupled by focusing both subprocedures on subsequences of limited lengths. 
In a new process that is referred to as folding, a subsequence is partitioned into two halves, and the goal of the modification is to ensure that the two halves are as similar as possible. With similar halves, the expectation is that it will be possible to omit test vectors from the subsequence. Experimental results demonstrate that the procedure produces extremely short functional test sequences for benchmark circuits.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2015:BST, author = "Ran Wang and Krishnendu Chakrabarty and Sudipta Bhawmik", title = "Built-In Self-Test and Test Scheduling for Interposer-Based {$ 2.5 $D IC}", journal = j-TODAES, volume = "20", number = "4", pages = "58:1--58:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2757278", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Interposer-based 2.5D integrated circuits (ICs) are seen today as a precursor to 3D ICs based on through-silicon vias (TSVs). All the dies and the interposer in a 2.5D IC must be adequately tested for product qualification. We present an efficient built-in self-test (BIST) architecture for targeting defects in dies and in the interposer interconnects. The proposed BIST architecture can also be used for fault diagnosis during interconnect testing. To reduce the overall test cost, we describe a test scheduling and optimization technique under power constraints. 
We present simulation results to validate the BIST architecture and demonstrate fault detection, synthesis results to evaluate the area overhead of the proposed BIST architecture, and test scheduling results to highlight the effectiveness of the optimization approach.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bahar:2015:ISI, author = "R. Iris Bahar and Alex K. Jones and Yuan Xie", title = "Introduction to the Special Issue on Reliable, Resilient, and Robust Design of Circuits and Systems", journal = j-TODAES, volume = "20", number = "4", pages = "59:1--59:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2796541", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kiddie:2015:SEM, author = "Bradley T. Kiddie and William H. Robinson and Daniel B. 
Limbrick", title = "Single-Event Multiple-Transient Characterization and Mitigation via Alternative Standard Cell Placement Methods", journal = j-TODAES, volume = "20", number = "4", pages = "60:1--60:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2740962", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As fabrication technology scales towards smaller transistor sizes and lower critical charge, single-event radiation effects are more likely to cause errant behavior in multiple, physically adjacent devices in modern integrated circuits (ICs), and with higher operating frequencies, this risk increasingly impacts design logic over memory as well. In order to increase future system reliability, circuit designers need greater awareness of multiple-transient charge-sharing effects during the early stages of their design flow with standard cell placement and routing. To measure the propagation and observability of multiple transients from single radiation events, this work uses several intra-pipeline combinational logic circuits at the 32nm technology node, investigates several different standard cell placements of each design, and analyzes those placements with a novel, physically realistic transient injection and simulation method. 
It is shown that (1) this simulation methodology, informed by experimental data, provides an increased realism over other works in traditional fault injection fields, (2) different placements of the same circuit where standard cells are grouped by logical hierarchy can result in different reliability behavior and benefits especially useful within the area of approximate computing, and (3) improved reliability through charge-sharing transient mitigation can be gained with no area penalty and minimal speed and power penalties by adjusting the placement of standard cells.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Delshadtehrani:2015:SMR, author = "Leila Delshadtehrani and Hamed Farbeh and Seyed Ghassem Miremadi", title = "In-Scratchpad Memory Replication: Protecting Scratchpad Memories in Multicore Embedded Systems against Soft Errors", journal = j-TODAES, volume = "20", number = "4", pages = "61:1--61:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2770874", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scratchpad memories (SPMs) are widely employed in multicore embedded processors. Reliability is one of the major constraints in the embedded processor design, which is threatened with the increasing susceptibility of memory cells to multiple-bit upsets (MBUs) due to continuous technology down-scaling. This article proposes a low-cost and efficient data replication mechanism, called In-Scratchpad Memory Replication (ISMR), to correct MBUs in SPMs of multicore embedded processors. 
The main feature of ISMR is a smart controller, called Replication Management Unit (RMU), which is responsible for dynamically analyzing the activity of the SPM blocks at runtime and efficiently replicating the vulnerable SPM blocks into currently inactive SPM blocks. RMU exploits a 2-bit tag for each SPM block, where the value of each tag is determined by RMU according to the SPM access pattern. Accordingly, the proposed mechanism guarantees the replication of all vulnerable SPM blocks to provide error correction without decreasing the SPM utilization. To detect errors in SPM blocks, ISMR uses a 2-bit interleaved-parity code. As compared with the previous E-RAID 1 mechanism, the simulation results illustrate that for an 8-core embedded processor, the ISMR mechanism experiences 81\% less energy consumption overhead and 48\% less performance overhead.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Papandreou:2015:ERM, author = "Nikolaos Papandreou and Thomas Parnell and Haralampos Pozidis and Thomas Mittelholzer and Evangelos Eleftheriou and Charles Camp and Thomas Griffin and Gary Tressler and Andrew Walls", title = "Enhancing the Reliability of {MLC NAND} Flash Memory Systems by Read Channel Optimization", journal = j-TODAES, volume = "20", number = "4", pages = "62:1--62:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2699866", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "NAND flash memory is not only the ubiquitous storage medium in consumer applications but has also started to appear in enterprise storage systems as well. 
MLC and TLC flash technology made it possible to store multiple bits in the same silicon area as SLC, thus reducing the cost per amount of data stored. However, at current sub-20nm technology nodes, MLC flash devices fail to provide the levels of raw reliability, mainly cycling endurance, that are required by typical enterprise applications. Advanced signal processing and coding schemes are needed to improve the flash bit error rate and thus elevate the device reliability to the desired level. In this article, we report on the use of adaptive voltage thresholds and cell-to-cell interference cancellation in the read operation of NAND flash devices. We discuss how the optimal read voltage thresholds can be determined and assess the benefit of cancelling cell-to-cell interference in terms of cycling endurance, data retention, and resilience to read disturb.", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xu:2015:ICF, author = "Cong Xu and Dimin Niu and Yang Zheng and Shimeng Yu and Yuan Xie", title = "Impact of Cell Failure on Reliable Cross-Point Resistive Memory Design", journal = j-TODAES, volume = "20", number = "4", pages = "63:1--63:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2753759", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Resistive random access memory (ReRAM) technology is an emerging candidate for next-generation nonvolatile memory (NVM) architecture due to its simple structure, low programming voltage, fast switching speed, high on/off ratio, excellent scalability, good endurance, and great compatibility with silicon CMOS technology. 
The most attractive of the characteristics of ReRAM is its cross-point structure, which features a 4 F$^2$ cell size. In a cross-point structure, the existence of sneak current and resulting voltage loss due to the wire's resistance might cause read and write failures if not designed properly. In addition, a robust ReRAM design needs to deal with both soft and hard errors. In this article, we summarize mechanisms of both soft and hard errors of ReRAM cells and propose a unified model to characterize different failure behaviors. We quantitatively analyze the impact of cell failure types on the reliability of the cross-point array. We also propose an error-resilient architecture, which avoids unnecessary writes in the hard error detection unit. Assuming constant soft error rate, our approach can extend the lifetime of ReRAM up to 75\% over a design without hard error detection and up to 12\% over the design with a ``write-verify'' detection mechanism. Our approach yields greater significant lifetime improvement when considering postcycling retention degradation.", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2015:RLP, author = "Renyuan Zhang and Mineo Kaneko", title = "Robust and Low-Power Digitally Programmable Delay Element Designs Employing Neuron-{MOS} Mechanism", journal = j-TODAES, volume = "20", number = "4", pages = "64:1--64:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2740963", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The feasibility of designing digitally programmable delay elements (PDEs) employing neuron-MOS mechanism is investigated in this work. 
By coupling the capacitors on the gate of the MOS transistor, the current flowing through the transistor can be digitally tuned without additional static power consumption. Various switching delays are generated by a clock buffer stage in this manner. Two types of neuron-MOS-based PDEs are suggested in this article. One of them is realized by directly applying capacitor-coupling technology on the transistors of an inverter as a clock buffer. The delay programmability is realized by tuning the charging/discharging current through the neuron-MOS inverter digitally. Since no additional transistor is introduced into the charging/discharging path, the performance fluctuation due to process variations on MOS transistors is reduced. The temperature effect is also partially compensated by the proposed neuron-MOS implementation. Another type of PDE circuit is proposed by employing a reliable reference-current-generator, where the neuron-MOS transistor acts as a linearly tunable resistance. A stable reference current is generated and used for charging/discharging the inverter as a clock buffer. As a result, the switching delay of the inverter is linearly programmed by digital input patterns. In general, both types of suggested PDE circuits achieve improved or fair performances over the robustness, power consumption, and linearity.", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2015:UIL, author = "Hyungjun Kim and Siva Bhanu Krishna Boga and Arseniy Vitkovskiy and Stavros Hadjitheophanous and Paul V. Gratz and Vassos Soteriou and Maria K. 
Michael", title = "Use It or Lose It: Proactive, Deterministic Longevity in Future Chip Multiprocessors", journal = j-TODAES, volume = "20", number = "4", pages = "65:1--65:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2770873", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Moore's Law scaling continues to yield higher transistor density with each succeeding process generation, leading to today's many-core chip multiprocessors (CMPs) with tens or even hundreds of interconnected cores or tiles. Unfortunately, deep submicron CMOS process technology is marred by increasing susceptibility to wear. Prolonged operational stress gives rise to accelerated wearout and failure due to several physical failure mechanisms, including hot-carrier injection (HCI) and negative-bias temperature instability (NBTI). Each failure mechanism correlates with different usage-based stresses, all of which can eventually generate permanent faults. While the wearout of an individual core in many-core CMPs may not necessarily be catastrophic, a single fault in the interprocessor network-on-chip (NoC) fabric could render the entire chip useless, as it could lead to protocol-level deadlocks, or even partition away vital components such as the memory controller or other critical I/O. In this article, we study HCI- and NBTI-induced wear due to actual stresses caused by real workloads, applied onto the interconnect microarchitecture and develop a critical path model for NBTI-induced wearout. A key finding of this modeling is that, counter to prevailing wisdom, wearout in the CMP's on-chip interconnect is correlated with lack of load observed in the NoC routers rather than high load. 
We then develop a novel wearout-decelerating scheme in which routers under low load have their wear-sensitive components exercised without significantly impacting cycle time, pipeline depth, area, or power consumption of the overall router. A novel deterministic approach is proposed for the generation of appropriate exercise-mode data, ensuring design parameter targets are met. We subsequently show that the proposed design yields an $\approx$2,300$ \times $ decrease in the rate of wear.", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kahng:2015:IMR, author = "Andrew B. Kahng and Seokhyeong Kang and Jiajia Li and Jose {Pineda De Gyvez}", title = "An Improved Methodology for Resilient Design Implementation", journal = j-TODAES, volume = "20", number = "4", pages = "66:1--66:??", month = sep, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2749462", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Sep 29 08:53:54 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Resilient design techniques are used to (i) ensure correct operation under dynamic variations and to (ii) improve design performance (e.g., timing speculation). However, significant overheads (e.g., 16\% and 14\% energy penalties due to throughput degradation and additional circuits) are incurred by existing resilient design techniques. For instance, resilient designs require additional circuits to detect and correct timing errors. Further, when there is an error, the additional cycles needed to restore a previous correct state degrade throughput, which diminishes the performance benefit of using resilient designs. 
In this work, we describe an improved methodology for resilient design implementation to minimize the costs of resilience in terms of power, area, and throughput degradation. Our methodology uses two levers: selective-endpoint optimization (i.e., sensitivity-based margin insertion) and clock skew optimization. We integrate the two optimization techniques in an iterative optimization flow which comprehends toggle rate information and the trade-off between cost of resilience and margin on combinational paths. Since the error-detection network can result in up to 9\% additional wirelength cost, we also propose a matching-based algorithm for construction of the error-detection network to minimize this resilience overhead. Further, our implementations comprehend the impacts of signoff corners (in particular, hold constraints, and use of typical vs. slow libraries) and process variation, which are typically omitted in previous studies of resilience trade-offs. Our proposed flow achieves energy reductions of up to 21\% and 10\% compared to a conventional (with only margin used to attain robustness) design and a brute-force implementation (i.e., a typical resilient design, where resilient endpoints are (greedily) instantiated at timing-critical endpoints), respectively. 
We show that these benefits increase in the context of an adaptive voltage scaling strategy.", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Roy:2015:FTE, author = "Debashri Roy and Prasun Ghosal and Saraju Mohanty", title = "{FuzzRoute}: a Thermally Efficient Congestion-Free Global Routing Method for Three-Dimensional Integrated Circuits", journal = j-TODAES, volume = "21", number = "1", pages = "1:1--1:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2767127", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The high density of interconnects, closer proximity of modules, and routing phase are pivotal during the layout of a performance-centric three-dimensional integrated circuit (3D IC). Heuristic-based approaches are typically used to handle such NP-complete problems of global routing in 3D ICs. To overcome the inherent limitations of deterministic approaches, a novel methodology for multi-objective global routing based on fuzzy logic has been proposed in this article. The guiding information generated after the placement phase is used during routing with the help of a fuzzy expert system to achieve thermally efficient and congestion-free routing. A complete global routing solution is designed based on the proposed algorithms and the results are compared with selected fully established global routers, namely Labyrinth, FastRoute3.0, NTHU-R, BoxRouter 2.0, FGR, NTHU-Route2.0, FastRoute4.0, NCTU-GR, MGR, and NCTU-GR2.0. Experiments are performed over ISPD 1998 and 2008 benchmarks. 
The proposed router, called FuzzRoute, achieves balanced superiority in terms of routability, runtime, and wirelength over others. The improvements on routing time for Labyrinth, BoxRouter 2.0, and FGR are 91.81\%, 86.87\%, and 32.16\%, respectively, for ISPD 1998 benchmarks. It may be noted that, though FastRoute3.0 achieves fastest runtime, it fails to generate congestion-free solutions for all benchmarks, which is overcome by the proposed FuzzRoute of the current article. It also shows wirelength improvements of 17.35\%, 2.88\%, 2.44\%, 2.83\%, and 2.10\%, respectively, over others for ISPD 1998 benchmarks. For ISPD 2008 benchmark circuits it also provides 2.5\%, 2.6\%, 1\%, 1.1\%, and 0.3\% lesser wirelength and averagely runs 1.68$ \times $, 6.42$ \times $, 2.21$ \times $, 0.76$ \times $, and 1.54$ \times $ faster than NTHU-Route2.0, FastRoute4.0, NCTU-GR, MGR, and NCTU-GR2.0, respectively.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2015:LDP, author = "Ye Zhang and Wai-Shing Luk and Yunfeng Yang and Hai Zhou and Changhao Yan and David Z. Pan and Xuan Zeng", title = "Layout Decomposition with Pairwise Coloring and Adaptive Multi-Start for Triple Patterning Lithography", journal = j-TODAES, volume = "21", number = "1", pages = "2:1--2:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2764904", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article we present a pairwise coloring (PWC) approach to tackle the layout decomposition problem for triple patterning lithography (TPL). The main idea is to reduce the problem to a set of bi-coloring problems. 
The overall solution is refined by applying a bi-coloring method for pairs of color sets per pass. One obvious advantage of this method is that the existing double patterning lithography (DPL) techniques can be reused effortlessly. Moreover, we observe that each pass can be fulfilled efficiently by integrating an SPQR-tree-graph-division-based bi-coloring method. In addition, to prevent the solution getting stuck in the local minima, an adaptive multi-start (AMS) approach is incorporated. Adaptive starting points are generated according to the vote of previous solutions. The experimental results show that our method is competitive with other works on both solution quality and runtime performance.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2015:DMD, author = "Hu Chen and Sanghamitra Roy and Koushik Chakraborty", title = "{DARP-MP}: Dynamically Adaptable Resilient Pipeline Design in Multicore Processors", journal = j-TODAES, volume = "21", number = "1", pages = "3:1--3:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2755558", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we demonstrate that the sensitized path delays in various microprocessor pipe stages exhibit intriguing temporal and spatial variations during the execution of real-world applications. To effectively exploit these delay variations, we propose dynamically adaptable resilient pipeline (DARP)---a series of runtime techniques to boost power-performance efficiency and fault tolerance in a pipelined microprocessor. DARP employs early error prediction to avoid a major portion of the timing errors. 
We combine DARP with the state-of-art topologically homogeneous and power-performance heterogeneous (THPH) architecture to build up a new frontier for the energy efficiency of multicore processors (DARP-MP). Using a rigorous circuit-architectural infrastructure, we demonstrate that DARP substantially improves the multicore processor performance (9.4--20\%) and energy efficiency (10--28.6\%) compared to state-of-the-art techniques. The energy-efficiency improvements of DARP-MP are 42\% and 49.9\% compared against the original THPH and another state-of-art multicore power management scheme, respectively.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2015:MMS, author = "Myungsun Kim and Jinkyu Koo and Hyojung Lee and James R. Geraci", title = "Memory Management Scheme to Improve Utilization Efficiency and Provide Fast Contiguous Allocation without a Statically Reserved Area", journal = j-TODAES, volume = "21", number = "1", pages = "4:1--4:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2770871", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Fast allocation of large blocks of physically contiguous memory plays a crucial role to boost the performance of multimedia applications in modern memory-constrained portable devices, such as smartphones, tablets, etc. Existing systems have addressed this issue by provisioning a large statically reserved memory area (SRA) in which only dedicated applications can allocate pages. However, this in turn degrades the performance of applications that are prohibited to utilize the SRA due to the reduced available memory pool. 
To overcome this drawback while maintaining the benefits of the SRA, we propose a new memory management scheme that uses a special memory region, called page-cache-preferred area (PCPA), in concert with a quick memory reclaiming algorithm. The key of the proposed scheme is to enhance the memory utilization efficiency by enabling to allocate page-cached pages of all applications in the PCPA until predetermined applications require to allocate big chunks of contiguous memory. At this point, clean page-cached pages in the PCPA are rapidly evicted without write-back to a secondary storage. Compared to the SRA scheme, experimental results show that the average launch time of real-world applications and the execution time of I/O-intensive benchmarks are reduced by 9.2\% and 24.7\%, respectively.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Oboril:2015:EIS, author = "Fabian Oboril and Mehdi B. Tahoori", title = "Exploiting Instruction Set Encoding for Aging-Aware Microprocessor Design", journal = j-TODAES, volume = "21", number = "1", pages = "5:1--5:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2783435", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Microprocessors fabricated at nanoscale nodes are exposed to accelerated transistor aging due to bias temperature instability and hot carrier injection. As a result, device delays increase over time, reducing the mean time to failure (MTTF) and hence lifetime of the processor. To address this challenge, many (micro)-architectural techniques target the execution stage of the instruction pipeline, as this one is typically most critical. 
However, also the decoding stages can become aging critical and limit the microprocessor lifetime, as we will show in this work. Therefore, we propose a novel aging-aware instruction set-encoding methodology (ArISE) that improves the instruction encoding iteratively using a heuristic algorithm. In addition, the switching activities of the affected memory elements are considered in order to co-optimize lifetime and energy efficiency. Our experimental results show that MTTF of the decoding stages can be improved by 2.3$ \times $ with negligible implementation costs.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{More:2015:LAN, author = "Ankit More and Baris Taskin", title = "Locality-Aware Network Utilization Balancing in {NoCs}", journal = j-TODAES, volume = "21", number = "1", pages = "6:1--6:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2743012", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hierarchical and multi-network networks-on-chip (NoCs) have been proposed in the literature to improve the energy- and performance-efficient scalability of the traditional flat-mesh NoC architecture. Theoretically, based on a small-world network-based analysis, traditional hierarchical NoCs are expected to provide good scalability. However, the traditional theoretical analysis (e.g. for small-worldness) does not take into account the congestion phenomenon experienced in such networks. Counterintuitively, as shown in this work, breaking the hierarchy in traditional hierarchical NoCs and utilizing the proposed locality-aware network utilization (NU) balancing technique performs better. 
This improvement in performance is observed through experimental analysis, which is contrasted with the theoretical analysis that does not account for congestion. In addition to the novelties for hierarchical networks, the application of the proposed locality-aware NU balancing scheme is extended to multi-network NoC topologies (with already separated networks). Results of the analysis show the superiority of applying the locality-aware NU balancing technique for a throughput and energy-efficient scaling of the multi-network NoC architectures, much like those of the hierarchical NoCs. For instance, for a NoC with 1024 nodes, the proposed NU balancing technique provides up to 95\% higher throughput efficiency and consumes up to 29\% less energy per flit compared to the best NoC topology without the NU balancing technique. The analysis also helps to render the choice of a NoC topology for traffic patterns varying in locality and nonlocality on exascale computing CMPs.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cheng:2015:ABW, author = "Hsiang-Yun Cheng and Mary Jane Irwin and Yuan Xie", title = "Adaptive Burst-Writes {(ABW)}: Memory Requests Scheduling to Reduce Write-Induced Interference", journal = j-TODAES, volume = "21", number = "1", pages = "7:1--7:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2753757", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Main memory latencies have become a major performance bottleneck for chip-multiprocessors (CMPs). Since reads are on the critical path, existing memory controllers prioritize reads over writes. 
However, writes must be eventually processed when the write queue is full. These writes are serviced in a burst to reduce the bus turnaround delay and increase the row-buffer locality. Unfortunately, a large number of reads may suffer long queuing delay when the burst-writes are serviced. The long write latency of future nonvolatile memory will further exacerbate the long queuing delay of reads during burst-writes. In this article, we propose a run-time mechanism, Adaptive Burst-Writes (ABW), to reduce the queuing delay of reads. Based on the row-buffer hit rate of writes and the arrival rate of reads, we dynamically control the number of writes serviced in a burst to trade off the write service time and the queuing latency of reads. For prompt adjustment, our history-based mechanism further terminates the burst-writes earlier when the row-buffer hit rate of writes in the previous burst-writes is low. As a result, our policy improves system throughput by up to 28\% (average 10\%) and 43\% (average 14\%) in CMPs with DRAM-based and PCM-based main memory.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ochoa-Ruiz:2015:MAR, author = "Gilberto Ochoa-Ruiz and S{\'e}bastien Guillet and Florent {De Lamotte} and Eric Rutten and El-Bay Bourennane and Jean-Philippe Diguet and Guy Gogniat", title = "An {MDE} Approach for Rapid Prototyping and Implementation of Dynamic Reconfigurable Systems", journal = j-TODAES, volume = "21", number = "1", pages = "8:1--8:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2800784", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a co-design 
methodology based on RecoMARTE, an extension to the well-known UML MARTE profile, which is used for the specification and automatic generation of Dynamic and Partially Reconfigurable Systems-on-Chip (DRSoC). This endeavor is part of a larger framework in which Model-Driven Engineering (MDE) techniques are extensively used for modeling and via model transformations, generating executable models, which are exploited by implementation tools to create reconfigurable systems. More specifically, the methodological aspects presented in this article are concerned with expediting the conception and implementation of the hardware platform and the integration of correct by construction reconfiguration controller. This article builds upon previous research by integrating previously separated endeavors to obtain a complete PR system generation chain, which aims at shielding the designer of many of the burdensome technological and tool-specific requirements. The methodology permits for the verification of the platform description at different stages in the development process (i.e., HDL for simulation, static FPGA implementation, controller simulation and verification). Furthermore, automation capabilities embedded in the flow enable the generation of the platform description and the integration of the reconfiguration controller executive seamlessly. In order to demonstrate the benefits of the proposed approach, we present a case study in which we target the creation of an image-processing application to be deployed onto an FPGA board. 
We present the required modeling strategies and we discuss how the generation chains are integrated with the back-end Xilinx tools (the most mature version of PR technology) to produce the necessary executable artifacts: VHDL for the platform description and a C description of the reconfiguration controller to be executed by an embedded processor.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2015:CPM, author = "Shih-Hsu Huang and Hua-Hsin Yeh and Yow-Tyng Nieh", title = "Clock Period Minimization with Minimum Leakage Power", journal = j-TODAES, volume = "21", number = "1", pages = "9:1--9:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2778954", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the design of nonzero clock skew circuits, an increase of the short-path delay may improve circuit speed or reduce leakage power. However, the impact of increasing the short-path delay on the trade-off between circuit speed and leakage power has not been well studied. An analysis of previous works shows that they can be classified into two independent groups. One group uses extra buffers to increase the short-path delay for achieving the lower bound of the clock period; however, this group has a large overhead of leakage power. The other group uses the combination of threshold voltage assignment and gate sizing (TVA/GS) to increase the short-path delay as possible for reducing leakage power; however, this group often does not work with the lower bound of the clock period. 
Accordingly, this article considers the simultaneous application of buffer insertion and TVA/GS during clock skew scheduling. Our objective is to minimize the leakage power for working with the lower bound of the clock period. To the best of our knowledge, our approach is the first leakage-power-aware clock skew scheduling that guarantees working with the lower bound of the clock period. Benchmark data consistently show that our approach achieves good results in terms of both the circuit speed and the leakage power.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Subramaniam:2015:FPM, author = "Anupama R. Subramaniam and Janet Roveda and Yu Cao", title = "A Finite-Point Method for Efficient Gate Characterization Under Multiple Input Switching", journal = j-TODAES, volume = "21", number = "1", pages = "10:1--10:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2778970", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Timing characterization of standard cells is one of the essential steps in VLSI design. The traditional static timing analysis (STA) tool assumes single input switching models for the characterization of multiple input gates. However, due to technology scaling, increasing operating frequency, and process variation, the probability of the occurrence of multiple input switching (MIS) is increasing. On the other hand, considering all possible MIS scenarios for the characterization of multiple input logic gates, is computationally intensive. 
To improve the efficiency, this work proposes a finite-point-based characterization methodology for multiple input gates with the effects of MIS. Furthermore, delay variation due to MIS is integrated into the STA flow through propagation of switching windows. The proposed modeling methodology is validated using benchmark circuits at the 45nm technology node for various operating conditions. Experimental results demonstrate significant reduction in computation cost and data volume with less than $\approx$10\% error compared to that of traditional SPICE simulation.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jung:2015:LMS, author = "Dongha Jung and Hokyoon Lee and Seon Wook Kim", title = "Lowering Minimum Supply Voltage for Power-Efficient Cache Design by Exploiting Data Redundancy", journal = j-TODAES, volume = "21", number = "1", pages = "11:1--11:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2795229", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Voltage scaling is known to be an efficient way of saving power and energy within a system, and large caches such as LLCs are good candidates for voltage scaling considering their constantly increasing size. However, the V$_{CCMIN}$ problem, in which the lower bound of scalable voltage is limited by process variation, has made it difficult to exploit the benefits of voltage scaling. Lowering V$_{CCMIN}$ incurs multibit faults, which cannot be efficiently resolved by current technologies due to their high complexity and power consumption. We overcame the limitation by exploiting the data redundancy of memory hierarchy.
For example, cache coherence states and several layers of cache organization naturally expose the existence of redundancy within cache blocks. If blocks have redundant copies, their V$_{CCMIN}$ can be lowered; although more faults can occur in the blocks, they can be efficiently detected by simple error detection codes and recovered by reloading the redundant copies. Our scheme requires only minor modifications to the existing cache design. We verified our proposal on a cycle accurate simulator with SPLASH-2 and PARSEC benchmark suites and found that the V$_{CCMIN}$ of a 2MB L2 cache can be further lowered by 0.1V in 32nm technology with negligible degradation in performance. As a result, we could achieve 15.6\% of reduction in dynamic power and 15.4\% of reduction in static power compared to the previous minimum power.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Qin:2015:CSE, author = "Ying Qin and Shengyu Shen and Qingbo Wu and Huadong Dai and Yan Jia", title = "Complementary Synthesis for Encoder with Flow Control Mechanism", journal = j-TODAES, volume = "21", number = "1", pages = "12:1--12:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2794079", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Complementary synthesis automatically generates an encoder's decoder with the assumption that the encoder's all input variables can always be uniquely determined by its output symbol sequence. However, to prevent the faster encoder from overwhelming the slower decoder, many encoders employ flow control mechanism that fails this assumption.
Such encoders, when their output symbol sequences are too fast to be processed by the decoders, will stop transmitting data symbols, but instead transmitting idle symbols that can only uniquely determine a subset of the encoder's input variables. And the decoder should recognize and discard these idle symbols. This mechanism fails the assumption of all complementary synthesis algorithms, because some input variables can't be uniquely determined by the idle symbol. A novel algorithm is proposed to handle such encoders. First, it identifies all input variables that can be uniquely determined, and takes them as flow control variables. Second, it infers a predicate over these flow control variables that enables all other input variables to be uniquely determined. Third, it characterizes the decoder's Boolean function with Craig interpolant. Experimental results on several complex encoders indicate that this algorithm can always correctly identify the flow control variables, infer the predicates and generate the decoder's Boolean functions.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2015:ETC, author = "Irith Pomeranz", title = "Enhanced Test Compaction for Multicycle Broadside Tests by Using State Complementation", journal = j-TODAES, volume = "21", number = "1", pages = "13:1--13:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2778953", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicycle tests support test compaction by allowing each test to detect more target faults. 
The ability of multicycle broadside tests to provide test compaction depends on the ability of primary input sequences to take the circuit between pairs of states that are useful for detecting target faults. This ability can be enhanced by adding design-for-testability (DFT) logic that allows states to be complemented. This article describes a test compaction procedure that uses such DFT logic to form a compact multicycle broadside test set for transition faults where the tests use constant primary input vectors. The use of complemented states also allows the procedure to increase the transition fault coverage beyond the transition fault coverage of a broadside test set. The procedure has the option of increasing the switching activity of the tests gradually in order to explore the tradeoff between the number of tests, the fault coverage, and the switching activity.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Potluri:2015:DAT, author = "Seetal Potluri and A. Satya Trinadh and Sobhan Babu Ch. and V. Kamakoti and Nitin Chandrachoodan", title = "{DFT} Assisted Techniques for Peak Launch-to-Capture Power Reduction during Launch-On-Shift At-Speed Testing", journal = j-TODAES, volume = "21", number = "1", pages = "14:1--14:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2790297", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scan-based testing is crucial to ensuring correct functioning of chips. In this scheme, the scan and capture phases are interleaved. 
It is well known that for large designs, excessive switching activity during the launch-to-capture window leads to high voltage droop on the power grid, ultimately resulting in false delay failures during at-speed test. This article proposes a new design-for-testability (DFT) scheme for launch-on-shift (LOS) testing, which ensures that the combinational logic remains undisturbed between the interleaved capture phases, providing computer-aided-design (CAD) tools with extra search space for minimizing launch-to-capture switching activity through test pattern ordering (TPO). We further propose a new TPO algorithm that keeps track of the don't cares during the ordering process, so that the don't care filling step after the ordering process yields a better reduction in launch-to-capture switching activity compared to any other technique in the literature. The proposed DFT-assisted technique, when applied to circuits in ITC99 benchmark suite, produces an average reduction of 17.68\% in peak launch-to-capture switching activity (CSA) compared to the best known low-power TPO technique. Even for circuits whose test cubes are not rich in don't care bits, the proposed technique produces an average reduction of 15\% in peak CSA, while for the circuits with test cubes rich in don't care bits ($ \geq $75\%), the average reduction is 24\%.
The proposed technique also reduces the average power dissipation (considering both scan cells and combinational logic) during the scan phase by about 43.5\% on an average, compared to the adjacent filling technique.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2015:PDU, author = "Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and Pei-Wen Luo", title = "Performance-Driven Unit-Capacitor Placement of Successive-Approximation-Register {ADCs}", journal = j-TODAES, volume = "21", number = "1", pages = "15:1--15:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2770872", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The performance of many switched-capacitor analog integrated circuits, such as analog-to-digital converters (ADCs) and sample and hold circuits, is directly related to their accurate capacitance ratios. In general, capacitor mismatch can result from two sources of errors: random mismatch and systematic mismatch. Paralleling unit capacitance (UC) with a common-centroid structure can alleviate the random mismatch errors. The complexity of generating an optimal solution to the UC placement problem is extremely high, let alone if both placement and routing problems are to be optimized simultaneously. This article evaluates the performance of the UC placement generated in an existing work and proposes an alternative UC placement to achieve optimal ratio mismatch M and better linearity performance of SAR ADC design. 
Results show that the proposed UC placement achieves a ratio mismatch of M = 0.695, the effective number of bits ENOB = 8.314 bits, and the integral nonlinearity INL = 0.816 LSB (least significant bits) for a 9-bit SAR ADC design.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sun:2015:NUB, author = "Jin Sun and Claudio Talarico and Priyank Gupta and Janet Roveda", title = "A New Uncertainty Budgeting-Based Method for Robust Analog\slash Mixed-Signal Design", journal = j-TODAES, volume = "21", number = "1", pages = "16:1--16:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2778959", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a novel methodology for robust analog/mixed-signal IC design by introducing a notion of budget of uncertainty. This method employs a new conic uncertainty model to capture process variability and describes variability-affected circuit design as a set-based robust optimization problem. For a prespecified yield requirement, the proposed method conducts uncertainty budgeting by associating performance yield with the size of uncertainty set for process variations. Hence the uncertainty budgeting problem can be further translated into a tractable robust optimization problem. Compared with the existing robust design flow based on ellipsoid model, this method is able to produce more reliable design solutions by allowing varying size of conic uncertainty set at different design points. 
In addition, the proposed method addresses the limitation that the size of the ellipsoid model is calculated solely relying on the distribution of process parameters, while neglecting the dependence of circuit performance upon these design parameters. The proposed robust design framework has been verified on various analog/mixed-signal circuits to demonstrate its efficiency against the ellipsoid model. Up to 24\% reduction of design cost has been achieved by using the uncertainty budgeting-based method.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mitra:2015:OWS, author = "Debasis Mitra and Sarmishtha Ghoshal and Hafizur Rahaman and Krishnendu Chakrabarty and Bhargab B. Bhattacharya", title = "Offline Washing Schemes for Residue Removal in Digital Microfluidic Biochips", journal = j-TODAES, volume = "21", number = "1", pages = "17:1--17:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2798726", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A digital microfluidic biochip (DMB) is often deployed for multiplexing several assays in space and in time. The residue left by one assay may contaminate the droplets used for subsequent assays. Biochemical assays involving cell culture and those based on particle microfluidics also require sweeping of residual media from an active droplet on-chip. Thus, fluidic operations such as washing or residue removal need to be performed routinely either to clean contamination from the droplet pathways or to rinse off certain droplets on the chip. 
In this work, several graph-based techniques are presented for offline washing of biochips that may have either a regular geometry (e.g., a 2D array of electrodes), or an irregular geometry (e.g., an application-specific layout). The schemes can be used for total washing, that is, for cleaning the entire biochip or for selective washing of sites or pathways located sparsely on the chip. The problem of reducing the path length and washing time of the droplets is investigated with or without capacity constraints. The proposed algorithms for offline washing make use of several techniques such as graph traversal, integer linear programming (ILP) modeling, and customized heuristics based on the nature of the geometric distribution of the contamination profile. The contaminated pathways are assumed to be Manhattan or curved, and hence the techniques are applicable to the conventional field-actuated DMBs as well as to the emerging classes of light-actuated and active-matrix DMBs. These techniques will be useful in enhancing the reliability of a wide class of emerging digital microfluidic healthcare devices.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2015:SAD, author = "Chung-Wei Lin and Bowen Zheng and Qi Zhu and Alberto Sangiovanni-Vincentelli", title = "Security-Aware Design Methodology and Optimization for Automotive Systems", journal = j-TODAES, volume = "21", number = "1", pages = "18:1--18:??", month = nov, year = "2015", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2803174", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Dec 3 10:15:10 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we address both security and safety requirements and solve
security-aware design problems for the controller area network (CAN) protocol and time division multiple access (TDMA)-based protocols. To provide insights and guidelines for other similar security problems with limited resources and strict timing constraints, we propose a general security-aware design methodology to address security with other design constraints in a holistic framework and optimize design objectives. The security-aware design methodology is further applied to solve a security-aware design problem for vehicle-to-vehicle (V2V) communications with dedicated short-range communication (DSRC) technology. Experimental results demonstrate the effectiveness of our approaches in system design without violating design constraints and indicate that it is necessary to consider security together with other metrics during design stages.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2016:CFS, author = "Daming Zhang and Shuangchen Li and Yongpan Liu and Xiaobo Sharon Hu and Xinyu He and Yining Zhang and Pei Zhang and Huazhong Yang", title = "A {C2RTL} Framework Supporting Partition, Parallelization, and {FIFO} Sizing for Streaming Applications", journal = j-TODAES, volume = "21", number = "2", pages = "19:1--19:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2797135", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Developing circuits for streaming applications written in C (or its variants) can benefit greatly from C-to-RTL (C2RTL) synthesis. Yet, most existing C2RTL tools lack system-level options to trade off various design constraints, such as delay and area. 
This article introduces a systematic way to accomplish C2RTL synthesis for streaming applications containing thousands of lines of C (or its variants) codes. Synthesizing circuits for such large applications presents serious challenges for existing C2RTL tools. Specifically, the proposed approach determines simultaneously the number of pipeline stages and the number of times that each functional block is duplicated in each pipeline stage. A mixed integer linear programming-based solution is formulated for obtaining the optimal solution. Furthermore, a heuristic algorithm is developed for large-scale problems. To accommodate the differences of the data rates between the adjacent hardware modules, first-in-first-out (FIFO) buffers are indispensable, but their overheads are nonnegligible. A parallelism-aware FIFO sizing method is also introduced to determine the optimal sizes of FIFOs. Experimental results on seven real-world applications demonstrate that the algorithms in the synthesis flow can make effective design trade-offs and find superior solutions in a short time compared with existing approaches. 
Furthermore, the algorithms achieve optimal results in most cases with subsecond running time.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pierre:2016:AVT, author = "Laurence Pierre", title = "Auxiliary Variables in Temporal Specifications: Semantic and Practical Analysis for System-Level Requirements", journal = j-TODAES, volume = "21", number = "2", pages = "20:1--20:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2811260", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Assertion-based verification (ABV) for IP blocks given as synchronous RTL (register transfer level) descriptions has now widely gained acceptance. The challenge addressed here is ABV for systems on chip (SoC) modeled at the system level in SystemC TLM (Transactional Level Modeling). Requirements to be verified at this level of abstraction usually express temporal constraints on the interactions and communications in the SoC. We use the IEEE standard language PSL to formalize these temporal assertions which represent properties on communication actions and their parameters. Auxiliary variables are often indispensable for this formalization, but their use may induce semantic issues. This article discusses this matter, analyzes various existing approaches and proposes a summary of their advantages and shortcomings. They are also compared to our syntactic and semantic framework, implemented in a verification tool. The proposed operational semantics has the advantages of being simple and intuitive while supporting both global and local auxiliary variables. 
Experimental results on industrial case studies illustrate its applicability.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2016:PDA, author = "Jin-Tai Yan", title = "Performance-Driven Assignment of Buffered {I/O} Signals in Area-{I/O} Flip-Chip Designs", journal = j-TODAES, volume = "21", number = "2", pages = "21:1--21:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2818642", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to the inappropriate assignment of bump pads or the improper assignment of I/O buffers, the constructed buffered I/O signals in an area-I/O flip-chip design may yield longer maximum delay. In this article, the problem of assigning performance-driven buffered I/O signals in an area-I/O flip-chip design is first formulated. Furthermore, the assignment of the buffered I/O signals can be divided into two sequential phases: Construction of performance-driven I/O signals and Assignment of timing-constrained I/O buffers. Finally, an efficient matching-based approach is proposed to construct the performance-driven I/O signals for the given I/O pins and assign the timing-constrained I/O buffers into the constructed I/O signals in the assignment of the buffered I/O signals in an area-I/O flip-chip design. 
Compared with the experimental results of seven tested circuits in the Elmore delay model, the experimental results show that the matching-based assignment in our proposed approach can reduce 3.56\% of the total path delay, 9.72\% of the maximum input delay, 5.90\% of the input skew, 5.64\% of the maximum output delay, and 6.25\% of the output skew on average by reassigning the I/O buffers. Our proposed approach can further reduce 38.89\% of the total path delay, 44.00\% of the maximum input delay, 49.13\% of the input skew, 44.93\% of the maximum output delay, and 50.82\% of output skew on average by reconstructing the I/O signals and reassigning the I/O buffers into the I/O signals. Compared with the experimental results of seven tested circuits in Peng's [Peng et al. 2006] publication, the experimental results show that our proposed matching-based approach can further reduce 71.06\% of the total path delay, 67.83\% of the maximum input delay, 59.84\% of the input skew, 68.87\% of the maximum output delay, and 61.46\% of the output skew on average. 
On the other hand, compared with the experimental results of five tested circuits in Lai's [Lai and Chen 2008] publication, the experimental results show that our proposed approach can further reduce 75.36\% of the total path delay, 48.94\% of the input skew, and 52.80\% of the output skew on the average.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kritikakou:2016:ASC, author = "Angeliki Kritikakou and Francky Catthoor and Vasilios Kelefouras and Costas Goutis", title = "Array Size Computation under Uniform Overlapping and Irregular Accesses", journal = j-TODAES, volume = "21", number = "2", pages = "22:1--22:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2818643", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The size required to store an array is crucial for an embedded system, as it affects the memory size, the energy per memory access, and the overall system cost. Existing techniques for finding the minimum number of resources required to store an array are less efficient for codes with large loops and not regularly occurring memory accesses. They have to approximate the accessed parts of the array leading to overestimation of the required resources. Otherwise, their exploration time is increased with an increase over the number of the different accessed parts of the array. 
We propose a methodology to compute the minimum resources required for storing an array which keeps the exploration time low and provides a near-optimal result for regularly and non-regularly occurring memory accesses and overlapping writes and reads.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2016:IWP, author = "Youngsik Kim and Sungjoo Yoo and Sunggu Lee", title = "Improving Write Performance by Controlling Target Resistance Distributions in {MLC PRAM}", journal = j-TODAES, volume = "21", number = "2", pages = "23:1--23:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2820610", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multi-level cell (MLC) phase change RAM (PRAM) is expected to offer lower cost main memory than DRAM. However, poor write performance is one of the most critical problems for practical applications of MLC PRAM. In this article, we present two schemes to improve write performance by controlling the target resistance distribution of MLC PRAM cells. First, we propose multiple RESET/SET operations that relax the target resistance bands of intermediate logic levels with additional RESET/SET operations, which reduces the program time of intermediate logic levels, thereby improving write performance. Second, we propose a two-step write scheme consisting of lightweight write and idle-time completion write that exploits the fact that hot dirty data tend to be overwritten in a short time period and the MLC PRAM often has long idle times. 
Experimental results show that the multiple RESET/SET and two-step write schemes result in an average IPC improvement of 15.7\% and 10.4\%, respectively, on a hybrid DRAM/PRAM main memory subsystem. Furthermore, their integrated solution results in an average IPC improvement of 23.2\% (up to 46.4\%).", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xiang:2016:NUB, author = "Dong Xiang and Kele Shen", title = "A New Unicast-Based Multicast Scheme for Network-on-Chip Router and Interconnect Testing", journal = j-TODAES, volume = "21", number = "2", pages = "24:1--24:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2821506", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "3D technology for networks-on-chip (NOCs) becomes attractive. It is important to present an effective scheme for 3D stacked NOC router and interconnect testing. A new approach to testing of NOC routers is proposed by classifying the routers. Routers with the same number of input/output ports fall into the same class. Routers of the same class are identical if their tests are the same. A test packet is delivered to all the identical routers by a simple unicast-based multicast scheme. It is found that the depth of the consumption buffer at each router has great impact on the test delivery time because test application and test delivery for router testing cannot be handled concurrently. Test delivery must set a router to operational mode. A mathematical model is presented to evaluate the impact of consumption buffer depth on the test delivery time. A new and simple test application scheme is proposed for interconnect testing. 
Some interesting extensions are presented for further test time reduction and thermal considerations. Sufficient experimental results are presented by comparison with one previous method. The proposed method works for single stuck-at, transition, even small delay faults at routers, and single bridging faults at physical, consumption and injection channels.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2016:ODM, author = "Zipeng Li and Tsung-Yi Ho and Krishnendu Chakrabarty", title = "Optimization of {$3$D} Digital Microfluidic Biochips for the Multiplexed Polymerase Chain Reaction", journal = j-TODAES, volume = "21", number = "2", pages = "25:1--25:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2811259", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A digital microfluidic biochip (DMFB) is an attractive technology platform for revolutionizing immunoassays, clinical diagnostics, drug discovery, DNA sequencing, and other laboratory procedures in biochemistry. In most of these applications, real-time polymerase chain reaction (PCR) is an indispensable step for amplifying specific DNA segments. To reduce the reaction time to meet the requirement of ``real-time'' applications, multiplexed PCR is widely utilized. In recent years, three-dimensional (3D) DMFBs that integrate photodetectors (i.e., cyberphysical DMFBs) have been developed, which offer the benefits of smaller size, higher sensitivity, and faster result generations. However, current DMFB design methods target optimization in only two dimensions, thus ignoring the 3D two-layer structure of a DMFB. 
Furthermore, these techniques ignore practical constraints related to the interference between on-chip device pairs, the performance-critical PCR thermal loop, and the physical size of devices. Moreover, some practical issues in real scenarios are not stressed (e.g., the avoidance of the cross-contamination for multiplexed PCR). In this article, we describe an optimization solution for a 3D DMFB and present a three-stage algorithm to realize a compact 3D PCR chip layout, which includes: (i) PCR thermal-loop optimization, (ii) 3D global placement based on Strong-Push-Weak-Pull (SPWP) model, and (iii) constraint-aware legalization. To avoid cross-contamination between different DNA samples, we also propose a Minimum-Cost-Maximum-Flow-based (MCMF-based) method for reservoir assignment. Simulation results for four laboratory protocols demonstrate that the proposed approach is effective for the design and optimization of a 3D chip for multiplexed real-time PCR.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2016:PPG, author = "Le Zhang and Vivek Sarin", title = "Parallel Power Grid Analysis Based on Enlarged Partitions", journal = j-TODAES, volume = "21", number = "2", pages = "26:1--26:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2806885", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the size and complexity of current VLSI circuits grows, faster power grid simulation is becoming more and more desirable. In this article, we present a parallel iterative method for static VLSI power grid simulation. 
In the proposed enlarged-partition-based preconditioned conjugate gradient (EPPCG) power grid solver, the power grid is divided into disjoint partitions that are subsequently enlarged to obtain accurate solution within each partition. The global solution obtained by solving enlarged partition problems concurrently acts as a highly effective parallel preconditioner. The combination of effective preconditioning and efficient parallelization helps achieve very high performance. The experiments show that our parallel implementation can achieve significant speed improvement [61X--142X] over a state-of-the-art direct solver.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jin:2016:CEE, author = "Song Jin and Songwei Pei and Yinhe Han and Huawei Li", title = "A Cost-Effective Energy Optimization Framework of Multicore {SoCs} Based on Dynamically Reconfigurable Voltage-Frequency Islands", journal = j-TODAES, volume = "21", number = "2", pages = "27:1--27:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2817207", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Voltage-frequency island (VFI)-based design has been widely exploited for optimizing system energy of embedded multicore chip in recent years. The existing work either constructed a single static VFI partition for all kinds of applications or required per-core voltage domain configuration. However, the former solution is hard to find one optimal VFI partition for diverse applications while the latter one suffers from high hardware cost. 
In this article, we propose a cost effective energy optimization framework based on dynamically reconfigurable VFI (D-VFI). Our framework treats a small number of cores as dynamic cores (D-cores) and configures each of them with an independent voltage domain. At runtime, the D-cores can be pieced together with neighboring static VFIs by scaling their operating voltages. This can dynamically construct the optimal VFI partitions for different kinds of applications, thus achieving more aggressive energy optimization under low cost. To identify the D-cores, we propose a rules constrained task scheduling and VFI partitioning algorithm. Moreover, we analyze the task schedules to determine the optimal scaling intervals which can accommodate voltage scaling induced latency. Experimental results demonstrate the effectiveness of the proposed scheme.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kamal:2016:YSI, author = "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and Massoud Pedram", title = "Yield and Speedup Improvements in Extensible Processors by Allocating Extra Cycles to Some Custom Instructions", journal = j-TODAES, volume = "21", number = "2", pages = "28:1--28:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2830566", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we investigate the application of different techniques for mitigating the impact of process variations on the custom functional unit (CFU) of extensible processors. The techniques include using extra cycles for the CFU and extending the clock period for the extensible processor. 
The former technique is based on providing an extra clock cycle to those custom instructions (CIs) that have timing yields smaller than one. For this purpose, we make use of a lookup table (LUT) for each fabricated processor. Based on a post-fabrication analysis, the need for an extra clock cycle for some CIs is determined. Consequently, the CI timing violations are prevented, and all manufactured extensible processors will work with a predefined clock cycle time. To study the effect of the objective function (used during the CI selection phase) on the efficacy of the suggested architectural technique, we investigate three different objective functions. In the second technique, the clock period extension is used to guarantee a design yield of one. Our results demonstrate that combining both techniques helps increase the speedup achieved by the extensible processor. To assess the efficacies of the proposed methods, several benchmarks from different application domains are used. Results of the study reveal that the suggested techniques provide considerable improvements in the speedups of the extensible processors when compared to those of approaches that do not consider the impact of process variations.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2016:TTS, author = "Guoqing Chen and Yi Xu and Xing Hu and Xiangyang Guo and Jun Ma and Yu Hu and Yuan Xie", title = "{TSocket}: Thermal Sustainable Power Budgeting", journal = j-TODAES, volume = "21", number = "2", pages = "29:1--29:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2837023", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = 
"As technology scales, thermal management for multicore architectures becomes a critical challenge due to increasing power density. Existing power budgeting techniques focus on maximizing performance under a given power budget by optimizing the core configurations. In multicore era, a chip-wide power budget, however, is not sufficient to ensure thermal constraints because the thermal sustainable power capacity varies with different threading strategies and core configurations. In this article, we propose two models to dynamically estimate the thermal sustainable power capacity in homogeneous multicore systems: uniform power model and nonuniform power model. These two models convert the thermal effect of threading strategies and core configurations into power capacity, which provide a context-based core power capacity for power budgeting. Based on these models, we introduce a power budgeting framework aiming to improve the performance within thermal constraints, named as TSocket. Compared to the chip-wide power budgeting solution, TSocket shows 19\% average performance improvement for the PARSEC benchmarks in single program scenario and up to 11\% performance improvement in multiprogram scenario. The performance improvement is achieved by reducing thermal violations and exploring thermal headrooms.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2016:RAR, author = "Liang Chen and Mojtaba Ebrahimi and Mehdi B. 
Tahoori", title = "Reliability-Aware Resource Allocation and Binding in High-Level Synthesis", journal = j-TODAES, volume = "21", number = "2", pages = "30:1--30:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2839300", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Soft error is nowadays a major reliability issue for nanoscale VLSI, and addressing it during high-level synthesis is essential to improve the efficiency of error mitigation. Motivated by the observation that for behavioral designs, especially control-flow intensive ones, variables and operations have non-uniform soft error vulnerabilities, we propose a novel reliability-aware allocation and binding technique to explore more effective soft error mitigation during high level synthesis. We first perform a comprehensive vulnerability analysis at the behavioral level by considering error propagation and masking in both control and data flows. Then the optimizations based on integer linear programming, as well as heuristic algorithm, are employed to incorporate the behavioral vulnerabilities into the register and functional unit binding phases to achieve cost-efficient error mitigation. 
The experimental results reveal that compared with the previous techniques which ignored behavioral vulnerabilities, the proposed approach can achieve up to 85\% reliability improvement with the same amount of area budget in the RTL design.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dubeuf:2016:EPA, author = "Jeremy Dubeuf and David Hely and Vincent Beroulle", title = "{ECDSA} Passive Attacks, Leakage Sources, and Common Design Mistakes", journal = j-TODAES, volume = "21", number = "2", pages = "31:1--31:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2820611", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Elliptic Curves Cryptography (ECC) tends to replace RSA for public key cryptographic services. ECC is involved in many secure schemes such as Elliptic Curve Diffie--Hellman (ECDH) key agreement, Elliptic Curve Integrated Encryption Scheme (ECIES), and Elliptic Curve Digital Signature Algorithm (ECDSA). As for every cryptosystem, implementation of such schemes may jeopardize the inherent security provided by the mathematical properties of the ECC. Unfortunate implementation or algorithm choices may create serious vulnerabilities. The elliptic curve scalar operation is particularly sensitive among these schemes. This article surveys passive attacks against well-spread elliptic curve scalar multiplication algorithms highlighting leakage sources and common mistakes that can be used to attack the ECDSA scheme. Experimental results are provided to illustrate and demonstrate the effectiveness of each vulnerability. 
Finally, the article describes the link between partial leakage and lattice attack in order to understand and demonstrate the impact of small leakages on the security of ECDSA. An example of side channel and lattice attack combination on NIST P-256 is provided in the case where the elliptic curve scalar multiplication is not protected against DPA/CPA and a controllable device is not accessible.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lukasiewycz:2016:SAO, author = "Martin Lukasiewycz and Philipp Mundhenk and Sebastian Steinhorst", title = "Security-Aware Obfuscated Priority Assignment for Automotive {CAN} Platforms", journal = j-TODAES, volume = "21", number = "2", pages = "32:1--32:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2831232", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Security in automotive in-vehicle networks is an increasing problem with the growing connectedness of road vehicles. This article proposes a security-aware priority assignment for automotive controller area network (CAN) platforms with the aim of mitigating scaling effects of attacks on vehicle fleets. CAN is the dominating field bus in the automotive domain due to its simplicity, low cost, and robustness. While messages might be encrypted to enhance the security of CAN systems, their priorities are usually identical for automotive platforms, comprising generally a large number of vehicle models. As a result, the identifier uniquely defines which message is sent, allowing attacks to scale across a fleet of vehicles with the same platform. 
As a remedy, we propose a methodology that is capable of determining obfuscated message identifiers for each individual vehicle. Since identifiers directly represent message priorities, the approach has to take the resulting response time variations into account while satisfying application deadlines for each vehicle schedule separately. Our approach relies on Quadratically Constrained Quadratic Program (QCQP) solving in two stages, specifying first a set of feasible fixed priorities and subsequently bounded priorities for each message. With the obtained bounds, obfuscated identifiers are determined, using a very fast randomized sampling. The experimental results, consisting of a large set of synthetic test cases and a realistic case study, give evidence of the efficiency of the proposed approach in terms of scalability. The results also show that the diversity of obtained identifiers is effectively optimized with our approach, resulting in a very good obfuscation of CAN messages in in-vehicle communication.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Suresh:2016:AVD, author = "Chandra K. H. Suresh and Ozgur Sinanoglu and Sule Ozev", title = "Adapting to Varying Distribution of Unknown Response Bits", journal = j-TODAES, volume = "21", number = "2", pages = "33:1--33:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2835489", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditionally, test patterns that are generated for a given circuit are applied in an identical manner to all manufactured devices until each device under test either fails or passes each test. 
With increasing process variations, the statistical diversity of manufactured devices is increasing, making such one-size-fits-all approaches increasingly inefficient. Adaptive test techniques address this problem by tailoring the test decisions for the statistical characteristics of the device under test. In this article, we present several adaptive strategies to enable adaptive unknown bit masking for faster-than-at-speed testing so as to ensure no yield loss while attaining the maximum test quality based on tester memory constraints. We also develop a tester-enabled compression scheme that helps alleviate memory constraints further, shifting the tradeoff space favorably to improve test quality.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tan:2016:ESE, author = "Jingweijia Tan and Zhi Li and Mingsong Chen and Xin Fu", title = "Exploring Soft-Error Robust and Energy-Efficient Register File in {GPGPUs} using Resistive Memory", journal = j-TODAES, volume = "21", number = "2", pages = "34:1--34:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2827697", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing adoption of graphics processing units (GPUs) for high-performance computing raises the reliability challenge, which is generally ignored in traditional GPUs. GPUs usually support thousands of parallel threads and require a sizable register file. Such large register file is highly susceptible to soft errors and power-hungry. 
Although ECC has been adopted to register file in modern GPUs, it causes considerable power overhead, which further increases the power stress. Thus, an energy-efficient soft-error protection mechanism is more desirable. Besides its extremely low leakage power consumption, resistive memory (e.g., spin-transfer torque RAM) is also immune to the radiation induced soft errors due to its magnetic field based storage. In this article, we propose to LEverage reSistive memory to enhance the Soft-error robustness and reduce the power consumption (LESS) of registers in the General-Purpose computing on GPUs (GPGPUs). Since resistive memory experiences longer write latency compared to SRAM, we explore the unique characteristics of GPGPU applications to obtain the win-win gains: achieving the near-full soft-error protection for the register file, and meanwhile substantially reducing the energy consumption with negligible performance degradation. Our experimental results show that LESS is able to mitigate the registers soft-error vulnerability by 86\% and achieve 61\% energy savings with negligible (e.g., 1\%) performance degradation.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2016:DTF, author = "Irith Pomeranz", title = "Design-for-Testability for Functional Broadside Tests under Primary Input Constraints", journal = j-TODAES, volume = "21", number = "2", pages = "35:1--35:??", month = jan, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2831231", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 6 07:43:40 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Functional broadside tests avoid overtesting of delay faults by creating functional operation conditions during 
the clock cycles where delay faults are detected. When a circuit is embedded in a larger design, a functional broadside test needs to take into consideration the functional constraints that the design creates for its primary input vectors. At the same time, application of primary input vectors as part of a scan-based test requires hardware support. An earlier work considered the case where a primary input vector is held constant during a test. The approach described in this article matches the hardware for applying primary input vectors to the functional constraints that the design creates. This increases the transition fault coverage that can be achieved by functional broadside tests. This article also considers the effect on the transition fault coverage achievable using close-to-functional broadside tests.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Young:2016:PSS, author = "Evangeline Young and Azadeh Davoodi", title = "Preface to Special Section on New Physical Design Techniques for the Next Generation of Integration Technology", journal = j-TODAES, volume = "21", number = "3", pages = "36:1--36:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2902365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Darav:2016:EPH, author = "Nima Karimpour Darav and Andrew Kennings and Aysa Fakheri Tabrizi and David Westwick and Laleh Behjat", title = "{Eh?Placer}: a High-Performance Modern Technology-Driven 
Placer", journal = j-TODAES, volume = "21", number = "3", pages = "37:1--37:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2899381", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The placement problem has become more complex and challenging due to a wide variety of complicated constraints imposed by modern process technologies. Some of the most challenging constraints and objectives were highlighted during the most recent ACM/IEEE International Symposium on Physical Design (ISPD) contests. In this article, the framework of Eh?Placer and its developed algorithms are elaborated, with the main focus on modern technology constraints and runtime. The technology constraints considered as part of Eh?Placer are fence region, target density, and detailed routability constraints. We present a complete description on how these constraints are considered in different stages of Eh?Placer. The results obtained from the contests indicate that Eh?Placer is able to efficiently handle modern technology constraints and ranks highly among top academic placement tools.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Livramento:2016:CTA, author = "Vinicius Livramento and Renan Netto and Chrystian Guth and Jos{\'e} Lu{\'\i}s G{\"u}ntzel and Luiz C. V. 
{Dos Santos}", title = "Clock-Tree-Aware Incremental Timing-Driven Placement", journal = j-TODAES, volume = "21", number = "3", pages = "38:1--38:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2858793", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing impact of interconnections on overall circuit performance makes timing-driven placement (TDP) a crucial step toward timing closure. Current TDP techniques improve critical paths but overlook the impact of register placement on clock tree quality. On the other hand, register placement techniques found in the literature mainly focus on power consumption, disregarding timing and routability. Indeed, postponing register placement may undermine the optimization achieved by TDP, since the wiring between sequential and combinational elements would be touched. This work proposes a new approach for an effective coupling between register placement and TDP that relies on two key aspects to handle sequential and combinational elements separately: only the registers in the critical paths are touched by TDP (in practice they represent a small percentage of the total number of registers), and the shortening of clock tree wirelength can be obtained with limited variation in signal wirelength and placement density. The approach consists of two steps: (1) incremental register placement guided by a virtual clock tree to reduce clock wiring capacitance while preserving signal wirelength and density, and (2) incremental TDP to minimize the total negative slack. For the first step, we propose a novel technique that combines clock-net contraction and register clustering forces to reduce the clock wirelength. 
For the second step, we propose a novel Lagrangian Relaxation formulation that minimizes total negative slack for both setup and hold timing violations. To solve the formulation, we propose a TDP technique using a novel discrete search that employs a Euclidean distance to define a proper neighborhood. For the experimental evaluation of the proposed approach, we relied on the ICCAD 2014 TDP contest infrastructure and compared our results with the best results obtained from that contest in terms of timing closure, clock tree compactness, signal wirelength, and density. Assuming a long displacement constraint, our technique achieves worst and total negative slack reductions of around 24\% and 26\%, respectively. In addition, our approach leads to 44\% shorter clock tree wirelength with negligible impact on signal wirelength and placement density. In the face of such results, the proposed coupling seems a useful approach to handle the challenges faced by contemporary physical synthesis.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2016:PAC, author = "Po-Hsun Wu and Mark Po-Hung Lin and Xin Li and Tsung-Yi Ho", title = "Parasitic-Aware Common-Centroid {FinFET} Placement and Routing for Current-Ratio Matching", journal = j-TODAES, volume = "21", number = "3", pages = "39:1--39:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2856031", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The FinFET technology is regarded as a better alternative for modern high-performance and low-power integrated-circuit design due to more effective channel control and lower power consumption. 
However, the gate-misalignment problem resulting from process variation and the parasitic resistance resulting from interconnecting wires based on the FinFET technology becomes even more severe compared with the conventional planar CMOS technology. Such gate misalignment and unwanted parasitic resistance may increase the threshold voltage and decrease the drain current of transistors. When applying the FinFET technology to analog circuit design, the variation of drain currents can destroy current-ratio matching among transistors and degrade circuit performance. In this article, we present the first FinFET placement and routing algorithms for layout generation of a common-centroid FinFET array to precisely match the current ratios among transistors. Experimental results show that the proposed matching-driven FinFET placement and routing algorithms can obtain the best current-ratio matching compared with the state-of-the-art common-centroid placer.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2016:FTS, author = "Jinglei Huang and Song Chen and Wei Zhong and Wenchao Zhang and Shengxi Diao and Fujiang Lin", title = "Floorplanning and Topology Synthesis for Application-Specific Network-on-Chips with {RF}-Interconnect", journal = j-TODAES, volume = "21", number = "3", pages = "40:1--40:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2890499", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Application-specific Network-on-Chip (ASNoC) has been proposed as a promising solution to address the global communication challenges in System-on-Chips. 
However, with the number of cores increasing, the on-chip communication becomes more and more complex and the power consumption imposes the major challenge for designing ASNoCs. In this article, we propose a four-stage floorplanning and topology synthesis approach for ASNoCs with Radio-Frequency Interconnect (RF-I). First, considering the advantage of RF-I in long-distance on-chip communication, we integrate the floorplanning and clustering to explore the proper clustering of cores, where the cores belonging to the same cluster will share the same switch for communications, form an island, and occupy a contiguous physical region. After the switches and network interfaces are inserted into the floorplan, the allocation of routing paths and the RF-I logical channels are integrated in an iterative procedure to generate fine-grained dynamically reconfigurable ASNoC topologies. Finally, considering the signal integrity of RF-I, we adjust the placement of the switches by a simulated annealing-based method to reduce the number of RF-I routing corners. To evaluate the placement of switches, we propose a dynamic programming-based method to route the transmission line with the minimized number of routing corners in linear time. 
The results show that, using the RF-I, we can reduce the power consumption of ASNoCs by 20\% to 26\%.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xu:2016:ACS, author = "Chang Xu and Guojie Luo and Peixin Li and Yiyu Shi and Iris Hui-Ru Jiang", title = "Analytical Clustering Score with Application to Postplacement Register Clustering", journal = j-TODAES, volume = "21", number = "3", pages = "41:1--41:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2894753", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Circuit clustering is usually done through discrete optimizations to enable circuit size reduction or design-specific cluster formation. In this article, we are interested in the register-clustering technique for clock-power reduction by leveraging new opportunities introduced by multibit flip-flop (MBFF). Currently, INTEGRA is the only existing postplacement MBFF clustering optimizer with a subquadratic time complexity. However, it severely degrades the wirelength, especially for realistic designs, which may nullify the benefits of MBFF clustering. In contrast, we formulate an analytical clustering score with a nonlinear programming framework, in which the wirelength objective can be seamlessly integrated and the solver has empirical subquadratic time complexity. With the MBFF library, the application of our analytical clustering method achieves comparable clock power to the state-of-the-art techniques, but further reduces the wirelength by about 25\%. Even without the MBFF library, we can still achieve 30\% clock wirelength reduction. 
In addition, the proposed method can potentially be integrated into an in-placement MBFF clustering solver and be applied to other problems that require formulating clustering scores in their objective functions.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xu:2016:PPA, author = "Xiaoqing Xu and Bei Yu and Jhih-Rong Gao and Che-Lun Hsu and David Z. Pan", title = "{PARR}: Pin-Access Planning and Regular Routing for Self-Aligned Double Patterning", journal = j-TODAES, volume = "21", number = "3", pages = "42:1--42:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2842612", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Pin access has become one of the most difficult challenges for detailed routing in advanced technology nodes, for example, in 14nm and below, for which double-patterning lithography has to be used for manufacturing lower metal routing layers with tight pitches, such as M2 and M3. Self-aligned double patterning (SADP) provides better control on line edge roughness and overlay, but it has very restrictive design constraints and prefers regular layout patterns. This article presents a comprehensive pin-access planning and regular routing framework (PARR) for SADP friendliness. Our key techniques include precomputation of both intracell and intercell pin accessibility, as well as local and global pin-access planning to enable handshaking between standard cell-level pin access and detailed routing under SADP constraints. A pin access-driven rip-up and reroute scheme is proposed to improve the ultimate routability. 
Our experimental results demonstrate that PARR can achieve much better routability and overlay control compared with previous approaches.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yu:2016:EOA, author = "Bei Yu and Kun Yuan and Jhih-Rong Gao and Shiyan Hu and David Z. Pan", title = "{EBL} Overlapping Aware Stencil Planning for {MCC} System", journal = j-TODAES, volume = "21", number = "3", pages = "43:1--43:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2888394", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Electron beam lithography (EBL) is a promising, maskless solution for the technology beyond 14nm logic nodes. To overcome its throughput limitation, industry has proposed character projection (CP) technique, where some complex shapes (characters) can be printed in one shot. Recently, the traditional EBL system was extended into a multi-column cell (MCC) system to further improve the throughput. In an MCC system, several independent CPs are used to further speed-up the writing process. Because of the area constraint of stencil, the MCC system needs to be packed/planned carefully to take advantage of the characters. In this article, we prove that the overlapping aware stencil planning (OSP) problem is NP-hard. Then we propose E-BLOW, a tool to solve the MCC system OSP problem. E-BLOW involves several novel speedup techniques, such as successive relaxation and dynamic programming. 
Experimental results show that, compared with previous works, E-BLOW demonstrates better performance for both the conventional EBL system and the MCC system.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2016:NAP, author = "Seungwon Kim and Seokhyeong Kang and Ki Jin Han and Youngmin Kim", title = "Novel Adaptive Power-Gating Strategy and Tapered {TSV} Structure in Multilayer {$3$D} {IC}", journal = j-TODAES, volume = "21", number = "3", pages = "44:1--44:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2894752", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Among power dissipation components, leakage power has become more dominant with each successive technology node. Power-gating techniques have been widely used to reduce the standby leakage energy. In this work, we investigate a power-gating strategy for through-silicon via (TSV)-based 3D IC stacking structures. Power-gating control is becoming more complicated as more dies are stacked. We combine the on-chip PDN and TSV in a multilayered 3D IC to perform power-gating analysis of the static and dynamic voltage drops and in-rush current. Then, we propose a novel power-gating strategy that optimizes the in-rush current profile, subject to the voltage-drop constraints. Our power-gating strategy provides a minimal wake-up latency such that the voltage noise safety margins are not violated. In addition, the layer dependency of the 3D IC on the power gating is analyzed in terms of the wake-up time reduction. 
We achieve an average wake-up time reduction of 43\% for all cases with our adaptive power-gating method that exploits location (or layer) information regarding the aggressors in a 3D IC. A tapered TSV architecture based on the layer dependency has been analyzed; it exhibits up to 18\% wake-up time reduction compared to that of circuits with uniform TSVs.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2016:DCV, author = "Gong Chen and Toru Fujimura and Qing Dong and Shigetoshi Nakatake and Bo Yang", title = "{DC} Characteristics and Variability on 90nm {CMOS} Transistor Array-Style Analog Layout", journal = j-TODAES, volume = "21", number = "3", pages = "45:1--45:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2888395", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the MOS analog layout, variability suppression is becoming a major issue, as is layout efficiency. Introducing a transistor array (TA) style to analog layout, this article addresses the layout-dependent variability based on the measurement results of test chips on 90nm CMOS process. In TA style, a large transistor is decomposed into a set of unified subtransistors, which are connected in series or parallel. Focusing on one row layout of diffusion sharing for the multiple gates, we analyze the current direction-dependent variability and the leakage current via off-gates for the electrical isolation. 
Furthermore, we present several analog design cases on TA including analysis of the impact on the DC characteristics caused by the transistor channel decomposition.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2016:MSM, author = "Chao Wang and Chuansheng Dong and Haibo Zeng and Zonghua Gu", title = "Minimizing Stack Memory for Hard Real-Time Applications on Multicore Platforms with Partitioned Fixed-Priority or {EDF} Scheduling", journal = j-TODAES, volume = "21", number = "3", pages = "46:1--46:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2846096", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore processors are increasingly adopted in resource-constrained real-time embedded applications. In the development of such applications, efficient use of RAM memory is as important as the effective scheduling of software tasks. Preemption Threshold Scheduling (PTS) is a well-known technique for controlling the degree of preemption, possibly improving system schedulability, and to reduce system stack usage. In this paper, we consider partitioned multi-processor scheduling on a multicore processor with either Fixed-Priority or Earliest Deadline First scheduling algorithms with PTS and address the design optimization problem of mapping tasks to processor cores and assignment of task priorities and preemption thresholds with the optimization objective of minimizing system stack usage. We present both optimal solution techniques based on Mixed Integer Linear Programming and efficient heuristic algorithms that can achieve high-quality results. 
We perform extensive performance evaluations using both synthetic tasksets and industrial case studies.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2016:DWC, author = "Sungkwang Lee and Taemin Lee and Hyunsun Park and Junwhan Ahn and Sungjoo Yoo and Youjip Won and Sunggu Lee", title = "Differential Write-Conscious Software Design on Phase-Change Memory: an {SQLite} Case Study", journal = j-TODAES, volume = "21", number = "3", pages = "47:1--47:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2842613", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Phase-change memory (PCM) has several benefits including low cost, non-volatility, byte-addressability, etc., and limitations such as write endurance. There have been several hardware approaches to exploit the benefits while minimizing the negative impact of limitations. Software approaches could give further improvements, when used together with hardware approaches, by taking advantage of write behavior present in the program, e.g., write behavior on dynamically allocated data, which is hardly captured by hardware approaches. This work proposes a software design methodology to reduce costly PCM writes. First, on top of existing hardware approach such as Flip-N-Write, we advocate exploiting the capability of PCM bit-level differential write in the software by judiciously reusing previously allocated memory resource. In order to avoid wear-out incurred by the reuse, we present software-based wear-leveling methods that distribute writes across PCM cells. 
In order to further reduce PCM writes, we propose identifying data, the loss of which does not affect the functionality of the underlying software, and then diverting write traffic for those data items to volatile memory. To evaluate the effectiveness of these methods, as a case study, we applied the proposed methods to the design of journaling in SQLite, which is an important database application commonly used in smartphones. For the experiments, we used an in-house PCM-based prototype board. Our experiments with four representative mobile applications show that the proposed design methods, which is applied on top of the hardware approach, Flip-N-Write, result in 75.2\% further reduction in total bit updates in PCM, on average, without aggravating wear-out compared with the baseline of PCM-based journaling, which is based only on the hardware approach. Also, the proposed design methods result in 49.4\% reduction in energy consumption and 52.3\% reduction in runtime compared to a typical FIFO management of free resources.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2016:FOF, author = "Xing Huang and Wenzhong Guo and Genggeng Liu and Guolong Chen", title = "{FH-OAOS}: a Fast Four-Step Heuristic for Obstacle-Avoiding Octilinear {Steiner} Tree Construction", journal = j-TODAES, volume = "21", number = "3", pages = "48:1--48:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2856033", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the sharp increase of very large-scale integrated (VLSI) circuit density, we are faced with many knotty issues. 
Particularly in the routing phase of VLSI physical design, the interconnection effects directly relate to the final performance of circuits. However, the optimization capability of traditional rectilinear architecture is limited; thus, both academia and industry have been devoted to nonrectilinear architecture in recent years, especially octilinear architecture, which is the most promising because it can greatly improve the performance of modern chips. In this article, we design FH-OAOS, an obstacle-avoiding algorithm in octilinear architecture, by constructing an obstacle-avoiding octilinear Steiner minimal tree (OAOSMT). Our approach first constructs an obstacle-free Euclidean minimal spanning tree (OFEMST) on the given pins based on Delaunay triangulation (DT). Then, two lookup tables about OFEMST's edge are generated, which can be seen as the information center of FH-OAOS and can provide information support for algorithm operation. Next, an efficient obstacle-avoiding strategy is proposed to convert the OFEMST into an obstacle-avoiding octilinear Steiner tree (OAOST). Finally, the generated OAOST is refined to construct the final OAOSMT by applying three effective strategies. 
Experimental results on various benchmarks show that FH-OAOS achieves 66.39 times speedup on average, while the average wirelength of the final OAOSMT is only 0.36\% larger than the best existing solution.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mittal:2016:STC, author = "Sparsh Mittal", title = "A Survey of Techniques for Cache Locking", journal = j-TODAES, volume = "21", number = "3", pages = "49:1--49:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2858792", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Cache memory, although important for boosting application performance, is also a source of execution time variability, and this makes its use difficult in systems requiring worst-case execution time (WCET) guarantees. Cache locking is a promising approach for simplifying WCET estimation and providing predictability, and hence, several commercial processors provide ability for locking cache. However, cache locking also has several disadvantages (e.g., extra misses for unlocked blocks, complex algorithms required for selection of locking contents) and hence, a careful management is required to realize the full potential of cache locking. In this article, we present a survey of techniques proposed for cache locking. We categorize the techniques into several groups to underscore their similarities and differences. We also discuss the opportunities and obstacles in using cache locking. 
We hope that this article will help researchers gain insight into cache locking schemes and will also stimulate further work in this area.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Venkatasubramanian:2016:PID, author = "Ramachandran Venkatasubramanian and Robert Elio and Sule Ozev", title = "Process Independent Design Methodology for the Active {RC} and Single-Inverter-Based Rail Clamp", journal = j-TODAES, volume = "21", number = "3", pages = "50:1--50:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2851490", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "RC and single-inverter-based rail clamps are widely used in semiconductor products for electrostatic discharge (ESD) protection. We propose a technology-node-independent design methodology for these rail clamp circuits that takes process, voltage, and temperature variations into consideration. The methodology can be used as a cookbook by the designer or be used to automate the entire design process. Tradeoffs between various design metrics such as ESD performance (Human Body Model), leakage, and area are considered. Simplified circuit models for the rail clamp are presented to gain insights into its working and to size the circuit components. A rail clamp for core power domain is designed using the proposed approach in 40nm low-power process and performance results of the design are also presented. 
The effectiveness of the design methodology is proven in three different technology nodes by comparing the obtained design with the best design from among 250,000 designs obtained by randomly sampling from the design space.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2016:SDM, author = "Sangmin Kim and Seokhyeong Kang and Youngsoo Shin", title = "Synthesis of Dual-Mode Circuits Through Library Design, Gate Sizing, and Clock-Tree Optimization", journal = j-TODAES, volume = "21", number = "3", pages = "51:1--51:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2856032", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A dual-mode circuit is a circuit that has two operating modes: a default high-performance mode at nominal voltage and a secondary low-performance near-threshold voltage (NTV) mode. A key problem that we address is to maximize NTV mode clock frequency. Some cells that are particularly slow in NTV mode are optimized through transistor sizing and stack removal; static noise margin of each gate is extracted and appended in a library so that function failures can be checked and removed during synthesis. A new gate-sizing algorithm is proposed that takes account of timing slacks at both modes. A new sensitivity measure is introduced for this purpose; binary search is then applied to find the maximum NTV mode frequency. Clock-tree synthesis is reformulated to minimize clock skew at both modes. This is motivated by the fact that the proportion of load-dependent delay along clock paths, as well as clock-path delays themselves, should be made equal. 
Experiments on some test circuits indicate that NTV mode clock period is reduced by 24\%, on average; clock skew at NTV decreases by 13\%, on average; and NTV mode energy-delay product is reduced by 20\%, on average.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Qian:2016:PEN, author = "Zhiliang Qian and Paul Bogdan and Chi-Ying Tsui and Radu Marculescu", title = "Performance Evaluation of {NoC}-Based Multicore Systems: From Traffic Analysis to {NoC} Latency Modeling", journal = j-TODAES, volume = "21", number = "3", pages = "52:1--52:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2870633", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this survey, we review several approaches for predicting performance of Network-on-Chip (NoC)-based multicore systems, starting from the traffic models to the complex NoC models for latency evaluation. We first review typical traffic models to represent the application workloads in NoC. Specifically, we review Markovian and non-Markovian (e.g., self-similar or long-range memory-dependent) traffic models and discuss their applications on multicore platform design. Then, we review the analytical techniques to predict NoC performance under given input traffic. We investigate analytical models for average as well as maximum delay evaluation. We also review the developments and design challenges of NoC simulators. One interesting research direction in NoC performance evaluation consists of combining simulation and analytical models in order to exploit their advantages together. 
Toward this end, we discuss several newly proposed approaches that use hardware-based or learning-based techniques. Finally, we summarize several open problems and our perspective to address these challenges.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kashif:2016:PSR, author = "Hany Kashif and Hiren Patel and Sebastian Fischmeister", title = "Path Selection for Real-Time Communication on Priority-Aware {NoCs}", journal = j-TODAES, volume = "21", number = "3", pages = "53:1--53:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2866572", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work investigates selecting paths for communication flows when deploying a hard real-time application on a chip-multiprocessor system. This chip-multiprocessor system uses a priority-aware real-time network-on-chip interconnect between the processors. Given a mapping of the computation tasks onto the chip-multiprocessor, the problem we address in this work is to discover paths the communication flows take such that hard real-time deadlines of flows are met. Furthermore, we must ensure that deadlines are met even in the presence of direct and indirect interference from other flows sharing network links on the path. To achieve this, our algorithm utilizes a stage-level analysis for real-time communication to determine the impact of a network link being used by a flow, and its effect on other flows sharing the link. 
The path selection algorithm uses heuristics such as selecting links with least interference, and considering lower-priority flows when dedicating links to paths of higher-priority flows since an optimal one is intractable. The algorithm also considers constraints on the number of virtual channels at each router port in the network. The statistically significant experimental results show an improvement in schedulability by 5\% and 12\% over existing path selection algorithms such as Minimum Interference Routing and Widest Shortest Path algorithms, respectively. We also present a set-top box case study to further illustrate the benefits of using the proposed algorithm.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2016:ECM, author = "Chuangwen Liu and Peishan Tu and Pangbo Wu and Haomo Tang and Yande Jiang and Jian Kuang and Evangeline F. Y. Young", title = "An Effective Chemical Mechanical Polishing Fill Insertion Approach", journal = j-TODAES, volume = "21", number = "3", pages = "54:1--54:??", month = jul, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2886097", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "To reduce chip-scale topography variation, dummy fill is commonly used to improve the layout density uniformity. Previous works either sought the most uniform density distribution or sought to minimize the inserted dummy fills while satisfying certain density uniformity constraint. However, due to more stringent manufacturing challenges, more criteria, like line deviation and outlier, emerge at newer technology nodes. 
This article presents a joint optimization scheme to consider variation, total fill, line deviation, outlier, overlap, and running time simultaneously. More specifically, first we decompose the rectilinear polygons and partition fillable regions into rectangles for easier processing. After decomposition, we insert dummy fills into the fillable rectangular regions optimizing the fill metrics simultaneously. We propose three approaches, Fast Median approach, LP approach, and Iterative approach, which are much faster with better quality, compared with the results of the top three contestants in the ICCAD Contest 2014.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zuluaga:2016:SSN, author = "Marcela Zuluaga and Peter Milder and Markus P{\"u}schel", title = "Streaming Sorting Networks", journal = j-TODAES, volume = "21", number = "4", pages = "55:1--55:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2854150", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Sorting is a fundamental problem in computer science and has been studied extensively. Thus, a large variety of sorting methods exist for both software and hardware implementations. For the latter, there is a trade-off between the throughput achieved and the cost (i.e., the logic and storage invested to sort n elements). Two popular solutions are bitonic sorting networks with O ( n log$^2$ n ) logic and storage, which sort n elements per cycle, and linear sorters with O ( n ) logic and storage, which sort n elements per n cycles. 
In this article, we present new hardware structures that we call streaming sorting networks, which we derive through a mathematical formalism that we introduce, and an accompanying domain-specific hardware generator that translates our formal mathematical description into synthesizable RTL Verilog. With the new networks, we achieve novel and improved cost-performance trade-offs. For example, assuming that n is a two-power and w is any divisor of n, one class of these networks can sort in n / w cycles with O ( w log$^2$ n ) logic and O ( n log$^2$ n ) storage; the other class that we present sorts in n log$^2$ n / w cycles with O ( w ) logic and O ( n ) storage. We carefully analyze the performance of these networks and their cost at three levels of abstraction: (1) asymptotically, (2) exactly in terms of the number of basic elements needed, and (3) in terms of the resources required by the actual circuit when mapped to a field-programmable gate array. The accompanying hardware generator allows us to explore the entire design space, identify the Pareto-optimal solutions, and show superior cost-performance trade-offs compared to prior work.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2016:SRE, author = "Yue Zhao and Taeyoung Kim and Hosoon Shin and Sheldon X.-D. 
Tan and Xin Li and Haibao Chen and Hai Wang", title = "Statistical Rare-Event Analysis and Parameter Guidance by Elite Learning Sample Selection", journal = j-TODAES, volume = "21", number = "4", pages = "56:1--56:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2875422", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurately estimating the failure region of rare events for memory-cell and analog circuit blocks under process variations is a challenging task. In this article, we propose a new statistical method, called EliteScope, to estimate the circuit failure rates in rare-event regions and to provide conditions of parameters to achieve targeted performance. The new method is based on the iterative blockade framework to reduce the number of samples, but consists of two new techniques to improve existing methods. First, the new approach employs an elite-learning sample-selection scheme, which can consider the effectiveness of samples and well coverage for the parameter space. As a result, it can reduce additional simulation costs by pruning less effective samples while keeping the accuracy of failure estimation. Second, the EliteScope identifies the failure regions in terms of parameter spaces to provide a good design guidance to accomplish the performance target. It applies variance-based feature selection to find the dominant parameters and then determine the in-spec boundaries of those parameters. We demonstrate the advantage of our proposed method using several memory and analog circuits with different numbers of process parameters. Experiments on four circuit examples show that EliteScope achieves a significant improvement on failure-region estimation in terms of accuracy and simulation cost over traditional approaches. 
The 16b 6T-SRAM column example also demonstrates that the new method is scalable for handling large problems with large numbers of process variables.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ewetz:2016:CRC, author = "Rickard Ewetz and Cheng-Kok Koh", title = "Construction of Reconfigurable Clock Trees for {MCMM} Designs Using Mode Separation and Scenario Compression", journal = j-TODAES, volume = "21", number = "4", pages = "57:1--57:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2883609", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The clock networks of many modern circuits have to operate in multiple corners and multiple modes (MCMM). We propose to construct mode-reconfigurable clock trees (MRCTs) based on mode separation and scenario compression. The technique of scenario compression is proposed to consider the timing constraints in multiple scenarios at the same time, compressing the MCMM problem into an equivalent single-corner multiple-mode (SCMM), or single-corner single-mode (SCSM) problem. The compression is performed by combining the skew constraints of the different scenarios in skew constraint graphs based on delay linearization and dominating skew constraints. An MRCT consists of several clock trees and mode separation involves, depending on the active mode, selecting one of the clock trees to deliver the clock signal. To limit the overhead, the bottom part (closer to the clock sinks) of all the different clock trees are shared and only the top part (closer to the clock source) of the clock network is mode reconfigurable. 
The reconfiguration is realized using OR-gates and a one-input-multiple-output demultiplexer. The experimental results show that for a set of synthesized MCMM circuits, with 715 to 13,216 sequential elements, the proposed approach can achieve high yield.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ghasemzadeh:2016:HAE, author = "Hassan Ghasemzadeh and Ramin Fallahzadeh and Roozbeh Jafari", title = "A Hardware-Assisted Energy-Efficient Processing Model for Activity Recognition Using Wearables", journal = j-TODAES, volume = "21", number = "4", pages = "58:1--58:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2886096", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Wearables are being widely utilized in health and wellness applications, primarily due to the recent advances in sensor and wireless communication, which enhance the promise of wearable systems in providing continuous and real-time monitoring and interventions. Wearables are generally composed of hardware/software components for collection, processing, and communication of physiological data. Practical implementation of wearable monitoring in real-life applications is currently limited due to notable obstacles. The wearability and form factor are dominated by the amount of energy needed for sensing, processing, and communication. In this article, we propose an ultra-low-power granular decision-making architecture, also called screening classifier, which can be viewed as a tiered wake-up circuitry, consuming three orders of magnitude-less power than the state-of-the-art low-power microcontrollers. 
This processing model operates based on computationally simple template matching modules, based on coarse- to fine-grained analysis of the signals with on-demand and gradually increasing the processing power consumption. Initial template matching rejects signals that are clearly not of interest from the signal processing chain, keeping the rest of processing blocks idle. If the signal is likely of interest, the sensitivity and the power of the template matching modules are gradually increased, and ultimately, the main processing unit is activated. We pose optimization techniques to efficiently split a full template into smaller bins, called mini-templates, and activate only a subset of bins during each classification decision. Our experimental results on real data show that this signal screening model reduces power consumption of the processing architecture by a factor of 70\% while the sensitivity of detection remains at least 80\%.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Teman:2016:PAP, author = "Adam Teman and Davide Rossi and Pascal Meinerzhagen and Luca Benini and Andreas Burg", title = "Power, Area, and Performance Optimization of Standard Cell Memory Arrays Through Controlled Placement", journal = j-TODAES, volume = "21", number = "4", pages = "59:1--59:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2890498", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Embedded memory remains a major bottleneck in current integrated circuit design in terms of silicon area, power dissipation, and performance; however, static random access memories (SRAMs) are almost exclusively 
supplied by a small number of vendors through memory generators, targeted at rather generic design specifications. As an alternative, standard cell memories (SCMs) can be defined, synthesized, and placed and routed as an integral part of a given digital system, providing complete design flexibility, good energy efficiency, low-voltage operation, and even area efficiency for small memory blocks. Yet implementing an SCM block with a standard digital flow often fails to exploit the distinct and regular structure of such an array, leaving room for optimization. In this article, we present a design methodology for optimizing the physical implementation of SCM macros as part of the standard design flow. This methodology introduces controlled placement, leading to a structured, noncongested layout with close to 100\% placement utilization, resulting in a smaller silicon footprint, reduced wire length, and lower power consumption compared to SCMs without controlled placement. This methodology is demonstrated on SCM macros of various sizes and aspect ratios in a state-of-the-art 28nm fully depleted silicon-on-insulator technology, and compared with equivalent macros designed with the noncontrolled, standard flow, as well as with foundry-supplied SRAM macros. The controlled SCMs provide an average 25\% reduction in area as compared to noncontrolled implementations while achieving a smaller size than SRAM macros of up to 1Kbyte. Power and performance comparisons of controlled SCM blocks of a commonly found 256 $ \times $ 32 (1 Kbyte) memory with foundry-provided SRAMs show greater than 65\% and 10\% reduction in read and write power, respectively, while providing faster access than their SRAM counterparts, despite being of an aspect ratio that is typically unfavorable for SCMs. In addition, the SCM blocks function correctly with a supply voltage as low as 0.3V, well below the lower limit of even the SRAM macros optimized for low-voltage operation. 
The controlled placement methodology is applied within a full-chip physical implementation flow of an OpenRISC-based test chip, providing more than 50\% power reduction compared to equivalently sized compiled SRAMs under a benchmark application.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Narayanaswamy:2016:BRE, author = "Swaminathan Narayanaswamy and Steffen Schlueter and Sebastian Steinhorst and Martin Lukasiewycz and Samarjit Chakraborty and Harry Ernst Hoster", title = "On Battery Recovery Effect in Wireless Sensor Nodes", journal = j-TODAES, volume = "21", number = "4", pages = "60:1--60:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2890501", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the perennial demand for longer runtime of battery-powered Wireless Sensor Nodes (WSNs), several techniques have been proposed to increase the battery runtime. One such class of techniques exploiting the battery recovery effect phenomenon claims that performing an intermittent discharge instead of a continuous discharge will increase the usable battery capacity. Several works in the areas of embedded systems and wireless sensor networks have assumed the existence of this recovery effect and proposed different power management techniques in the form of power supply architectures (multiple battery setup) and communication protocols (burst mode transmission) in order to exploit it. 
However, until now, a systematic experimental evaluation of the recovery effect has not been performed with real battery cells, using high-accuracy battery testers to confirm the existence of this recovery phenomenon. In this article, a systematic evaluation procedure is developed to verify the existence of this battery recovery effect. Using our evaluation procedure, we investigated Alkaline, Nickel-Metal Hydride (NiMH), and Lithium-Ion (Li-Ion) battery chemistries, which are commonly used as power supplies for Wireless Sensor Node (WSN) applications. Our experimental results do not show any evidence of the aforementioned recovery effect in these battery chemistries. In particular, our results show a significant deviation from the stochastic battery models, which were used by many power management techniques. Therefore, the existing power management approaches that rely on this recovery effect do not hold in practice. Instead of a battery recovery effect, our experimental results show the existence of the rate capacity effect, which is the reduction of usable battery capacity with higher discharge power, to be the dominant electrochemical phenomenon that should be considered for maximizing the runtime of WSN applications. 
We outline power management techniques that minimize the rate capacity effect in order to obtain a higher energy output from the battery.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tannir:2016:AMN, author = "Dani Tannir and Ya Wang and Peng Li", title = "Accurate Modeling of Nonideal Low-Power {PWM} {DC--DC} Converters Operating in {CCM} and {DCM} using Enhanced Circuit-Averaging Techniques", journal = j-TODAES, volume = "21", number = "4", pages = "61:1--61:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2890500", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The development of enhanced modeling techniques for the simulation of switched-mode Pulse Width Modulated (PWM) DC-DC power converters using circuit averaging is the main focus of this article. The circuit-averaging technique has traditionally been used to model the behavior of PWM DC-DC converters without considering important nonideal characteristics of the switching devices. As a result, most of these existing approaches present simplified models that are ideal or linearized, and do not accurately account for the performance characteristics of the converter. This is especially problematic for low-power applications. In this article, we present an enhanced nonideal behavioral circuit-averaged model that makes the simulation of DC-DC converters both computationally efficient and accurate, thereby presenting an important tool for circuit designers. 
Experimentally, we show that our Verilog-A-based new model allows for accurate simulation of both Buck- and Boost-type PWM converters operating in either CCM or DCM modes while providing more than one order of magnitude speedup over the transistor-level simulation.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Steinhorst:2016:CPC, author = "Sebastian Steinhorst and Matthias Kauer and Arne Meeuw and Swaminathan Narayanaswamy and Martin Lukasiewycz and Samarjit Chakraborty", title = "Cyber-Physical Co-Simulation Framework for Smart Cells in Scalable Battery Packs", journal = j-TODAES, volume = "21", number = "4", pages = "62:1--62:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2891407", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article introduces a Cyber-physical Co-Simulation Framework (CPCSF) for design and analysis of smart cells that enable scalable battery pack and Battery Management System (BMS) architectures. In contrast to conventional cells in battery packs, where all cells are monitored and controlled centrally, each smart cell is equipped with its own electronics in the form of a Cell Management Unit (CMU). The CMU maintains the cell in a safe and healthy operating state, while system-level battery management functions are performed by cooperation of the smart cells via communication. Here, the smart cells collaborate in a self-organizing fashion without a central controller instance. This enables maximum scalability and modularity, significantly simplifying integration of battery packs. 
However, for this emerging architecture, system-level design methodologies and tools have not been investigated yet. By contrast, components are developed individually and then manually tested in a hardware development platform. Consequently, the systematic design of the hardware/software architecture of smart cells requires a cyber-physical multi-level co-simulation of the network of smart cells that has to include all the components from the software, electronic, electric, and electrochemical domains. This comprises distributed BMS algorithms running on the CMUs, the communication network, control circuitry, cell balancing hardware, and battery cell behavior. For this purpose, we introduce a CPCSF that enables rapid design and analysis of smart cell hardware/software architectures. Our framework is then applied to investigate request-driven active cell balancing strategies that make use of the decentralized system architecture. In an exhaustive analysis on a realistic 21.6kWh Electric Vehicle (EV) battery pack containing 96 smart cells in series, the CPCSF is able to simulate hundreds of balancing runs together with all system characteristics, using the proposed request-driven balancing strategies at highest accuracy within an overall time frame of several hours. Consequently, the presented CPCSF for the first time allows us to quantitatively and qualitatively analyze the behavior of smart cell architectures for real-world applications.", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Guin:2016:FCS, author = "Ujjwal Guin and Qihang Shi and Domenic Forte and Mark M. 
Tehranipoor", title = "{FORTIS}: a Comprehensive Solution for Establishing Forward Trust for Protecting {IPs} and {ICs}", journal = j-TODAES, volume = "21", number = "4", pages = "63:1--63:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2893183", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the advent of globalization in the semiconductor industry, it is necessary to prevent unauthorized usage of third-party IPs (3PIPs), cloning and unwanted modification of 3PIPs, and unauthorized production of ICs. Due to the increasing complexity of ICs, system-on-chip (SoC) designers use various 3PIPs in their design to reduce time-to-market and development costs, which creates a trust issue between the SoC designer and the IP owners. In addition, as the ICs are fabricated around the globe, the SoC designers give fabrication contracts to offshore foundries to manufacture ICs and have little control over the fabrication process, including the total number of chips fabricated. Similarly, the 3PIP owners lack control over the number of fabricated chips and/or the usage of their IPs in an SoC. Existing research only partially addresses the problems of IP piracy and IC overproduction, and to the best of our knowledge, there is no work that considers IP overuse. In this article, we present a comprehensive solution for preventing IP piracy and IC overproduction by assuring forward trust between all entities involved in the SoC design and fabrication process. We propose a novel design flow to prevent IC overproduction and IP overuse. 
We use an existing logic encryption technique to obfuscate the netlist of an SoC or a 3PIP and propose a modification to enable manufacturing tests before the activation of chips which is absolutely necessary to prevent overproduction. We have used asymmetric and symmetric key encryption, in a fashion similar to Pretty Good Privacy (PGP), to transfer keys from the SoC designer or 3PIP owners to the chips. In addition, we also propose to attach an IP digest (a cryptographic hash of the entire IP) to the header of an IP to prevent modification of the IP by the SoC designers. We have shown that our approach is resistant to various attacks with the cost of minimal area overhead.", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2016:TPD, author = "William Lee and Vikas S. Vij and Kenneth S. Stevens", title = "Timing Path-Driven Cycle Cutting for Sequential Controllers", journal = j-TODAES, volume = "21", number = "4", pages = "64:1--64:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2893473", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power and performance optimization of integrated circuits is performed by timing-driven algorithms that operate on directed acyclic graphs. Sequential circuits and circuits with topological feedback contain cycles. Cyclic circuits must be represented as directed acyclic graphs to be optimized and evaluated using static timing analysis. Algorithms in commercial electronic design automation tools generate the required acyclic graphs by cutting cycles without considering timing paths. 
This work reports on a method for generating directed acyclic circuit graphs that do not cut the specified timing paths. The algorithm is applied to over 125 benchmark designs and asynchronous handshake controllers. The runtime is less than 1 second, even for the largest published controllers. Circuit timing graphs generated using this method retain the necessary timing paths, which enables circuit validation and optimization employing the commercial tools. Additional benefits show these designs are on an average a third in size, operate 33.3\% faster, and consume one-fourth the energy.", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xu:2016:HSL, author = "Yang Xu and J{\"u}rgen Teich", title = "Hierarchical Statistical Leakage Analysis and Its Application", journal = j-TODAES, volume = "21", number = "4", pages = "65:1--65:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2896820", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we investigate a hierarchical statistical leakage analysis (HSLA) design flow where module-level statistical leakage models supplied by IP vendors are used to improve the efficiency and capacity of SoC statistical leakage power analysis. To solve the challenges of incorporating spatial correlations between IP modules at system level, we first propose a method to extract correlation-inclusive leakage models. Then a method to handle the spatial correlations at system level is proposed. Using this method, the runtime of system statistical leakage analysis (SLA) can be significantly improved without disclosing the netlists of the IP modules. 
Experimental results demonstrate that the proposed HSLA method is about 100 times faster than gate-level full-chip SLA methods while maintaining the accuracy. In addition, we also investigate one application of this HSLA method, a leakage-yield-driven floorplanning framework, to demonstrate the benefits of such an HSLA method in practice. Moreover, an optimized hierarchical leakage analysis method dedicated to the floorplanning framework is proposed. The effectiveness of the floorplanning framework and the optimized method are confirmed by extensive experimental results.", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{S:2016:EAD, author = "Ramprasath S. and Vinita Vasudevan", title = "Efficient Algorithms for Discrete Gate Sizing and Threshold Voltage Assignment Based on an Accurate Analytical Statistical Yield Gradient", journal = j-TODAES, volume = "21", number = "4", pages = "66:1--66:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2896819", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we derive a simple and accurate expression for the change in timing yield due to a change in the gate delay distribution. It is based on analytical bounds that we have derived for the moments of the circuit and path delay. Based on this, we propose computationally efficient algorithms for (1) discrete gate sizing and (2) simultaneous gate sizing and threshold voltage ($V_T$) assignment so that the circuit meets a timing yield specification under parameter variations.
The use of this analytical yield gradient within a gradient-based timing yield optimization algorithm results in a significant improvement in the runtime as compared to the numerical method, while achieving the same final yield. It also allows us to explore a larger search space in each iteration more efficiently, which is required in the case of simultaneous resizing and $V_T$ assignment. We also propose heuristics for resizing/changing the $V_T$ of multiple gates in each iteration. This makes it possible to optimize the timing yield for large circuits. Results on ITC '99 benchmarks show that the proposed multinode resizing algorithm results in a significant improvement in the runtime with a marginal average area penalty and no cost to the final yield achieved.", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2016:ERL, author = "Hongfei Wang and R. D. (Shawn) Blanton", title = "Ensemble Reduction via Logic Minimization", journal = j-TODAES, volume = "21", number = "4", pages = "67:1--67:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2897515", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "An ensemble of machine learning classifiers usually improves generalization performance and is useful for many applications. However, the extra memory storage and computational cost incurred from the combined models often limits their potential applications. In this article, we propose a new ensemble reduction method called CANOPY that significantly reduces memory storage and computations.
CANOPY uses a technique from logic minimization for digital circuits to select and combine particular classification models from an initial pool in the form of a Boolean function, through which the reduced ensemble performs classification. Experiments on 20 UCI datasets demonstrate that CANOPY either outperforms or is very competitive with the initial ensemble and one state-of-the-art ensemble reduction method in terms of generalization error, and is superior to all existing reduction methods surveyed for identifying the smallest numbers of models in the reduced ensembles.", acknowledgement = ack-nhfb, articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2016:DTS, author = "Irith Pomeranz", title = "{$N$}-Detection Test Sets for Circuits with Multiple Independent Scan Chains", journal = j-TODAES, volume = "21", number = "4", pages = "68:1--68:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2897514", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In a circuit with multiple independent scan chains, it is possible to operate groups of scan chains independently in functional or shift mode. This design-for-testability approach can be used to increase the quality of a test set. This article describes an $N$-detection test generation procedure for increasing the quality of a transition fault test set in such a circuit. The procedure uses the possibility of applying the same test, with the scan chains operating in different modes, to increase the numbers of detections without increasing the number of tests that need to be generated or stored on a tester.
This results in reduced input storage requirements compared with a conventional $N$-detection test set and an increased number of applied tests. The increased quality of the test set is verified by its bridging fault coverage.", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Won:2016:RSC, author = "Jae-Yeon Won and Paul V. Gratz and Srinivas Shakkottai and Jiang Hu", title = "Resource Sharing Centric Dynamic Voltage and Frequency Scaling for {CMP} Cores, Uncore, and Memory", journal = j-TODAES, volume = "21", number = "4", pages = "69:1--69:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2897394", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the breakdown of Dennard's scaling over the past decade, performance growth of modern microprocessor design has largely relied on scaling core count in chip multiprocessors (CMPs). The challenge of chip power density, however, remains and demands new power management solutions. This work investigates a coordinated CMP systemwide Dynamic Voltage and Frequency Scaling (DVFS) policy centered around shared resource utilization. This approach represents a new angle on the problem, differing from the conventional core-workload-driven approaches. The key component of our work is per-core DVFS leveraging a technique similar to TCP Vegas congestion control from networking. This TCP Vegas-based DVFS can potentially identify the synergy between power reduction and performance improvement. Further, this work includes uncore (on-chip interconnect and shared last level cache) and main memory DVFS policies coordinated with the per-core DVFS policy.
Full system simulations on PARSEC benchmarks show that our technique reduces total energy dissipation by over 47\% across all benchmarks with less than 2.3\% performance degradation. Our work also leads to 12\% more energy savings compared to a prior work CMP DVFS policy.", acknowledgement = ack-nhfb, articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ho:2016:AAD, author = "Ching-Hsuan Ho and Yung-Chih Chen and Chun-Yao Wang and Ching-Yi Huang and Suman Datta and Vijaykrishnan Narayanan", title = "Area-Aware Decomposition for Single-Electron Transistor Arrays", journal = j-TODAES, volume = "21", number = "4", pages = "70:1--70:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2898998", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Single-electron transistor (SET) at room temperature has been demonstrated as a promising device for extending Moore's law due to its ultra-low power consumption. Existing SET synthesis methods synthesize a Boolean network into a large reconfigurable SET array where the height of SET array equals the number of primary inputs. However, recent experiments on device level have shown that this height is restricted to a small number, say, 10, rather than arbitrary value due to the ultra-low driving strength of SET devices. On the other hand, the width of an SET array is also suggested to be a small value. Consequently, it is necessary to decompose a large SET array into a set of small SET arrays where each of them realizes a sub-function of the original circuit with no more than 10 inputs. 
Thus, this article presents two techniques for achieving area-efficient SET array decomposition: One is a width minimization algorithm for reducing the area of a single SET array; the other is a depth-bounded mapping algorithm, which decomposes a Boolean network into many sub-functions such that the widths of the corresponding SET arrays are balanced. The width minimization algorithm leads to a 25\%--41\% improvement compared to the state of the art, and the mapping algorithm achieves a 60\% reduction in total area compared to a na{\"\i}ve approach.", acknowledgement = ack-nhfb, articleno = "70", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mao:2016:LBP, author = "Fubing Mao and Yi-Chung Chen and Wei Zhang and Hai (Helen) Li and Bingsheng He", title = "Library-Based Placement and Routing in {FPGAs} with Support of Partial Reconfiguration", journal = j-TODAES, volume = "21", number = "4", pages = "71:1--71:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2901295", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While traditional Field-Programmable Gate Array design flow usually employs fine-grained tile-based placement, modular placement is increasingly required to speed up the large-scale placement and save the synthesis time. Moreover, the commonly used modules can be pre-synthesized and stored in the library for design reuse to significantly save the design, verification time, and development cost. Previous work mainly focuses on modular floorplanning without module placement information. 
In this article, we propose a library-based placement and routing flow that best utilizes the pre-placed and routed modules from the library to significantly save the execution time while achieving the minimal area-delay product. The flow supports the static and reconfigurable modules at the same time. The modular information is represented in the B*-Tree structure, and the B*-Tree operations are amended together with Simulated Annealing to enable a fast search of the placement space. Different width-height ratios of the modules are exploited to achieve area-delay product optimization. Partial reconfiguration-aware routing using pin-to-wire abutment is proposed to connect the modules after placement. Our placer can reduce the compilation time by 65\% on average with 17\% area and 8.2\% delay overhead compared with the fine-grained results of Versatile Place and Route through the reuse of module information in the library for the base architecture. For other architectures, the area increase ranges from 8.32\% to 25.79\%, the delay varies from $-$13.66\% to 19.79\%, and the runtime improves by 43.31\% to 77.2\%.", acknowledgement = ack-nhfb, articleno = "71", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bernasconi:2016:IRZ, author = "Anna Bernasconi and Valentina Ciriani", title = "Index-Resilient Zero-Suppressed {BDDs}: Definition and Operations", journal = j-TODAES, volume = "21", number = "4", pages = "72:1--72:??", month = sep, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2905363", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Sep 23 15:16:20 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Zero-Suppressed Binary Decision Diagrams (ZDDs) are widely used data structures for representing and handling
combination sets and Boolean functions. In particular, ZDDs are commonly used in CAD for the synthesis and verification of integrated circuits. The purpose of this article is to design an error-resilient version of this data structure: a self-repairing ZDD. More precisely, we design a new ZDD canonical form, called index-resilient reduced ZDD, such that a faulty index can be reconstructed in time $O(k)$, where $k$ is the number of nodes with a corrupted index. Moreover, we propose new versions of the standard algorithms for ZDD manipulation and construction that are error resilient during their execution and produce an index-resilient ZDD as output. The experimental results validate the proposed approach.", acknowledgement = ack-nhfb, articleno = "72", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2016:HDT, author = "Hai Wang and Jian Ma and Sheldon X.-D. Tan and Chi Zhang and He Tang and Keheng Huang and Zhenghong Zhang", title = "Hierarchical Dynamic Thermal Management Method for High-Performance Many-Core Microprocessors", journal = j-TODAES, volume = "22", number = "1", pages = "1:1--1:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2891409", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "It is challenging to manage the thermal behavior of many-core microprocessors while still keeping them running at high performance since the control complexity increases as the core number increases. In this article, a novel hierarchical dynamic thermal management method is proposed to overcome this challenge.
The new method employs model predictive control (MPC) with task migration and a DVFS scheme to ensure smooth control behavior and negligible computing performance sacrifice. In order to be scalable to many-core systems, the hierarchical control scheme is designed with two levels. At the lower level, the cores are spatially clustered into blocks, and local task migration is used to match current power distribution with the optimal distribution calculated by MPC. At the upper level, global task migration is used with the unmatched powers from the lower level. A modified iterative minimum cut algorithm is used to assist the task migration decision making if the power number is large at the upper level. Finally, DVFS is applied to regulate the remaining unmatched powers. Experiments show that the new method outperforms existing methods and is very scalable to manage many-core microprocessors with small performance degradation.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Poddar:2016:ECS, author = "Sudip Poddar and Sarmishtha Ghoshal and Krishnendu Chakrabarty and Bhargab B. Bhattacharya", title = "Error-Correcting Sample Preparation with Cyberphysical Digital Microfluidic Lab-on-Chip", journal = j-TODAES, volume = "22", number = "1", pages = "2:1--2:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2898999", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Digital (droplet-based) microfluidic technology offers an attractive platform for implementing a wide variety of biochemical laboratory protocols, such as point-of-care diagnosis, DNA analysis, target detection, and drug discovery. 
A digital microfluidic biochip consists of a patterned array of electrodes on which tiny fluid droplets are manipulated by electrical actuation sequences to perform various fluidic operations, for example, dispense, transport, mix, or split. However, because of the inherent uncertainty of fluidic operations, the outcome of biochemical experiments performed on-chip can be erroneous even if the chip is tested a priori and deemed to be defect-free. In this article, we address an important error recoverability problem in the context of sample preparation. We assume a cyberphysical environment, in which the physical errors, when detected online at selected checkpoints with integrated sensors, can be corrected through recovery techniques. However, almost all prior work on error recoverability used checkpointing-based rollback approach, that is, re-execution of certain portions of the protocol starting from the previous checkpoint. Unfortunately, such techniques are expensive both in terms of assay completion time and reagent cost, and can never ensure full error-recovery in deterministic sense. We consider imprecise droplet mix-split operations and present a novel roll-forward approach where the erroneous droplets, thus produced, are used in the error-recovery process, instead of being discarded or remixed. All erroneous droplets participate in the dilution process and they mutually cancel or reduce the concentration-error when the target droplet is reached. We also present a rigorous analysis that reveals the role of volumetric-error on the concentration of a sample to be prepared, and we describe the layout of a lab-on-chip that can execute the proposed cyberphysical dilution algorithm. Our analysis reveals that fluidic errors caused by unbalanced droplet splitting can be classified as being either critical or non-critical, and only those of the former type require correction to achieve error-free sample dilution. 
Simulation experiments on various sample preparation test cases demonstrate the effectiveness of the proposed method.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Czerwinski:2016:SAO, author = "Robert Czerwinski and Dariusz Kania", title = "State Assignment and Optimization of Ultra-High-Speed {FSMs} Utilizing Tristate Buffers", journal = j-TODAES, volume = "22", number = "1", pages = "3:1--3:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2905366", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The logic synthesis of ultra-high-speed FSMs is presented. The state assignment is based on a well-known method that uses output vectors. This technique is adjusted to include elements of two-level minimization and takes into account the limited number of terms contained in the programmable-AND/fixed-OR logic cell. The state assignment is based on a special form of the binary decision tree. The second phase of the FSM design is logic optimization. The optimization method is based on tristate buffers, thus making possible a one-logic-level FSM structure. The key point is to search partition variables that control the tristate buffers. This technique can also be applied to combinational circuits or the output block of FSMs only. Algorithms for state assignment and optimization are presented and richly illustrated by examples. The method is dedicated to using specific features of complex programmable logic devices. Experimental results prove its effectiveness (e.g., the implementation of the 16-bit counter requires 136 logic cells and one-logic-cell level instead of 213 cells and four levels).
The optimization method using tristate buffers and a state assignment binary decision tree can be directly applied to FPGA-dedicated logic synthesis.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Das:2016:FBP, author = "Shirshendu Das and Hemangee K. Kapoor", title = "A Framework for Block Placement, Migration, and Fast Searching in Tiled-{DNUCA} Architecture", journal = j-TODAES, volume = "22", number = "1", pages = "4:1--4:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2907946", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Multicore processors have proliferated several domains ranging from small-scale embedded systems to large data centers, making tiled CMPs (TCMPs) the essential next-generation scalable architecture. NUCA architectures help in managing the capacity and access time for such larger cache designs. It divides the last-level cache (LLC) into multiple banks connected through an on-chip network. Static NUCA (SNUCA) has a fixed address mapping policy, whereas dynamic NUCA (DNUCA) allows blocks to relocate nearer to the processing cores at runtime. To allow this, DNUCA divides the banks into multiple banksets and a block can be placed in any bank within a particular bankset. The entire bankset may need to be searched to access a block. Optimal bankset searching mechanisms are essential for getting the benefits from DNUCA. This article proposes a DNUCA-based TCMP architecture called TLD-NUCA. It reduces the LLC access time of TCMP and also allows a heavily loaded bank to distribute its load among the underused banks. 
Instead of other DNUCA designs, TLD-NUCA considers larger banksets. Such relaxations result in more uniform load distribution than existing DNUCA-based TCMP (T-DNUCA). Considering larger banksets improves the utilization factor, but T-DNUCA cannot implement it because of its expensive searching mechanism. TLD-NUCA uses a centralized directory, called TLD, to search a block from all the banks. Also, the proposed block placement policy reduces the instances when the central TLD needs to be contacted. It does not require the expensive simultaneous search as needed by T-DNUCA. Better cache utilization and a reduction in LLC access time improve the miss rate as well as the average memory access time (AMAT). Improving the miss rate and AMAT results in improvements in cycles per instructions (CPI). Experimental analysis found that TLD-NUCA improves performance by 6.5\% as compared to T-DNUCA. The improvement is 13\% as compared to the SNUCA-based TCMP design.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wu:2016:OAW, author = "Yu-Wei Wu and Yiyu Shi and Sudip Roy and Tsung-Yi Ho", title = "Obstacle-Avoiding Wind Turbine Placement for Power Loss and Wake Effect Optimization", journal = j-TODAES, volume = "22", number = "1", pages = "5:1--5:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2905365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As finite energy resources are being consumed at faster rate than they can be replaced, renewable energy resources have drawn extensive attention. Wind power development is one such example growing significantly throughout the world. 
The main difficulty in wind power development is that wind turbines interfere with each other. The produced turbulence---wake effect---directly reduces the power generation. In addition, wirelength of the collection network among wind turbines is not merely an economic factor; it also decides power loss on the wind farm. Moreover, in reality, obstacles (buildings, lakes, etc.) exist on the wind farm, which are unavoidable. Nevertheless, to the best of our knowledge, none of the existing works consider wake effect, wirelength, and avoidance of obstacles all together in the wind turbine placement problem. In this article, we propose an analytical method to obtain the obstacle-avoiding placement of wind turbines, thus minimizing both power loss and wake effect. We also propose a postprocessing method to fine-tune the solution obtained from the analytical method to find a better solution. Simulation results show that our tool is 12x faster than the state-of-the-art industrial tool AWS OpenWind and 203x faster than the state-of-the-art academic tool TDA with almost the same produced power.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xiao:2016:HTL, author = "K. Xiao and D. Forte and Y. Jin and R. Karri and S. Bhunia and M.
Tehranipoor", title = "Hardware {Trojans}: Lessons Learned after One Decade of Research", journal = j-TODAES, volume = "22", number = "1", pages = "6:1--6:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2906147", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Given the increasing complexity of modern electronics and the cost of fabrication, entities from around the globe have become more heavily involved in all phases of the electronics supply chain. In this environment, hardware Trojans (i.e., malicious modifications or inclusions made by untrusted third parties) pose major security concerns, especially for those integrated circuits (ICs) and systems used in critical applications and cyber infrastructure. While hardware Trojans have been explored significantly in academia over the last decade, there remains room for improvement. In this article, we examine the research on hardware Trojans from the last decade and attempt to capture the lessons learned. A comprehensive adversarial model taxonomy is introduced and used to examine the current state of the art. Then the past countermeasures and publication trends are categorized based on the adversarial model and topic. Through this analysis, we identify what has been covered and the important problems that are underinvestigated. 
We also identify the most critical lessons for those new to the field and suggest a roadmap for future hardware Trojan research.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2016:PSS, author = "Irith Pomeranz", title = "Periodic Scan-In States to Reduce the Input Test Data Volume for Partially Functional Broadside Tests", journal = j-TODAES, volume = "22", number = "1", pages = "7:1--7:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2911983", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article describes a procedure for test data compression targeting functional and partially functional broadside tests. The scan-in state of such a test is either a reachable state or has a known Hamming distance from a reachable state. Reachable states are fully specified, while the popular LFSR-based test data compression methods require the use of incompletely specified test cubes. The test data compression approach considered in this article is based on the use of periodic scan-in states. Such states require the storage of a period that can be significantly shorter than a scan-in state, thus providing test data compression. The procedure computes a set of periods that is sufficient for detecting all the detectable target faults. Considering the scan-in states that the periods produce, the procedure ranks the periods based on the distances of the scan-in states from reachable states, and the lengths of the periods.
Functional and partially functional broadside tests are generated preferring shorter periods with smaller Hamming distances. The results are compared with those of an LFSR-based approach.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2016:ESM, author = "Jinyong Lee and Ingoo Heo and Yongje Lee and Yunheung Paek", title = "Efficient Security Monitoring with the Core Debug Interface in an Embedded Processor", journal = j-TODAES, volume = "22", number = "1", pages = "8:1--8:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2907611", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For decades, various concepts in security monitoring have been proposed. In principle, they all in common in regard to the monitoring of the execution behavior of a program (e.g., control-flow or dataflow) running on the machine to find symptoms of attacks. Among the proposed monitoring schemes, software-based ones are known for their adaptability on the commercial products, but there have been concerns that they may suffer from nonnegligible runtime overhead. On the other hand, hardware-based solutions are recognized for their high performance. However, most of them have an inherent problem in that they usually mandate drastic changes to the internal processor architecture. More recent ones have strived to minimize such modifications by employing external hardware security monitors in the system. However, these approaches intrinsically suffer from the overhead caused by communication between the host and the external monitor.
Our solution also relies on external hardware for security monitoring, but unlike the others, ours tackles the communication overhead by using the core debug interface (CDI), which is readily available in most commercial processors for debugging. We build our system simply by plugging our monitoring hardware into the processor via CDI, precluding the need for altering the processor internals. To validate the effectiveness of our approach, we implement two well-known monitoring techniques on our proposed framework: dynamic information flow tracking and branch regulation. The experimental results on our FPGA prototype show that our external hardware monitors efficiently perform monitoring tasks with negligible performance overhead, mainly with thanks to the support of CDI, which helps us reduce communication costs substantially.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2016:IPE, author = "Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and Chi-Hao Chen and Tei-Wei Kuo and Cheng-Yuan Michael Wang", title = "Improving {PCM} Endurance with a Constant-Cost Wear Leveling Design", journal = j-TODAES, volume = "22", number = "1", pages = "9:1--9:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2905364", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Improving PCM endurance is a fundamental issue when it is considered as an alternative to replace DRAM as main memory. Memory-based wear leveling (WL) is an effective way to improve PCM endurance, but its major challenge is how to efficiently determine the appropriate memory pages for allocation or swapping. 
In this article, we present a constant-cost WL design that is compatible with existing memory management. Two implementations, namely bucket-based and array-based WL, with constant-time (or nearly zero) search cost are proposed to be integrated into the OS layer and the hardware layer, respectively, as well as to trade between time and space complexity. The results of experiments conducted based on an implementation in Android, as well as simulations with popular benchmarks, to evaluate the effectiveness of the proposed design are very encouraging.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{He:2016:RIM, author = "Xu He and Yao Wang and Yang Guo and Evangeline F. Y. Young", title = "{Ripple 2.0}: Improved Movement of Cells in Routability-Driven Placement", journal = j-TODAES, volume = "22", number = "1", pages = "10:1--10:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2925989", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routability is one of the most important problems in high-performance circuit designs. From the viewpoint of placement design, two major factors cause routing congestion: (i) interconnections between cells and (ii) connections on macro blockages. In this article, we present a routability-driven placer, Ripple 2.0, which emphasizes both kinds of routing congestion. Several techniques will be presented, including (i) cell inflation with routing path consideration, (ii) congested cluster optimization, (iii) routability-driven cell spreading, and (iv) simultaneous routing and placement for routability refinement. 
With the official evaluation protocol, Ripple 2.0 outperforms other published academic routability-driven placers. Compared with top results in the ICCAD 2012 contest, Ripple 2.0 achieves a better detailed routing solution obtained by a commercial router.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mazumdar:2016:CIS, author = "Bodhisatwa Mazumdar and Sk. Subidh Ali and Ozgur Sinanoglu", title = "A Compact Implementation of {Salsa20} and Its Power Analysis Vulnerabilities", journal = j-TODAES, volume = "22", number = "1", pages = "11:1--11:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2934677", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present a compact implementation of the Salsa20 stream cipher that is targeted towards lightweight cryptographic devices such as radio-frequency identification (RFID) tags. The Salsa20 stream cipher, an addition-rotation-XOR (ARX) cipher, is used for high-security cryptography in NEON instruction sets embedded in ARM Cortex A8 CPU core-based tablets and smartphones. The existing literature shows that although classical cryptanalysis has been effective on reduced rounds of Salsa20, the stream cipher is immune to software side-channel attacks such as branch timing and cache timing attacks. To the best of our knowledge, this work is the first to perform hardware power analysis attacks, where we evaluate the resistance of all eight keywords in the proposed compact implementation of Salsa20. Our technique targets the three subrounds of the first round of the implemented Salsa20. 
The correlation power analysis (CPA) attack has an attack complexity of 2$^{19}$. Based on extensive experiments on a compact implementation of Salsa20, we demonstrate that all these keywords can be recovered within 20,000 queries on Salsa20. The attacks show a varying resilience of the key words against CPA that has not yet been observed in any stream or block cipher in the present literature. This makes the architecture of this stream cipher interesting from the side-channel analysis perspective. Also, we propose a lightweight countermeasure that mitigates the leakage in the power traces as shown in the results of Welch's $t$-test statistics. The hardware area overhead of the proposed countermeasure is only 14\% and is designed with compact implementation in mind.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chakraborty:2016:PDM, author = "Prasenjit Chakraborty and Preeti Ranjan Panda and Sandeep Sen", title = "Partitioning and Data Mapping in Reconfigurable Cache and Scratchpad Memory-Based Architectures", journal = j-TODAES, volume = "22", number = "1", pages = "12:1--12:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2934680", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scratchpad memory (SPM) is considered a useful component in the memory hierarchy, solely or along with caches, for meeting the power and energy constraints as performance ceases to be the sole criteria for processor design. Although the efficiency of SPM is well known, its use has been restricted owing to difficulties in programmability. 
Real applications usually have regions that are amenable to exploitation by either SPM or cache and hence can benefit if the two are used in conjunction. Dynamically adjusting the local memory resources to suit application demand can significantly improve the efficiency of the overall system. In this article, we propose a compiler technique to map application data objects to the SPM-cache and also partition the local memory between the SPM and cache depending on the dynamic requirement of the application. First, we introduce a novel graph-based structure to tackle data allocation in an application. Second, we use this to present a data allocation heuristic to map program objects for a fixed-size SPM-cache hybrid system that targets whole program optimization. We finally extend this formulation to adapt the SPM and cache sizes, as well as the data allocation as per the requirement of different application regions. We study the applicability of the technique on various workloads targeted at both SPM-only and hardware reconfigurable memory systems, observing an average of 18\% energy-delay improvement over state-of-the-art techniques.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mehri:2016:GAB, author = "Hossein Mehri and Bijan Alizadeh", title = "Genetic-Algorithm-Based {FPGA} Architectural Exploration Using Analytical Models", journal = j-TODAES, volume = "22", number = "1", pages = "13:1--13:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2939372", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "FPGA architectural optimization has emerged as one of the most important digital design challenges. 
In recent years, experimental methods have been replaced by analytical ones to find the optimized architecture. Time is the main reason for this replacement. Conventional Geometric Programming (GP) is a routine framework to solve analytical models, including area, delay, and power models. In this article, we discuss the application of the Genetic Algorithm (GA) to the design of FPGA architectures. The performance model has been integrated into the Genetic Algorithm framework in order to investigate the impact of various architectural parameters on the performance efficiency of FPGAs. This way, we are able to rapidly analyze FPGA architectures and select the best one. The main advantages of using GA versus GP are concurrency and speed. The results show that concurrent optimization of high-level architecture parameters, including lookup table size (K) and cluster size (N), and low-level parameters, like scaling of transistors, is possible for GA, whereas GP does not capture K and N under its concurrency and it needs to exhaustively search all possible combinations of K and N. 
The results also show that more than two orders of magnitude in runtime improvement in comparison with GP-based analysis is achieved.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gingade:2016:HPM, author = "Ganesh Gingade and Wenyi Chen and Yung-Hsiang Lu and Jan Allebach and Hernan Ildefonso Gutierrez-Vazquez", title = "Hybrid Power Management for Office Equipment", journal = j-TODAES, volume = "22", number = "1", pages = "14:1--14:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2910582", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Office machines (such as printers, scanners, facsimile machines, and copiers) can consume significant amounts of power. Most office machines have sleep modes to save power. Power management of these machines is usually timeout-based: a machine sleeps after being idle long enough. Setting the time-out duration can be difficult: if it is too long, the machine wastes power during idleness. If it is too short, the machine sleeps too soon and too often---the wake-up delay can significantly degrade productivity. Thus, power management is a tradeoff between saving energy and keeping response time short. Many power management policies have been published and one policy may outperform another in some scenarios. There is no definite conclusion regarding which policy is always better. This article describes two methods for office equipment power management. The first method adaptively reduces power based on a constraint of the wake-up delay. The second is a hybrid method with multiple candidate policies and it selects the most appropriate power management policy. 
Using 6 months of request traces from 18 different printers, we demonstrate that the hybrid policy outperforms individual policies. We also discover that power management based on business hours does not produce consistent energy savings.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Katoen:2016:PMC, author = "Joost-Pieter Katoen and Hao Wu", title = "Probabilistic Model Checking for Uncertain Scenario-Aware Data Flow", journal = j-TODAES, volume = "22", number = "1", pages = "15:1--15:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2914788", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Scenario-Aware Dataflow (SADF) model is based on concurrent actors that interact via channels. It combines streaming data and control to capture scenarios while incorporating hard and soft real-time aspects. To model data-flow computations that are subject to uncertainty, SADF models are equipped with random primitives. We propose to use probabilistic model checking to analyze uncertain SADF models. We show how measures such as expected time, long-run objectives like throughput, as well as timed reachability---can a given system configuration be reached within a deadline with high probability?---can be automatically determined. The crux of our method is a compositional semantics of SADF with exponential agent execution times combined with automated abstraction techniques akin to partial-order reduction. We present the semantics in detail and show how it accommodates the incorporation of execution platforms, enabling the analysis of energy consumption. 
The feasibility of our approach is illustrated by analyzing several quantitative measures of an MPEG-4 decoder and an industrial face recognition application.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2016:DAE, author = "Qixiao Liu and Miquel Moreto and Jaume Abella and Francisco J. Cazorla and Mateo Valero", title = "{DReAM}: an Approach to Estimate per-Task {DRAM} Energy in Multicore Systems", journal = j-TODAES, volume = "22", number = "1", pages = "16:1--16:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2939370", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate per-task energy estimation in multicore systems would allow performing per-task energy-aware task scheduling and energy-aware billing in data centers, among other applications. Per-task energy estimation is challenged by the interaction between tasks in shared resources, which impacts tasks' energy consumption in uncontrolled ways. Some accurate mechanisms have been devised recently to estimate per-task energy consumed on-chip in multicores, but there is a lack of such mechanisms for DRAM memories. This article makes the case for accurate per-task DRAM energy metering in multicores, which opens new paths to energy/performance optimizations. 
In particular, the contributions of this article are (i) an ideal per-task energy metering model for DRAM memories; (ii) DReAM, an accurate yet low cost implementation of the ideal model (less than 5\% accuracy error when 16 tasks share memory); and (iii) a comparison with standard methods (even distribution and access-count based) proving that DReAM is much more accurate than these other methods.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Somashekar:2016:NEG, author = "Ahish Mysore Somashekar and Spyros Tragoudas and Rathish Jayabharathi and Sreenivas Gangadhar", title = "Non-enumerative Generation of Path Delay Distributions and Its Application to Critical Path Selection", journal = j-TODAES, volume = "22", number = "1", pages = "17:1--17:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2940327", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A Monte Carlo-based approach is proposed capable of identifying in a non-enumerative and scalable manner the distributions that describe the delay of every path in a combinational circuit. Furthermore, a scalable approach to select critical paths from a potentially exponential number of path candidates is presented. Paths and their delay distributions are stored in Zero Suppressed Binary Decision Diagrams. 
Experimental results on some of the largest ISCAS-89 and ITC-99 benchmarks show that the proposed method is highly scalable and effective.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2016:ADB, author = "Yi Wang and Zhiwei Qin and Renhai Chen and Zili Shao and Laurence T. Yang", title = "An Adaptive Demand-Based Caching Mechanism for {NAND} Flash Memory Storage Systems", journal = j-TODAES, volume = "22", number = "1", pages = "18:1--18:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2947658", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "During past decades, the capacity of NAND flash memory has been increasing dramatically, leading to the use of nonvolatile flash in the system's memory hierarchy. The increasing capacity of NAND flash memory introduces a large RAM footprint to store the logical to physical address mapping. The demand-based approach can effectively reduce and well control the RAM footprint. However, extra address translation overhead is also introduced which may degrade the system performance. In this article, we present CDFTL, an adaptive Caching mechanism for Demand-based Flash Translation Layer, for NAND flash memory storage systems. CDFTL adopts both the fine-grained entry-based caching mechanism to exploit temporal locality and the coarse-grained translation-page-based caching mechanism to exploit spatial locality of workloads. By selectively caching the on-demand address mappings and adaptively changing the space configurations of two granularities, CDFTL can effectively utilize the RAM space and improve the cache hit ratio. 
We evaluate CDFTL under a real hardware embedded platform using a variety of I/O traces. Experimental results show that our technique can achieve an 11.13\% reduction in average system response time and a 35.21\% reduction in translation block erase counts compared with the previous work.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nair:2016:ESP, author = "Piyoosh Purushothaman Nair and Arnab Sarkar and N. M. Harsha and Megha Gandhi and P. P. Chakrabarti and Sujoy Ghose", title = "{ERfair} Scheduler with Processor Suspension for Real-Time Multiprocessor Embedded Systems", journal = j-TODAES, volume = "22", number = "1", pages = "19:1--19:??", month = dec, year = "2016", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2948979", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:29 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Proportional fair schedulers with their ability to provide optimal schedulability along with hard timeliness and quality-of-service guarantees on multiprocessors form an attractive alternative in real-time embedded systems that concurrently run a mix of independent applications with varying timeliness constraints. This article presents ERfair Scheduler with Suspension on Multiprocessors (ESSM), an efficient, optimal proportional fair scheduler that attempts to reduce system wide energy consumption by locally maximizing the processor suspension intervals while not sacrificing the ERfairness timing constraints of the system. The proposed technique takes advantage of higher execution rates of tasks in underloaded ERfair systems and uses a procrastination scheme to search for time points within the schedule where suspension intervals are locally maximal. 
Evaluation results reveal that ESSM achieves good sleep efficiency and provides up to 50\% higher effective total sleep durations as compared to the Basic-ERfair scheduler on systems consisting of 2 to 20 processors.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nguyen:2017:SAA, author = "Phuong Ha Nguyen and Durga Prasad Sahoo and Rajat Subhra Chakraborty and Debdeep Mukhopadhyay", title = "Security Analysis of Arbiter {PUF} and Its Lightweight Compositions Under Predictability Test", journal = j-TODAES, volume = "22", number = "2", pages = "20:1--20:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2940326", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Unpredictability is an important security property of Physically Unclonable Function (PUF) in the context of statistical attacks, where the correlation between challenge-response pairs is explicitly exploited. In the existing literature on PUFs, the Hamming Distance Test, denoted by HDT(t), was proposed to evaluate the unpredictability of PUFs, which is a simplified case of the Propagation Criterion test PC(t). The objective of these test schemes is to estimate the output transition probability when there are t or fewer than t bit flips, and ideally this probability value should be 0.5. In this work, we show that the aforementioned two test schemes are not enough to ensure the unpredictability of a PUF design. We propose a new test, which is denoted as HDT(e, t). This test scheme is a fine-tuned version of the previous schemes, as it considers the flipping bit pattern vector e along with parameter t. 
As a contribution, we provide a comprehensive discussion and analytic interpretation of HDT(t), PC(t), and HDT(e, t) test schemes for Arbiter PUF (APUF), Exclusive-OR (XOR) PUF, and Lightweight Secure PUF (LSPUF). Our analysis establishes that HDT(e, t) test is more general in comparison with HDT(t) and PC(t) tests. In addition, we demonstrate a few scenarios where the adversary can exploit the information obtained from the analysis of HDT(e, t) properties of APUF, XOR PUF, and LSPUF to develop statistical attacks on them, if the ideal value of HDT(e, t) = 0.5 is not achieved for a given PUF. We validate our theoretical observations using the simulated and Field Programmable Gate Array (FPGA) implemented APUF, XOR PUF, and LSPUF designs.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhu:2017:CCA, author = "Di Zhu and Siyu Yue and Massoud Pedram and Lizhong Chen", title = "{CALM}: Contention-Aware Latency-Minimal Application Mapping for Flattened Butterfly On-Chip Networks", journal = j-TODAES, volume = "22", number = "2", pages = "21:1--21:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2950045", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the emergence of many-core multiprocessor system-on-chips (MPSoCs), on-chip networks are facing serious challenges in providing fast communication among various tasks and cores. One promising on-chip network design approach shown in recent studies is to add express channels to traditional mesh network as shortcuts to bypass intermediate routers, thereby reducing packet latency. 
This approach not only changes the packet latency models, but also greatly affects network traffic behaviors, both of which have not been fully exploited in existing mapping algorithms. In this article, we explore the opportunities in optimizing application mapping for flattened butterfly, a popular express channel-based on-chip network. Specifically, we identify the unique characteristics of flattened butterfly, analyze the opportunities and new challenges, and propose an efficient heuristic mapping algorithm. The proposed algorithm Contention-Aware Latency Minimal (CALM) is able to reduce unnecessary turns that would otherwise impose additional router pipeline latency to packets, as well as adjust forwarding traffic to reduce network contention latency. Simulation results show that the proposed algorithm can achieve, on average, 3.4X reduction in the number of turns, 24.8\% reduction in contention latency, and 14.12\% reduction in the overall packet latency.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Azarbad:2017:SSB, author = "Mohammad Reza Azarbad and Bijan Alizadeh", title = "Scalable {SMT-Based} Equivalence Checking of Nested Loop Pipelining in Behavioral Synthesis", journal = j-TODAES, volume = "22", number = "2", pages = "22:1--22:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2953879", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present a novel methodology based on SMT-solvers to verify equality of a high-level described specification and a pipelined RTL implementation produced by a high-level synthesis tool. 
The complex transformations existing in the high-level synthesis process, such as nested loop pipelining, cause the conventional methods of equivalence checking to be inefficient. The proposed equivalence checking method simultaneously attacks the two problems in this context: (1) state space explosion and (2) complex high-level synthesis transformations. To show the scalability and efficiency of the proposed method, the verification results of large designs are compared with those of the SAT-based method, including three different state-of-the-art SAT-solvers: the SMT-based procedure, the modular Horner expansion diagram (M-HED)-based method, and the M-HED partitioning approach. The results show 2470$ \times $, 2540$ \times $, and 142$ \times $ average memory usage reduction and 252$ \times $, 28$ \times $, and 914$ \times $ speedup in comparison with M-HED, M-HED partitioning, and SMT-solver without using the proposed method, respectively.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2017:OIM, author = "Qingling Zhao and Zaid Al-Bayati and Zonghua Gu and Haibo Zeng", title = "Optimized Implementation of Multirate Mixed-Criticality Synchronous Reactive Models", journal = j-TODAES, volume = "22", number = "2", pages = "23:1--23:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2968445", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Model-based design using Synchronous Reactive (SR) models enables early design and verification of application functionality in a platform-independent manner, and the implementation on the target platform should guarantee the preservation of application 
semantic properties. Mixed-Criticality Scheduling (MCS) is an effective approach to addressing diverse certification requirements of safety-critical systems that integrate multiple subsystems with different levels of criticality. This article considers fixed-priority scheduling of mixed-criticality SR models, and considers two scheduling approaches: Adaptive MCS and Elastic MCS. We formulate the optimization problem of minimizing the total system cost of added functional delays in the implementation while guaranteeing schedulability, and present an optimal algorithm based on branch-and-bound search, and an efficient heuristic algorithm.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ali:2017:RCD, author = "Hazem Ismail Ali and Sander Stuijk and Benny Akesson and Lu{\'\i}s Miguel Pinho", title = "Reducing the Complexity of Dataflow Graphs Using Slack-Based Merging", journal = j-TODAES, volume = "22", number = "2", pages = "24:1--24:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2956232", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "There exist many dataflow applications with timing constraints that require real-time guarantees on safe execution without violating their deadlines. Extraction of timing parameters (offsets, deadlines, periods) from these applications enables the use of real-time scheduling and analysis techniques, and provides guarantees on satisfying timing constraints. 
However, existing extraction techniques require the transformation of the dataflow application from highly expressive dataflow computational models, for example, Synchronous Dataflow (SDF) and Cyclo-Static Dataflow (CSDF) to Homogeneous Synchronous Dataflow (HSDF). This transformation can lead to an exponential increase in the size of the application graph that significantly increases the runtime of the analysis. In this article, we address this problem by proposing an offline heuristic algorithm called slack-based merging. The algorithm is a novel graph reduction technique that helps in speeding up the process of timing parameter extraction and finding a feasible real-time schedule, thereby reducing the overall design time of the real-time system. It uses two main concepts: (a) the difference between the worst-case execution time of the SDF graph's firings and its timing constraints (slack) to merge firings together and generate a reduced-size HSDF graph, and (b) the novel concept of merging called safe merge, which is a merge operation that we formally prove cannot cause a live HSDF graph to deadlock. The results show that the reduced graph (1) respects the throughput and latency constraints of the original application graph and (2) typically speeds up the process of extracting timing parameters and finding a feasible real-time schedule for real-time dataflow applications. 
They also show that when the throughput constraint is relaxed with respect to the maximal throughput of the graph, the merging algorithm is able to achieve a larger reduction in graph size, which in turn results in a larger speedup of the real-time scheduling algorithms.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Mundhenk:2017:SAN, author = "Philipp Mundhenk and Andrew Paverd and Artur Mrowca and Sebastian Steinhorst and Martin Lukasiewycz and Suhaib A. Fahmy and Samarjit Chakraborty", title = "Security in Automotive Networks: Lightweight Authentication and Authorization", journal = j-TODAES, volume = "22", number = "2", pages = "25:1--25:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2960407", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the increasing amount of interconnections between vehicles, the attack surface of internal vehicle networks is rising steeply. Although these networks are shielded against external attacks, they often do not have any internal security to protect against malicious components or adversaries who can breach the network perimeter. To secure the in-vehicle network, all communicating components must be authenticated, and only authorized components should be allowed to send and receive messages. This is achieved through the use of an authentication framework. Cryptography is widely used to authenticate communicating parties and provide secure communication channels (e.g., Internet communication). 
However, the real-time performance requirements of in-vehicle networks restrict the types of cryptographic algorithms and protocols that may be used. In particular, asymmetric cryptography is computationally infeasible during vehicle operation. In this work, we address the challenges of designing authentication protocols for automotive systems. We present Lightweight Authentication for Secure Automotive Networks (LASAN), a full lifecycle authentication approach. We describe the core LASAN protocols and show how they protect the internal vehicle network while complying with the real-time constraints and low computational resources of this domain. By leveraging the fixed structure of automotive networks, we minimize bandwidth and computation requirements. Unlike previous work, we also explain how this framework can be integrated into all aspects of the automotive product lifecycle, including manufacturing, vehicle maintenance, and software updates. We evaluate LASAN in two different ways: First, we analyze the security properties of the protocols using established protocol verification techniques based on formal methods. Second, we evaluate the timing requirements of LASAN and compare these to other frameworks using a new highly modular discrete event simulator for in-vehicle networks, which we have developed for this evaluation.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhang:2017:RTV, author = "Xianwei Zhang and Youtao Zhang and Bruce R. 
Childers and Jun Yang", title = "On the Restore Time Variations of Future {DRAM} Memory", journal = j-TODAES, volume = "22", number = "2", pages = "26:1--26:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2967609", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the de facto main memory standard, DRAM (Dynamic Random Access Memory) has achieved dramatic density improvement in the past four decades, along with the advancements in process technology. Recent studies reveal that one of the major challenges in scaling DRAM into the deep sub-micron regime is its significant variations on cell restore time, which affect timing constraints such as write recovery time. Adopting traditional approaches results in either low yield rate or large performance degradation. In this article, we propose schemes to expose the variations to the architectural level. By constructing memory chunks with different access speeds and, in particular, exploiting the performance benefits of fast chunks, a variation-aware memory controller can effectively mitigate the performance loss due to relaxed timing constraints. We then proposed restore-time-aware rank construction and page allocation schemes to make better use of fast chunks. 
Our experimental results show that, compared to traditional designs such as row sparing and Error Correcting Codes, the proposed schemes help to improve system performance by about 16\% and 20\%, respectively, for 20nm and 14nm technology nodes on a four-core multiprocessor system.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2017:HDP, author = "Ye-Jyun Lin and Chia-Lin Yang and Hsiang-Pang Li and Cheng-Yuan Michael Wang", title = "A Hybrid {DRAM\slash PCM} Buffer Cache Architecture for {Smartphones} with {QoS} Consideration", journal = j-TODAES, volume = "22", number = "2", pages = "27:1--27:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2979143", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Flash memory is widely used in mobile phones to store contact information, application files, and other types of data. In an operating system, the buffer cache keeps the I/O blocks in dynamic random access memory (DRAM) to reduce the slow flash accesses. However, in smartphones, we observed two issues which reduce the benefits of the buffer cache. First, a large number of synchronous writes force writing the data from the buffer cache to flash frequently. Second, the large amount of I/O accesses from background applications diminishes the buffer cache efficiency of the foreground application, which degrades the quality-of-service (QoS). In this article, we propose a buffer cache architecture with hybrid DRAM and phase change memory (PCM) memory, which improves the I/O performance and QoS for smartphones. 
We use a DRAM first-level buffer cache to provide high buffer cache performance and a PCM last-level buffer cache to reduce the impact of frequent synchronous writes. Based on the proposed hierarchical buffer cache architecture, we propose a sub-block management and background flush to reduce the impact of the PCM write limitation and the dirty block write-back overhead, respectively. To improve the QoS, we propose a least-recently-activated first replacement policy (LRA) to keep the data from the applications that are most likely to become the foreground one. The experimental results show that with the proposed mechanisms, our hierarchical buffer cache can improve the I/O response time by 20\% compared to the conventional buffer cache. The proposed LRA can improve the foreground application performance by 1.74x compared to the conventional CLOCK policy.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Su:2017:EMC, author = "Hang Su and Dakai Zhu and Scott Brandt", title = "An Elastic Mixed-Criticality Task Model and Early-Release {EDF} Scheduling Algorithms", journal = j-TODAES, volume = "22", number = "2", pages = "28:1--28:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2984633", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many algorithms have recently been studied for scheduling mixed-criticality (MC) tasks. However, most existing MC scheduling algorithms guarantee the timely executions of high-criticality (HC) tasks at the expense of discarding low-criticality (LC) tasks, which can cause serious service interruption for such tasks. 
In this work, aiming at providing guaranteed services for LC tasks, we study an elastic mixed-criticality (E-MC) task model for dual-criticality systems. Specifically, the model allows each LC task to specify its maximum period (i.e., minimum service level) and a set of early-release points. We propose an early-release (ER) mechanism that enables LC tasks to be released more frequently and thus improve their service levels at runtime, with both conservative and aggressive approaches to exploiting system slack being considered, which is applied to both earliest deadline first (EDF) and preference-oriented earliest-deadline schedulers. We formally prove the correctness of the proposed early-release--earliest deadline first scheduler on guaranteeing the timeliness of all tasks through judicious management of the early releases of LC tasks. The proposed model and schedulers are evaluated through extensive simulations. The results show that by moderately relaxing the service requirements of LC tasks in MC task sets (i.e., by having LC tasks' maximum periods in the E-MC model be two to three times their desired MC periods), most transformed E-MC task sets can be successfully scheduled without sacrificing the timeliness of HC tasks. 
Moreover, with the proposed ER mechanism, the runtime performance of tasks (e.g., execution frequencies of LC tasks, response times, and jitters of HC tasks) can be significantly improved under the ER schedulers when compared to that of the state-of-the-art earliest deadline first-virtual deadline scheduler.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2017:CSL, author = "Irith Pomeranz", title = "Computation of Seeds for {LFSR}-Based $n$-Detection Test Generation", journal = j-TODAES, volume = "22", number = "2", pages = "29:1--29:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2994144", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article describes a new procedure that generates seeds for LFSR-based test generation when the goal is to produce an $n$-detection test set. The procedure does not use test cubes in order to avoid the situation where a seed does not exist for a given test cube with a given LFSR. Instead, the procedure starts from a set of seeds that produces a one-detection test set. It modifies seeds to obtain new seeds such that the tests they produce increase the numbers of detections of target faults. The modification procedure also increases the number of faults that each additional seed detects. 
Experimental results are presented to demonstrate the effectiveness of the procedure.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hankendi:2017:SCS, author = "Can Hankendi and Ayse Kivilcim Coskun", title = "Scale \& Cap: Scaling-Aware Resource Management for Consolidated Multi-threaded Applications", journal = j-TODAES, volume = "22", number = "2", pages = "30:1--30:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2994145", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "As the number of cores per server node increases, designing multi-threaded applications has become essential to efficiently utilize the available hardware parallelism. Many application domains have started to adopt multi-threaded programming; thus, efficient management of multi-threaded applications has become a significant research problem. Efficient execution of multi-threaded workloads on cloud environments, where applications are often consolidated by means of virtualization, relies on understanding the multi-threaded specific characteristics of the applications. Furthermore, energy cost and power delivery limitations require data center server nodes to work under power caps, which bring additional challenges to runtime management of consolidated multi-threaded applications. This article proposes a dynamic resource allocation technique for consolidated multi-threaded applications for power-constrained environments. 
Our technique takes into account application characteristics specific to multi-threaded applications, such as power and performance scaling, to make resource distribution decisions at runtime to improve the overall performance, while accurately tracking dynamic power caps. We implement and evaluate our technique on state-of-the-art servers and show that the proposed technique improves the application performance by up to 21\% under power caps compared to a default resource manager.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Backer:2017:SFT, author = "Jerry Backer and David Hely and Ramesh Karri", title = "Secure and Flexible Trace-Based Debugging of Systems-on-Chip", journal = j-TODAES, volume = "22", number = "2", pages = "31:1--31:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2994601", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work tackles the conflict between enforcing security of a system-on-chip (SoC) and providing observability during trace-based debugging. On one hand, security objectives require that assets remain confidential at different stages of the SoC life cycle. On the other hand, the trace-based debug infrastructure exposes values of internal signals that can leak the assets to untrusted third parties. We propose a secure trace-based debug infrastructure to resolve this conflict. The secure infrastructure tags each asset to identify its owner (to whom it can be exposed during debug) and nonintrusively enforces the confidentiality of the assets during runtime debug. 
We implement a prototype of the enhanced infrastructure on an FPGA to validate its functional correctness. ASIC estimations show that our approach incurs practical area and power costs.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Latifis:2017:MVC, author = "Ioannis Latifis and Karthick Parashar and Grigoris Dimitroulakos and Hans Cappelle and Christakis Lezos and Konstantinos Masselos and Francky Catthoor", title = "A {MATLAB} Vectorizing Compiler Targeting Application-Specific Instruction Set Processors", journal = j-TODAES, volume = "22", number = "2", pages = "32:1--32:28", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2996182", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article discusses a MATLAB-to-C vectorizing compiler that exploits custom instructions, for example, for Single Instruction Multiple Data (SIMD) processing and instructions for complex arithmetic present in Application-Specific Instruction Set Processors (ASIPs). Custom instructions are represented via specialized intrinsic functions in the generated code, and the generated code can be used as input to any C/C++ compiler supporting the target processor. Furthermore, the specialized instruction set of the target processor is described in a parameterized way using a target processor-independent architecture description approach, thus allowing the support of any processor. The compiler has been used for the generation of application code for two different ASIPs for several benchmarks. 
The code generated by the compiler achieves a speedup between 2$ \times $--74$ \times $ and 2$ \times $--97$ \times $ compared to the code generated by the MathWorks MATLAB-to-C compiler. Experimental results also prove that the compiler efficiently exploits SIMD custom instructions achieving a 3.3 factor speedup compared to cases where no SIMD processing is used. Thus the compiler can be employed to reduce the development time/effort/cost and time to market through raising the abstraction of application design in an embedded systems/system-on-chip development context.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Santos:2017:SMH, author = "Rui Santos and Shyamsundar Venkataraman and Akash Kumar", title = "Scrubbing Mechanism for Heterogeneous Applications in Reconfigurable Devices", journal = j-TODAES, volume = "22", number = "2", pages = "33:1--33:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2997646", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Commercial off-the-shelf (COTS) reconfigurable devices have been recognized as one of the most suitable processing devices to be applied in nano-satellites, since they can satisfy and combine their most important requirements, namely processing performance, reconfigurability, and low cost. However, COTS reconfigurable devices, in particular Static-RAM Field Programmable Gate Arrays, can be affected by cosmic radiation, compromising the overall nano-satellite reliability. Scrubbing has been proposed as a mechanism to repair faults in configuration memory.
However, the current scrubbing mechanisms are predominantly static, unable to adapt to heterogeneous applications and their runtime variations. In this article, a dynamically adaptive scrubbing mechanism is proposed. Through a window-based scrubbing scheduling, this mechanism adapts the scrubbing process to heterogeneous applications (composed of periodic/sporadic and streaming/DSP (Digital Signal Processing) tasks), as well as their reconfigurations and modifications at runtime. Conducted simulation experiments show the feasibility and the efficiency of the proposed solution in terms of system reliability metric and memory overhead.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Enrici:2017:MDE, author = "Andrea Enrici and Ludovic Apvrille and Renaud Pacalet", title = "A Model-Driven Engineering Methodology to Design Parallel and Distributed Embedded Systems", journal = j-TODAES, volume = "22", number = "2", pages = "34:1--34:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2999537", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In Model-Driven Engineering system-level approaches, the design of communication protocols and patterns is subject to the design of processing operations (computations) and to their mapping onto execution resources. However, this strategy allows us to capture simple communication schemes (e.g., processor-bus-memory) and prevents us from evaluating the performance of both computations and communications (e.g., impact of application traffic patterns onto the communication interconnect) in a single step. 
To solve these issues, we introduce a novel design approach---the $ \Psi $-chart---where we design communication patterns and protocols independently of a system's functionality and resources, via dedicated models. At the mapping step, both application and communication models are bound to the platform resources and transformed to explore design alternatives for both computations and communications. We present the $ \Psi $-chart and its implementation (i.e., communication models and Design Space Exploration) in TTool/DIPLODOCUS, a Unified Modeling Language (UML)/SysML framework for the modeling, simulation, formal verification and automatic code generation of data-flow embedded systems. The effectiveness of our solution in terms of better design quality (e.g., portability, time) is demonstrated with the design of the physical layer of a ZigBee (IEEE 802.15.4) transmitter onto a multi-processor architecture.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Basten:2017:SSI, author = "Twan Basten and Orlando Moreira and Robert de Groote", title = "Special Section: Integrating Dataflow, Embedded Computing and Architecture", journal = j-TODAES, volume = "22", number = "2", pages = "35:1--35:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3023455", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Choi:2017:WCR, author = "Junchul Choi and Soonhoi Ha", title = "Worst-Case Response Time Analysis of a Synchronous Dataflow Graph in a
Multiprocessor System with Real-Time Tasks", journal = j-TODAES, volume = "22", number = "2", pages = "36:1--36:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2997644", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a novel technique that estimates a tight upper bound of the worst-case response time (WCRT) of a synchronous dataflow (SDF) graph when the SDF graph shares processors with other real-time tasks. When an SDF graph is executed at runtime under a self-timed or static assignment scheduling policy on a multi-processor system, static scheduling of the SDF graph does not guarantee the satisfaction of latency constraints since changes to the schedule may result in timing anomalies. To estimate the WCRT of an SDF graph with a given mapping and scheduling result, we first construct a task instance dependency graph that depicts the dependency between node executions in a static schedule. The proposed technique combines two techniques in a novel way: schedule time bound analysis and response time analysis. The former is used to consider the interference between task instances in the same SDF graph, and the latter is used to consider the interference from other real-time tasks. 
Through extensive experiments with synthetic examples and benchmarks, we verify the superior performance of the proposed technique compared to other existent techniques.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jung:2017:MSM, author = "Hanwoong Jung and Hyunok Oh and Soonhoi Ha", title = "Multiprocessor Scheduling of a Multi-Mode Dataflow Graph Considering Mode Transition Delay", journal = j-TODAES, volume = "22", number = "2", pages = "37:1--37:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2997645", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Synchronous Data Flow (SDF) model is widely used for specifying signal processing or streaming applications. Since modern embedded applications become more complex with dynamic behavior changes at runtime, several extensions of the SDF model have been proposed to specify the dynamic behavior changes while preserving static analyzability of the SDF model. They assume that an application has a finite number of behaviors (or modes), and each behavior (mode) is represented by an SDF graph. They are classified as multi-mode dataflow models in this article. While there exist several scheduling techniques for multi-mode dataflow models, no one allows task migration between modes. By observing that the resource requirement can be additionally reduced if task migration is allowed, we propose a multiprocessor scheduling technique of a multi-mode dataflow graph considering task migration between modes. Based on a genetic algorithm, the proposed technique schedules all SDF graphs in all modes simultaneously to minimize the resource requirement. 
To satisfy the throughput constraint, the proposed technique calculates the actual throughput requirement of each mode and the output buffer size for tolerating throughput jitter. We compare the proposed technique with a method that analyzes SDF graphs in each execution mode separately, a method that does not allow task migration, and a method that does not allow mode-overlapped schedule for synthetic examples and five real applications: H.264 decoder, lane detection, vocoder, MP3 decoder, and printer pipeline.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bouakaz:2017:SPD, author = "Adnan Bouakaz and Pascal Fradet and Alain Girault", title = "A Survey of Parametric Dataflow Models of Computation", journal = j-TODAES, volume = "22", number = "2", pages = "38:1--38:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2999539", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Dataflow models of computation (MoCs) are widely used to design embedded signal processing and streaming systems. Dozens of dataflow MoCs have been proposed in the past few decades. More recently, several parametric dataflow MoCs have been presented as an interesting tradeoff between analyzability and expressiveness. They offer a controlled form of dynamism under the form of parameters (e.g., parametric rates), along with runtime parameter configuration. This survey provides a comprehensive description of the existing parametric dataflow MoCs (constructs, constraints, properties, static analyses) and compares them using a common example. 
The main objectives are to help designers of streaming applications choose the most suitable model for their needs and pave the way for the design of new parametric MoCs.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bouakaz:2017:SAD, author = "Adnan Bouakaz and Pascal Fradet and Alain Girault", title = "Symbolic Analyses of Dataflow Graphs", journal = j-TODAES, volume = "22", number = "2", pages = "39:1--39:??", month = mar, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3007898", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The synchronous dataflow model of computation is widely used to design embedded stream-processing applications under strict quality-of-service requirements (e.g., buffering size, throughput, input-output latency). The required analyses can either be performed at compile time (for design space exploration) or at runtime (for resource management and reconfigurable systems). However, these analyses have an exponential time complexity, which may cause a huge runtime overhead or make design space exploration unacceptably slow. In this article, we argue that symbolic analyses are more appropriate since they express the system performance as a function of parameters (i.e., input and output rates, execution times). Such functions can be quickly evaluated for each different configuration or checked with respect to different quality-of-service requirements. 
We provide symbolic analyses for computing the maximal throughput of acyclic synchronous dataflow graphs, the minimum required buffers for which as soon as possible (ASAP) scheduling achieves this throughput, and finally, the corresponding input-output latency of the graph. The article first investigates these problems for a single parametric edge. The results are extended to general acyclic graphs using linear approximation techniques. We assess the proposed analyses experimentally on both synthetic and real benchmarks.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Park:2017:HHC, author = "Jaehyun Park and Seungcheol Baek and Hyung Gyu Lee and Chrysostomos Nicopoulos and Vinson Young and Junghee Lee and Jongman Kim", title = "{HoPE}: Hot-Cacheline Prediction for Dynamic Early Decompression in Compressed {LLCs}", journal = j-TODAES, volume = "22", number = "3", pages = "40:1--40:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2999538", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Data compression plays a pivotal role in improving system performance and reducing energy consumption, because it increases the logical effective capacity of a compressed memory system without physically increasing the memory size. However, data compression techniques incur some cost, such as non-negligible compression and decompression overhead. This overhead becomes more severe if compression is used in the cache. 
In this article, we aim to minimize the read-hit decompression penalty in compressed Last-Level Caches (LLCs) by speculatively decompressing frequently used cachelines. To this end, we propose a Hot-cacheline Prediction and Early decompression (HoPE) mechanism that consists of three synergistic techniques: Hot-cacheline Prediction (HP), Early Decompression (ED), and Hit-history-based Insertion (HBI). HP and HBI efficiently identify the hot compressed cachelines, while ED selectively decompresses hot cachelines, based on their size information. Unlike previous approaches, the HoPE framework considers the performance balance/tradeoff between the increased effective cache capacity and the decompression penalty. To evaluate the effectiveness of the proposed HoPE mechanism, we run extensive simulations on memory traces obtained from multi-threaded benchmarks running on a full-system simulation framework. We observe significant performance improvements over compressed cache schemes employing the conventional Least-Recently Used (LRU) replacement policy, the Dynamic Re-Reference Interval Prediction (DRRIP) scheme, and the Effective Capacity Maximizer (ECM) compressed cache management mechanism. Specifically, HoPE exhibits system performance improvements of approximately 11\%, on average, over LRU, 8\% over DRRIP, and 7\% over ECM by reducing the read-hit decompression penalty by around 65\%, over a wide range of applications.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tang:2017:PPE, author = "Li Tang and Richard F. Barrett and Jeanine Cook and X. 
Sharon Hu", title = "{PeaPaw}: Performance and Energy-Aware Partitioning of Workload on Heterogeneous Platforms", journal = j-TODAES, volume = "22", number = "3", pages = "41:1--41:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2999540", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Performance and energy are two major concerns for application development on heterogeneous platforms. It is challenging for application developers to fully exploit the performance/energy potential of heterogeneous platforms. One reason is the lack of reliable prediction of the system's performance/energy before application implementation. Another reason is that a heterogeneous platform presents a large design space for workload partitioning between different processors. To reduce such development cost, this article proposes a framework, PeaPaw, to assist application developers to identify a workload partition (WP) that has high potential leading to high performance or energy efficiency before actual implementation. The PeaPaw framework includes both analytical performance/energy models and two sets of workload partitioning guidelines. Based on the design goal, application developers can obtain a workload partitioning guideline from PeaPaw for a given platform and follow it to design one or multiple WPs for a given workload. Then PeaPaw can be used to estimate the performance/energy of the designed WPs, and the WP with the best estimated performance/energy can be selected for actual implementation. To demonstrate the effectiveness of PeaPaw, we have conducted three case studies. 
Results from these case studies show that PeaPaw can faithfully estimate the performance/energy relationships of WPs and provide effective workload partitioning guidelines.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2017:CCS, author = "Kun Yang and Domenic Forte and Mark M. Tehranipoor", title = "{CDTA}: a Comprehensive Solution for Counterfeit Detection, Traceability, and Authentication in the {IoT} Supply Chain", journal = j-TODAES, volume = "22", number = "3", pages = "42:1--42:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3005346", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Internet of Things (IoT) is transforming the way we live and work by increasing the connectedness of people and things on a scale that was once unimaginable. However, the vulnerabilities in the IoT supply chain have raised serious concerns about the security and trustworthiness of IoT devices and components within them. Testing for device provenance, detection of counterfeit integrated circuits (ICs) and systems, and traceability of IoT devices are challenging issues to address. In this article, we develop a novel radio-frequency identification (RFID)-based system suitable for counterfeit detection, traceability, and authentication in the IoT supply chain called CDTA. CDTA is composed of different types of on-chip sensors and in-system structures that collect necessary information to detect multiple counterfeit IC types (recycled, cloned, etc.), track and trace IoT devices, and verify the overall system authenticity. 
Central to CDTA is an RFID tag employed as storage and a channel to read the information from different types of chips on the printed circuit board (PCB) in both power-on and power-off scenarios. CDTA sensor data can also be sent to the remote server for authentication via an encrypted Ethernet channel when the IoT device is deployed in the field. A novel board ID generator is implemented by combining outputs of physical unclonable functions (PUFs) embedded in the RFID tag and different chips on the PCB. A light-weight RFID protocol is proposed to enable mutual authentication between RFID readers and tags. We also implement a secure interchip communication on the PCB. Simulations and experimental results using Spartan 3E FPGAs demonstrate the effectiveness of this system. The efficiency of the radio-frequency (RF) communication has also been verified via a PCB prototype with a printed slot antenna.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2017:GTS, author = "Irith Pomeranz", title = "Generation of Transparent-Scan Sequences for Diagnosis of Scan Chain Faults", journal = j-TODAES, volume = "22", number = "3", pages = "43:1--43:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3007207", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Diagnosis of scan chain faults is important for yield learning and improvement. Procedures that generate tests for diagnosis of scan chain faults produce scan-based tests with one or more functional capture cycles between a scan-in and a scan-out operation. 
The approach to test generation referred to as transparent-scan has several advantages in this context. (1) It allows functional capture cycles and scan shift cycles to be interleaved arbitrarily. This increases the flexibility to assign to the scan cells values that are needed for diagnosis. (2) Test generation under transparent-scan considers a circuit model where the scan logic is included explicitly. Consequently, the test generation procedure takes into consideration the full effect of a scan chain fault. It thus produces accurate tests. (3) For the same reason, it can also target faults inside the scan logic. (4) Transparent-scan results in compact test sequences. Compaction is important because of the large volumes of fail data that scan chain faults create. The cost of transparent-scan is that it requires simulation procedures for sequential circuits, and that arbitrary sequences would be applicable to the scan select input. Motivated by the advantages of transparent-scan, and the importance of diagnosing scan chain faults, this article describes a procedure for generating transparent-scan sequences for diagnosis of scan chain faults. 
The procedure is also applied to produce transparent-scan sequences for diagnosis of faults inside the scan logic.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vatanparvar:2017:ASR, author = "Korosh Vatanparvar and Mohammad Abdullah {Al Faruque}", title = "Application-Specific Residential Microgrid Design Methodology", journal = j-TODAES, volume = "22", number = "3", pages = "44:1--44:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3007206", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In power systems, the traditional, non-interactive, and manually controlled power grid has been transformed to a cyber-dominated smart grid. This cyber-physical integration has provided the smart grid with communication, monitoring, computation, and controlling capabilities to improve its reliability, energy efficiency, and flexibility. A microgrid is a localized and semi-autonomous group of smart energy systems that utilizes the above-mentioned capabilities to drive modern technologies such as electric vehicle charging, home energy management, and smart appliances. Design, upgrading, test, and verification of these microgrids can get too complicated to handle manually. The complexity is due to the wide range of solutions and components that are intended to address the microgrid problems. This article presents a novel Model-Based Design (MBD) methodology to model, co-simulate, design, and optimize microgrid and its multi-level controllers. This methodology helps in the design, optimization, and validation of a microgrid for a specific application. 
The application rules, requirements, and design-time constraints are met in the designed/optimized microgrid while the implementation cost is minimized. Based on our novel methodology, a design automation, co-simulation, and analysis tool, called GridMAT, is implemented. Our experiments have illustrated that implementing a hierarchical controller reduces the average power consumption by 8\% and shifts the peak load for cost saving. Moreover, optimizing the microgrid design using our MBD methodology considering smart controllers has decreased the total implementation cost. Compared to the conventional methodology, the cost decreases by 14\% and compared to the MBD methodology where smart controllers are not considered, it decreases by 5\%.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2017:LAE, author = "Jin-Tai Yan", title = "Layer Assignment of Escape Buses with Consecutive Constraints in {PCB} Designs", journal = j-TODAES, volume = "22", number = "3", pages = "45:1--45:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3012010", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "It is important for cost and reliability consideration to minimize the number of the used layers in a PCB design. In this article, given a set of $n$ circular escape buses with their escape directions between two adjacent components and a set of $m$ consecutive constraints on the escape buses, the problem of assigning the given escape buses between two adjacent components onto the minimized layers is first formulated for bus-oriented escape routing. 
Furthermore, an efficient approach is proposed to minimize the number of the used layers for the given escape buses with the consecutive constraints and assign the escape buses onto the available layers. Compared with Yan's approach [Yan and Chen 2012] for the layer assignment of the linear escape buses with no consecutive constraint and Ma's approach [Ma et al. 2011a] for the layer assignment of the circular escape buses with consecutive constraints, the experimental results show that the proposed approach obtains the same optimal results on the number of the used layers and reduces 43.6\% and 90.5\% of CPU time for the tested examples on the average, respectively.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Peng:2017:LSA, author = "Yin-Chi Peng and Chien-Chih Chen and Hsiang-Jen Tsai and Keng-Hao Yang and Pei-Zhe Huang and Shih-Chieh Chang and Wen-Ben Jone and Tien-Fu Chen", title = "{Leak Stopper}: an Actively Revitalized Snoop Filter Architecture with Effective Generation Control", journal = j-TODAES, volume = "22", number = "3", pages = "46:1--46:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3015770", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "To alleviate high energy dissipation of unnecessary snooping accesses, snoop filters have been designed to reduce snoop lookups. These filters have the problem of decreasing filtering efficiency, and thus usually rely on partial or whole filter reset by detecting block evictions. Unfortunately, the reset conditions occur infrequently or unevenly (called passive filter deletion). 
This work proposes the concept of revitalized snoop filter (RSF) design, which can actively renew the destination filter by employing a generation wrapping-around scheme for various reference behaviors. We further utilize a sampling mechanism for RSF to timely trigger precise filter revitalizations, so that unnecessary RSF flushing can be minimized. The proposed RSF can be integrated to various existent inclusive snoop filters with only a minor change to their designs. We evaluate our proposed design and demonstrate that RSF eliminates 58.6\% of snoop energy compared to JETTY on average while inducing only 6.5\% of revitalization energy overhead. In addition, RSF eliminates 45.5\% of snoop energy compared to stream registers on average and only induces 2.5\% of revitalization energy overhead. Overall, these RSFs reduce the total L2 cache energy consumption by 52.1\% (58.6\% $-$ 6.5\%) as compared to JETTY and by 43\% (45.5\% $-$ 2.5\%) as compared to stream registers. Furthermore, RSF improves the overall performance by 1\% to 1.4\% on average compared to JETTY and stream registers for various benchmark suites.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shi:2017:TAA, author = "Guoyong Shi and Hanbin Hu and Shuwen Deng", title = "Topological Approach to Automatic Symbolic {Macromodel} Generation for Analog Integrated Circuits", journal = j-TODAES, volume = "22", number = "3", pages = "47:1--47:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3015782", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the field of analog integrated circuit (IC) design, small-signal macromodels play 
indispensable roles for developing design insight and sizing reference. However, the subject of automatically generating symbolic low-order macromodels in human readable circuit form has not been well studied. Traditionally, work has been published on reducing full-scale symbolic transfer functions to simpler forms but without the guarantee of interpretability. On the other hand, methodologies developed for interconnect circuits (mainly resistor-capacitor-inductor (RCL) networks) are not suitable for analog ICs. In this work, a topological reduction method is introduced that is able to automatically generate interpretable macromodel circuits in symbolic form; that is, the circuit elements in the compact model maintain analytical relations of the parameters of the original full circuit. This type of symbolic macromodel has several benefits that other traditional modeling methods do not offer: First, reusability, namely that a designer need not repeatedly generate macromodels for the same circuit even if it is re-sized or re-biased; second, interpretability, namely a designer may directly identify circuit parameters (in the original circuit) that are closely related to the dominant frequency characteristics, such as dc gain, gain/phase margins, and dominant poles/zeros. 
The effectiveness and computational efficiency of the proposed method have been validated by several operational amplifier (opamp) circuit examples.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Han:2017:CAB, author = "Miseon Han and Youngsun Han and Seon Wook Kim and Hokyoon Lee and Il Park", title = "Content-Aware Bit Shuffling for Maximizing {PCM} Endurance", journal = j-TODAES, volume = "22", number = "3", pages = "48:1--48:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3017445", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recently, phase change memory (PCM) has been emerging as a strong replacement for DRAM owing to its many advantages such as nonvolatility, high capacity, low leakage power, and so on. However, PCM is still restricted for use as main memory because of its limited write endurance. There have been many methods introduced to resolve the problem by either reducing or spreading out bit flips. Although many previous studies have significantly contributed to reducing bit flips, they still have the drawback that lower bits are flipped more often than higher bits because the lower bits frequently change their bit values. Also, interblock wear-leveling schemes are commonly employed for spreading out bit flips by shifting input data, but they increase the number of bit flips per write. In this article, we propose a novel content-aware bit shuffling (CABS) technique that minimizes bit flips and evenly distributes them to maximize the lifetime of PCM at the bit level. 
We also introduce two additional optimizations, namely, addition of an inversion bit and use of an XOR key, to further reduce bit flips. Moreover, CABS is capable of recovering from stuck-at faults by restricting the change in values of stuck-at cells. Experimental results showed that CABS outperformed the existing state-of-the-art methods in the aspect of PCM lifetime extension with minimal overhead. CABS achieved up to 48.5\% enhanced lifetime compared to the data comparison write (DCW) method only with a few metadata bits. Moreover, CABS obtained approximately 9.7\% of improved write throughput than DCW because it significantly reduced bit flips and evenly distributed them. Also, CABS reduced about 5.4\% of write dynamic energy compared to DCW. Finally, we have also confirmed that CABS is fully applicable to BCH codes as it was able to reduce the maximum number of bit flips in metadata cells by 32.1\%.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Saha:2017:SSS, author = "Shamik Saha and Prabal Basu and Chidhambaranathan Rajamanikkam and Aatreyi Bal and Koushik Chakraborty and Sanghamitra Roy", title = "{SSAGA}: {SMs} Synthesized for Asymmetric {GPGPU} Applications", journal = j-TODAES, volume = "22", number = "3", pages = "49:1--49:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3014163", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The emergence of GPGPU applications, bolstered by flexible GPU programming platforms, has created a tremendous challenge in maintaining high energy efficiency in modern GPUs. 
In this article, we demonstrate that customizing a Streaming Multiprocessor (SM) of a GPU at a lower frequency is significantly more energy efficient compared to employing DVFS on an SM designed for a high-frequency operation. Using a system-level CAD technique, we propose SSAGA---Streaming Multiprocessors Synthesized for Asymmetric GPGPU Applications---an energy-efficient GPU design paradigm. SSAGA creates architecturally identical SM cores, customized for different voltage-frequency domains. Our rigorous cross-layer methodology demonstrates an average of 20\% improvement in energy efficiency over a spatially multitasking GPU across a range of GPGPU applications.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lu:2017:LPC, author = "Tiantao Lu and Ankur Srivastava", title = "Low-Power Clock Tree Synthesis for {$3$D-ICs}", journal = j-TODAES, volume = "22", number = "3", pages = "50:1--50:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3019610", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose efficient algorithms to construct a low-power clock tree for through-silicon-via (TSV)-based 3D-ICs. We use shutdown gates to save clock trees' dynamic power, which selectively turn off certain clock tree branches to avoid unnecessary clock activities when the modules in these tree branches are inactive. While this clock gating technique has been extensively studied in 2D circuits, its application in 3D-ICs is unclear. 
In 3D-ICs, a shutdown gate is connected to a control signal unit through control TSVs, which may cause placement conflicts with existing clock TSVs in the layout due to TSV's large physical dimension. We develop a two-phase clock tree synthesis design flow for 3D-ICs: (1) 3D abstract clock tree generation based on K-means clustering and (2) clock tree embedding with simultaneous shutdown gates' insertion based on simulated annealing (SA) and a force-directed TSV placer. Experimental results indicate that (1) the K-means clustering heuristic significantly reduces the clock power by clustering modules with similar switching behavior and close proximity, and (2) the SA algorithm effectively inserts the shutdown gates to a 3D clock tree, while considering control TSV's placement. Compared with previous 3D clock tree synthesis techniques, our K-means clustering-based approach achieves larger reduction in clock tree power consumption while ensuring zero clock skew.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2017:TPT, author = "Woojoo Lee and Kyuseung Han and Yanzhi Wang and Tiansong Cui and Shahin Nazarian and Massoud Pedram", title = "{TEI}-power: Temperature Effect Inversion-Aware Dynamic Thermal Management", journal = j-TODAES, volume = "22", number = "3", pages = "51:1--51:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3019941", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "FinFETs have emerged as a promising replacement for planar CMOS devices in sub-20nm technology nodes. 
However, based on the temperature effect inversion (TEI) phenomenon observed in FinFET devices, the delay characteristics of FinFET circuits in sub-, near-, and superthreshold voltage regimes may be fundamentally different from those of CMOS circuits with nominal voltage operation. For example, FinFET circuits may run faster in higher temperatures. Therefore, the existing CMOS-based and TEI-unaware dynamic power and thermal management techniques would not be applicable. In this article, we present TEI-power, a dynamic voltage and frequency scaling--based dynamic thermal management technique that considers the TEI phenomenon and also the superlinear dependencies of power consumption components on the temperature and outlines a real-time trade-off between delay and power consumption as a function of the chip temperature to provide significant energy savings, with no performance penalty---namely, up to 42\% energy savings for small circuits where the logic cell delay is dominant and up to 36\% energy savings for larger circuits where the interconnect delay is considerable.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2017:UCP, author = "Yongje Lee and Jinyong Lee and Ingoo Heo and Dongil Hwang and Yunheung Paek", title = "Using {CoreSight PTM} to Integrate {CRA} Monitoring {IPs} in an {ARM}-Based {SoC}", journal = j-TODAES, volume = "22", number = "3", pages = "52:1--52:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035965", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The ARM CoreSight Program Trace Macrocell (PTM) has been widely deployed in recent ARM processors for real-time debugging 
and tracing of software. Using PTM, the external debugger can extract execution behaviors of applications running on an ARM processor. Recently, some researchers have been using this feature for other purposes, such as fault-tolerant computation and security monitoring. This motivated us to develop an external security monitor that can detect control hijacking attacks, of which the goal is to maliciously manipulate the control flow of victim applications at an attacker's disposal. This article focuses on detecting a special type of attack called code reuse attacks (CRA), which use a recently introduced technique that allows attackers to perform arbitrary computation without injecting their code by reusing only existing code fragments. Our external monitor is attached to the outside of the host system via the system bus and ARM CoreSight PTM, and is fed with execution traces of a victim application running on the host. As a majority of CRAs violates the normal execution behaviors of a program, our monitor constantly watches and analyzes the execution traces of the victim application and detects a symptom of attacks when the execution behaviors violate certain rules that normal applications are known to adhere to. 
We present two different implementations for this purpose: a hardware-based solution in which all CRA detection components are implemented in hardware, and a hardware/software mixed solution that can be employed in a more resource-constrained environment where the deployment of full hardware-level CRA detection is burdensome.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xue:2017:FCT, author = "Yuankun Xue and Ji Li and Shahin Nazarian and Paul Bogdan", title = "Fundamental Challenges Toward Making the {IoT} a Reachable Reality: a Model-Centric Investigation", journal = j-TODAES, volume = "22", number = "3", pages = "53:1--53:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3001934", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Constantly advancing integration capability is paving the way for the construction of the extremely large scale continuum of the Internet where entities or things from vastly varied domains are uniquely addressable and interacting seamlessly to form a giant networked system of systems known as the Internet-of-Things (IoT). In contrast to this visionary networked system paradigm, prior research efforts on the IoT are still very fragmented and confined to disjoint explorations of different applications, architecture, security, services, protocol, and economical domains, thus preventing design exploration and optimization from a unified and global perspective. In this context, this survey article first proposes a mathematical modeling framework that is rich in expressivity to capture IoT characteristics from a global perspective. 
It also sets forward a set of fundamental challenges in sensing, decentralized computation, robustness, energy efficiency, and hardware security based on the proposed modeling framework. Possible solutions are discussed to shed light on future development of the IoT system paradigm.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Guo:2017:OBP, author = "Zimu Guo and Jia Di and Mark M. Tehranipoor and Domenic Forte", title = "Obfuscation-Based Protection Framework against Printed Circuit Boards Unauthorized Operation and Reverse Engineering", journal = j-TODAES, volume = "22", number = "3", pages = "54:1--54:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035482", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Printed circuit boards (PCBs) are a basic necessity for all modern electronic systems but are becoming increasingly vulnerable to cloning, overproduction, tampering, and unauthorized operation. Most efforts to prevent such attacks have only focused on the chip level, leaving a void for PCBs and higher levels of abstraction. In this article, we propose the first ever obfuscation-based framework for the protection of PCBs. Central to our approach is a permutation block that hides the inter-chip connections between chips on the PCB and is controlled by a key. If the correct key is applied, then the correct connections between chips are made. Otherwise, the connections are incorrectly permuted, and the PCB/system fails to operate. 
We propose a permutation network added to the PCB based on a Benes network that can easily be implemented in a complex programmable logic device or field-programmable gate arrays. Based on this implementation, we analyze the security of our approach with respect to (i) brute-force attempts to reverse engineer the PCB, (ii) brute-force attempts at guessing the correct key, and (iii) physical and logistic attacks by a range of adversaries. Performance evaluation results on 12 reference designs show that brute force generally requires prohibitive time to break the obfuscation. We also provide detailed requirements for countermeasures that prevent reverse engineering, unauthorized operation, and so on, for different classes of attackers.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Torabi:2017:FHA, author = "Mohammad Torabi and Lihong Zhang", title = "A Fast Hierarchical Adaptive Analog Routing Algorithm Based on Integer Linear Programming", journal = j-TODAES, volume = "22", number = "3", pages = "55:1--55:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035464", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The shrinking design window and high parasitic sensitivity in advanced technologies have imposed special challenges on analog and radio frequency (RF) integrated circuit design. The state-of-the-art analog routing research tends to favor linear programming to achieve various analog constraints, which, although effective, fails to offer high routing efficiency on its own. 
In this article, we propose a new methodology to address such a deficiency based on integer linear programming (ILP) but without compromising the capability of handling any special constraints for the analog routing problems. Our proposed method supports hierarchical routing, which can divide the entire routing area into multiple small heterogeneous regions where the ILP can efficiently derive routing solutions. Distinct from the conventional methods, our algorithm utilizes adaptive resolutions for various routing regions. For a more congested region, a routing grid with higher resolution is employed, whereas a lower-resolution grid is adopted to a less-crowded routing region. For a large empty space, routing efficiency can be even boosted by creating more routing hierarchy levels. This scheme is especially beneficial to the analog and RF layouts, which are far sparser than their digital counterparts. The experimental results show that our proposed adaptive ILP-based router is much faster than the conventional ones, since it spends much less time in the areas that need no accurate routing anyway. 
The higher efficiency is demonstrated for large circuits and especially sparse layouts along with promising routing quality in terms of analog constraints.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Song:2017:STV, author = "Yang Song and Kambiz Samadi and Bill Lin", title = "A Single-Tier Virtual Queuing Memory Controller Architecture for Heterogeneous {MPSoCs}", journal = j-TODAES, volume = "22", number = "3", pages = "56:1--56:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035481", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Heterogeneous MPSoCs typically integrate diverse cores, including application CPUs, GPUs, and HD coders. These cores commonly share an off-chip memory to save cost and energy, but their memory accesses often interfere with each other, leading to undesirable consequences like a slowdown of application performance or a failure to sustain real-time performance. The memory controller plays a central role in meeting the QoS needs of real-time cores while maximizing CPU performance. Previous QoS-aware memory controllers are based on a classic two-tier queuing architecture that buffers memory transactions at the first tier, followed by a second tier that buffers translated DRAM commands. In these designs, QoS-aware policies are used to schedule competing transactions at the first stage, but the translated DRAM commands are served in FIFO order at the second stage. 
Unfortunately, once the scheduled transactions have been forwarded to the command stage, newly arriving transactions that may be more critical cannot be served ahead of those translated commands that are already queued at the second stage. To address this, we propose a scalable memory controller architecture based on single-tier virtual queuing (STVQ) that maintains a single tier of request queues and employs an efficacious scheduler that considers both QoS requirements and DRAM bank states. In comparison with previous QoS-aware memory controllers, the proposed STVQ memory controller reduces CPU slowdown by up to 13.9\% while satisfying all frame rate requirements. We propose further optimizations that can significantly increase row-buffer hits by up to 66.2\% and reduce memory latency by up to 19.8\%.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2017:ASE, author = "Ji Li and Jeffrey Draper", title = "Accelerated Soft-Error-Rate {(SER)} Estimation for Combinational and Sequential Circuits", journal = j-TODAES, volume = "22", number = "3", pages = "57:1--57:??", month = may, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035496", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 21 10:49:30 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Radiation-induced soft errors have posed an increasing reliability challenge to combinational and sequential circuits in advanced CMOS technologies. Therefore, it is imperative to devise fast, accurate and scalable soft error rate (SER) estimation methods as part of cost-effective robust circuit design. 
This paper presents an efficient SER estimation framework for combinational and sequential circuits, which considers single-event transients (SETs) in combinational logic and multiple cell upsets (MCUs) in sequential elements. A novel top-down memoization algorithm is proposed to accelerate the propagation of SETs, and a general schematic and layout co-simulation approach is proposed to model the MCUs for redundant sequential storage structures. The feedback in sequential logic is analyzed with an efficient time frame expansion method. Experimental results on various ISCAS85 combinational benchmark circuits demonstrate that the proposed approach achieves up to 560.2$\times$ speedup with less than 3\% difference in terms of SER results compared with the baseline algorithm. The average runtime of the proposed framework on a variety of ISCAS89 benchmark circuits is 7.20s, and the runtime is 119.23s for the largest benchmark circuit with more than 3,000 flip-flops and 17,000 gates.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2017:EEE, author = "Kaige Yan and Lu Peng and Mingsong Chen and Xin Fu", title = "Exploring Energy-Efficient Cache Design in Emerging Mobile Platforms", journal = j-TODAES, volume = "22", number = "4", pages = "58:1--58:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2843940", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Mobile devices are quickly becoming the most widely used processors in consumer devices. Since their major power supply is battery, energy-efficient computing is highly desired. In this article, we focus on energy-efficient cache design in emerging mobile platforms. 
We observe that more than 40\% of L2 cache accesses are OS kernel accesses in interactive smartphone applications. Such frequent kernel accesses cause serious interferences between the user and kernel blocks in the L2 cache, leading to unnecessary block replacements and high L2 cache miss rate. We first propose to statically partition the L2 cache into two separate segments, which can be accessed only by the user code and kernel code, respectively. Meanwhile, the overall size of the two segments is shrunk, which reduces the energy consumption while still maintaining the similar cache miss rate. We then find completely different access behaviors between the two separated kernel and user segments and explore the multi-retention STT-RAM-based user and kernel segments to obtain higher energy savings in this static partition-based cache design. Finally, we propose to dynamically partition the L2 cache into the user and kernel segments to minimize overall cache size. We also integrate the short-retention STT-RAM into this dynamic partition-based cache design for maximal energy savings. 
The experimental results show that our static technique reduces cache energy consumption by 75\% with 2\% performance loss, and our dynamic technique further shows strong capability to reduce cache energy consumption by 85\% with only 3\% performance loss.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kim:2017:SBS, author = "Taehyun Kim and Jongbum Lim and Jinku Kim and Woo-Cheol Cho and Eui-Young Chung and Hyuk-Jun Lee", title = "Scalable Bandwidth Shaping Scheme via Adaptively Managed Parallel Heaps in Manycore-Based Network Processors", journal = j-TODAES, volume = "22", number = "4", pages = "59:1--59:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3065926", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scalability of network processor-based routers heavily depends on limitations imposed by memory accesses and associated power consumption. Bandwidth shaping of a flow is a key function, which requires a token bucket per output queue and abuses memory bandwidth. As the number of output queues increases, managing token buckets becomes prohibitively expensive and limits scalability. In this work, we propose a scalable software-based token bucket management scheme that can reduce memory accesses and power consumption significantly. To satisfy real-time and low-cost constraints, we propose novel parallel heap data structures running on a manycore-based network processor. By using cache locking, the performance of heap processing is enhanced significantly and is more predictable. 
In addition, we quantitatively analyze the performance and memory footprint of the proposed software scheme using stochastic modeling and the Lyapunov central limit theorem. Finally, the proposed scheme provides an adaptive method to limit the size of heaps in the case of oversubscribed queues, which can successfully isolate the queues showing unideal behavior. The proposed scheme reduces memory accesses by up to three orders of magnitude for one million queues sharing a 100Gbps interface of the router while maintaining stability under stressful scenarios.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Agrawal:2017:OSA, author = "Prabhav Agrawal and Mike Broxterman and Biswadeep Chatterjee and Patrick Cuevas and Kathy H. Hayashi and Andrew B. Kahng and Pranay K. Myana and Siddhartha Nath", title = "Optimal Scheduling and Allocation for {IC} Design Management and Cost Reduction", journal = j-TODAES, volume = "22", number = "4", pages = "60:1--60:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3035483", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A large semiconductor product company spends hundreds of millions of dollars each year on design infrastructure to meet tapeout schedules for multiple concurrent projects. Resources (servers, electronic design automation tool licenses, engineers, and so on) are limited and must be shared --- and the cost per day of schedule slip can be enormous. Co-constraints between resource types (e.g., one license per every two cores (threads)) and dedicated versus shareable resource pools make scheduling and allocation hard. 
In this article, we formulate two mixed integer-linear programs for optimal multi-project, multi-resource allocation with task precedence and resource co-constraints. Application to a real-world three-project scheduling problem extracted from a leading-edge design center of anonymized Company X shows substantial compute and license costs savings. Compared to the product company, our solution shows that the makespan of schedule of all projects can be reduced by seven days, which not only saves $\approx$2.7\% of annual labor and infrastructure costs but also enhances market competitiveness. We also demonstrate the capability of scheduling over two dozen chip development projects at the design center level, subject to resource and datacenter capacity limits as well as per-project penalty functions for schedule slips. The design center ended up purchasing 600 additional servers, whereas our solution demonstrates that the schedule can be met without having to purchase any additional servers. 
Application to a four-project scheduling problem extracted from a leading-edge design center in a non-US location shows availability of up to $\approx$37\% headcount reduction during a half-year schedule for just one type of chip design activity.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Isenberg:2017:PCH, author = "Tobias Isenberg and Marco Platzner and Heike Wehrheim and Tobias Wiersema", title = "Proof-Carrying Hardware via Inductive Invariants", journal = j-TODAES, volume = "22", number = "4", pages = "61:1--61:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3054743", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Proof-carrying hardware (PCH) is a principle for achieving safety for dynamically reconfigurable hardware systems. The producer of a hardware module spends huge effort when creating a proof for a safety policy. The proof is then transferred as a certificate together with the configuration bitstream to the consumer of the hardware module, who can quickly verify the given proof. Previous work utilized SAT solvers and resolution traces to set up a PCH technology and corresponding tool flows. In this article, we present a novel technology for PCH based on inductive invariants. For sequential circuits, our approach is fundamentally stronger than the previous SAT-based one since we avoid the limitations of bounded unrolling. We contrast our technology to existing ones and show that it fits into previously proposed tool flows. 
We conduct experiments with four categories of benchmark circuits and report consumer and producer runtime and peak memory consumption, as well as the size of the certificates and the distribution of the workload between producer and consumer. Experiments clearly show that our new induction-based technology is superior for sequential circuits, whereas the previous SAT-based technology is the better choice for combinational circuits.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bonetti:2017:AID, author = "Andrea Bonetti and Nicholas Preyss and Adam Teman and Andreas Burg", title = "Automated Integration of Dual-Edge Clocking for Low-Power Operation in Nanometer Nodes", journal = j-TODAES, volume = "22", number = "4", pages = "62:1--62:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3054744", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Clocking power, including both clock distribution and registers, has long been one of the primary factors in the total power consumption of many digital systems. One straightforward approach to reduce this power consumption is to apply dual-edge-triggered (DET) clocking, as sequential elements operate at half the clock frequency while maintaining the same throughput as with conventional single-edge-triggered (SET) clocking. However, the DET approach is rarely taken in modern integrated circuits, primarily due to the perceived complexity of integrating such a clocking scheme. In this article, we first identify the most promising conditions for achieving low-power operation with DET clocking and then introduce a fully automated design flow for applying DET to a conventional SET design. 
The proposed design flow is demonstrated on three benchmark circuits in a 40nm CMOS technology, providing as much as a 50\% reduction in clock distribution and register power consumption.", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2017:DMF, author = "Katherine Shu-Min Li and Sying-Jyan Wang", title = "Design Methodology of Fault-Tolerant Custom {$3$D} Network-on-Chip", journal = j-TODAES, volume = "22", number = "4", pages = "63:1--63:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3054745", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A systematic design methodology is presented for custom Network-on-Chip (NoC) in three-dimensional integrated circuits (3D-ICs). In addition, fault tolerance is supported in the NoC if extra links are included in the NoC topology. In the proposed method, processors and the communication architecture are synthesized simultaneously in the 3D floorplanning process. 3D-IC technology enables ICs to be implemented in smaller size with higher performance; on the flip side, 3D-ICs suffer yield loss due to multiple dies in a 3D stack and lower manufacturing yield of through-silicon vias (TSVs). To alleviate this problem, a known-good-dies (KGD) test can be applied to ensure every die to be packaged into a 3D-IC is fault-free. However, faulty TSVs cannot be tested in the KGD test. In this article, the proposed method deals with the problem by providing fault tolerance in the NoC topology. 
The efficiency of the proposed method is evaluated using several benchmark circuits, and the experimental results show that the proposed method produces 3D NoCs with performance comparable to previous methods when fault-tolerant features are not realized. With fault tolerance in NoCs, higher yield can be achieved at the cost of performance penalty and elevated power level.", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pagliari:2017:AEE, author = "Daniele Jahier Pagliari and Enrico Macii and Massimo Poncino", title = "Approximate Energy-Efficient Encoding for Serial Interfaces", journal = j-TODAES, volume = "22", number = "4", pages = "64:1--64:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3041220", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Serial buses are ubiquitous interconnections in embedded computing systems that are used to interface processing elements with peripherals, such as sensors, actuators, and I/O controllers. Despite their limited wiring, as off-chip connections they can account for a significant amount of the total power consumption of a system-on-chip device. Encoding the information sent on these buses is the most intuitive and affordable way to reduce their power contribution; moreover, the encoding can be made even more effective by exploiting the fact that many embedded applications can tolerate intermediate approximations without a significant impact on the final quality of results, thus trading off accuracy for power consumption. We propose a simple yet very effective approximate encoding for reducing dynamic energy in serial buses. 
Our approach uses differential encoding as a baseline scheme and extends it with bounded approximations to overcome the intrinsic limitations of differential encoding for data with low temporal correlation. We show that the proposed scheme, in addition to yielding extremely compact codecs, is superior to all state-of-the-art approximate serial encodings over a wide set of traces representing data received or sent from/to sensors or actuators.", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Schafer:2017:PHL, author = "Benjamin Carrion Schafer", title = "Parallel High-Level Synthesis Design Space Exploration for Behavioral {IPs} of Exact Latencies", journal = j-TODAES, volume = "22", number = "4", pages = "65:1--65:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3041219", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work presents a Design Space Exploration (DSE) method for Behavioral IPs (BIPs) given in ANSI-C or SystemC to find the smallest micro-architecture for a specific target latency. Previous work on High-Level Synthesis (HLS) DSE mainly focused on finding a tradeoff curve with Pareto-optimal designs. HLS is, however, a single process (component) synthesis method. Very often, the latency of the components requires a specific fixed latency when inserted within a larger system. This work presents a fast multi-threaded method to find the smallest micro-architecture for a given BIP and target latency by discriminating between all different exploration knobs and exploring these concurrently. 
Experimental results show that our proposed method is very effective and comprehensive results compare the quality of results vs. the speedup of our proposed explorer.", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Moudallal:2017:GCC, author = "Zahi Moudallal and Farid N. Najm", title = "Generating Current Constraints to Guarantee {RLC} Power Grid Safety", journal = j-TODAES, volume = "22", number = "4", pages = "66:1--66:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3054746", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A critical task during early chip design is the efficient verification of the chip power distribution network. Vectorless verification, developed since the mid-2000s as an alternative to traditional simulation-based methods, requires the user to specify current constraints (budgets) for the underlying circuitry and checks if the corresponding voltage variations on all grid nodes are within a user-specified margin. This framework is extremely powerful, as it allows for efficient and early verification, but specifying/obtaining current constraints remains a burdensome task for users and a hurdle to adoption of this framework by the industry. Recently, the inverse problem has been introduced: Generate circuit current constraints that, if satisfied by the underlying logic circuitry, would guarantee grid safety from excessive voltage variations. This approach has many potential applications, including various grid quality metrics, as well as voltage drop-aware placement and floorplanning. So far, this framework has been developed assuming only resistive and capacitive (RC) elements in the power grid model. 
Inductive effects are becoming a significant component of the power supply noise and can no longer be ignored. In this article, we extend the constraints generation approach to allow for inductance. We give a rigorous problem definition and develop some key theoretical results related to maximality of the current space defined by the constraints. Based on this, we then develop three constraints generation algorithms that target the peak total chip power that is allowed by the grid, the uniformity of current distribution across the die area, and a combination of both metrics.", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2017:TMR, author = "Irith Pomeranz and M. Enamul Amyeen and Srikanth Venkataraman", title = "Test Modification for Reduced Volumes of Fail Data", journal = j-TODAES, volume = "22", number = "4", pages = "67:1--67:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3065925", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As part of a yield improvement process, fail data is collected from faulty units. Several approaches exist for reducing the tester time and the volume of fail data that needs to be collected based on the observation that a subset of the fail data is sufficient for accurate defect diagnosis. This article addresses the volume of fail data by considering the test set that is used for collecting fail data. It observes that certain faults from a set of target faults produce significantly larger numbers of faulty output values (and therefore significantly larger volumes of fail data) than other faults under a given test set. 
Based on this observation, it describes a procedure for modifying the test set to reduce the maximum number of faulty output values that a target fault produces. When defects are considered in a simulation experiment, and a defect diagnosis procedure is applied to the fail data that they produce, two effects are observed: the maximum and average numbers of faulty output values per defect are reduced significantly with the modified test set, and the quality of diagnosis is similar or even improved with the modified test set.", acknowledgement = ack-nhfb, articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2017:MSS, author = "Ya Wang and Di Gao and Dani Tannir and Ning Dong and G. Peter Fang and Wei Dong and Peng Li", title = "Multiharmonic Small-Signal Modeling of Low-Power {PWM} {DC-DC} Converters", journal = j-TODAES, volume = "22", number = "4", pages = "68:1--68:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3057274", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Small-signal models of pulse-width modulation (PWM) converters are widely used for analyzing stability and play an important role in converter design and control. However, existing small-signal models either are based on averaged DC behaviors, and hence are unable to capture frequency responses that are faster than the switching frequency, or greatly approximate these high-frequency responses. We address the severe limitations of the existing models by proposing a multiharmonic model that provides a complete small-signal characterization of both DC averages and high-order harmonic responses. 
The proposed model captures important high-frequency overshoots and undershoots of the converter response, which are otherwise unaccounted for by the existing techniques. In two converter examples, the proposed model corrects the misleading results of the existing models by providing truthful characterization of the overall converter AC response and offers important guidance for converter design and closed-loop control.", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Albalawi:2017:TFP, author = "Hassan Albalawi and Yuanning Li and Xin Li", title = "Training Fixed-Point Classifiers for On-Chip Low-Power Implementation", journal = j-TODAES, volume = "22", number = "4", pages = "69:1--69:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3057275", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we develop several novel algorithms to train classifiers that can be implemented on chip with low-power fixed-point arithmetic with extremely small word length. These algorithms are based on Linear Discriminant Analysis (LDA), Support Vector Machine (SVM), and Logistic Regression (LR), and are referred to as LDA-FP, SVM-FP, and LR-FP, respectively. They incorporate the nonidealities (i.e., rounding and overflow) associated with fixed-point arithmetic into the offline training process so that the resulting classifiers are robust to these nonidealities. Mathematically, LDA-FP, SVM-FP, and LR-FP are formulated as mixed integer programming problems that can be robustly solved by the branch-and-bound methods described in this article. 
Our numerical experiments demonstrate that LDA-FP, SVM-FP, and LR-FP substantially outperform the conventional approaches for the emerging biomedical applications of brain decoding.", acknowledgement = ack-nhfb, articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hoveida:2017:EMA, author = "Mohaddeseh Hoveida and Fatemeh Aghaaliakbari and Ramin Bashizade and Mohammad Arjomand and Hamid Sarbazi-Azad", title = "Efficient Mapping of Applications for Future Chip-Multiprocessors in Dark Silicon Era", journal = j-TODAES, volume = "22", number = "4", pages = "70:1--70:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3055202", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The failure of Dennard scaling has led to the utilization wall that is the source of dark silicon and limits the percentage of a chip that can actively switch within a given power budget. To address this issue, a structure is needed to guarantee the limited power budget along with providing sufficient flexibility and performance for different applications with various communication requirements. In this article, we present a general-purpose platform for future many-core Chip-Multiprocessors (CMPs) that benefits from the advantages of clustering, Network-on-Chip (NoC) resource sharing among cores, and power gating the unused components of clusters. We also propose two task mapping methods for the proposed platform in which active and dark cores are dispersed appropriately, so that an excess of power budget can be obtained. 
Our evaluations reveal that the first and second proposed mapping mechanisms respectively reduce the execution time by up to 28.6\% and 39.2\% and the NoC power consumption by up to 11.1\% and 10\%, and gain an excess power budget of up to 7.6\% and 13.4\% over the baseline architecture.", acknowledgement = ack-nhfb, articleno = "70", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Saha:2017:STS, author = "Sangeet Saha and Arnab Sarkar and Amlan Chakrabarti", title = "Spatio-Temporal Scheduling of Preemptive Real-Time Tasks on Partially Reconfigurable Systems", journal = j-TODAES, volume = "22", number = "4", pages = "71:1--71:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3056561", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Reconfigurable devices that promise to offer the twin benefits of flexibility as in general-purpose processors along with the efficiency of dedicated hardware often provide a lucrative solution for many of today's highly complex real-time embedded systems. However, online scheduling of dynamic hard real-time tasks on such systems with efficient resource utilization in terms of both space and time poses an enormously challenging problem. We attempt to solve this problem using a combined offline-online approach. The offline component generates and stores various optional feasible placement solutions for different sub-sets of tasks that may possibly be co-mapped together. 
Given a set of periodic preemptive real-time tasks that requires to be executed at runtime, the online scheduler first carries out an admission control procedure and then produces a schedule, which is guaranteed to meet all timing constraints provided it is spatially feasible to place designated subsets of these tasks at specified scheduling points within a future time interval. These feasibility checks are done and actual placement solutions are obtained through a low overhead search of the statically precomputed placement solutions. Based on this approach, we have proposed a periodic preemptive real-time scheduling methodology for runtime partially reconfigurable devices. Effectiveness of the proposed strategy has been verified through simulation based experiments and we observed that the strategy achieves high resource utilization with low task rejection rates over various simulation scenarios.", acknowledgement = ack-nhfb, articleno = "71", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Abella:2017:MBW, author = "Jaume Abella and Maria Padilla and Joan {Del Castillo} and Francisco J. Cazorla", title = "Measurement-Based Worst-Case Execution Time Estimation Using the Coefficient of Variation", journal = j-TODAES, volume = "22", number = "4", pages = "72:1--72:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3065924", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Extreme Value Theory (EVT) has been historically used in domains such as finance and hydrology to model worst-case events (e.g., major stock market incidences). 
EVT takes as input a sample of the distribution of the variable to model and fits the tail of that sample to either the Generalised Extreme Value (GEV) or the Generalised Pareto Distribution (GPD). Recently, EVT has become popular in real-time systems to derive worst-case execution time (WCET) estimates of programs. However, the application of EVT is not straightforward and requires a detailed analysis of, and customisation for, the particular problem at hand. In this article, we tailor the application of EVT to timing analysis. To that end, (1) we analyse the response time of different hardware resources (e.g., cache memories) and identify those that may lead to radically different types of execution time distributions. (2) We show that one of these distributions, known as mixture distribution, causes problems in the use of EVT. In particular, mixture distributions challenge not only properly selecting GEV/GPD parameters (i.e., location, scale and shape) but also determining the size of the sample to ensure that enough tail values are passed to EVT and that only tail values are used by EVT to fit GEV/GPD. Failing to select these parameters has a negative impact on the quality of the derived WCET estimates. We tackle these problems, by (3) proposing Measurement-Based Probabilistic Timing Analysis using the Coefficient of Variation (MBPTA-CV), a new mixture-distribution aware, WCET-suited MBPTA method that builds on recent EVT developments in other fields (e.g., finance) to automatically select the distribution parameters that best fit the maxima of the observed execution times. 
Our results on a simulation environment and a real board show that MBPTA-CV produces high-quality WCET estimates.", acknowledgement = ack-nhfb, articleno = "72", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Salcic:2017:NHH, author = "Zoran Salcic and Heejong Park and J{\"u}rgen Teich and Avinash Malik and Muhammad Nadeem", title = "{NoC-HMP}: a Heterogeneous Multicore Processor for Embedded Systems Designed in {SystemJ}", journal = j-TODAES, volume = "22", number = "4", pages = "73:1--73:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3073416", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Scalability and performance in multicore processors for embedded and real-time systems usually don't go well each with the other. Networks on Chip (NoCs) provide scalable execution platforms suitable for such kind of embedded systems. This article presents a NoC-based Heterogeneous Multi-Processor system, called NoC-HMP, which is a scalable platform for embedded systems developed in the GALS language SystemJ. NoC-HMP uses a time-predictable TDMA-MIN NoC to guarantee latencies and communication time between the two types of time-predictable cores and can be customized for a specific performance goal through the execution strategy and scheduling of SystemJ program deployed across multiple cores. Examples of different execution strategies are introduced, explored and analyzed via measurements. The number of used cores can be minimized to achieve the target performance of the application. TDMA-MIN allows easy extensions of NoC-HMP with other cores or IP blocks.
Experiments show a significant improvement of performance over a single core system and demonstrate how the addition of cores affects the performance of the designed system.", acknowledgement = ack-nhfb, articleno = "73", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Behera:2017:TTS, author = "Lalatendu Behera and Purandar Bhaduri", title = "Time-Triggered Scheduling of Mixed-Criticality Systems", journal = j-TODAES, volume = "22", number = "4", pages = "74:1--74:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3073415", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Real-time and embedded systems are moving from the traditional design paradigm to integration of multiple functionalities onto a single computing platform. Some of the functionalities are safety critical and subject to certification. The rest of the functionalities are nonsafety critical and do not need to be certified. Designing efficient scheduling algorithms which can be used to meet the certification requirement is challenging. Our research considers the time-triggered approach to scheduling of mixed-criticality jobs with two criticality levels. The first proposed algorithm for the time-triggered approach is based on the OCBP scheduling algorithm which finds a fixed-priority order of jobs. Based on this priority order, the existing algorithm constructs two scheduling tables S$_{LO}^{oc}$ and S$_{HI}^{oc}$. The scheduler uses these tables to find a scheduling strategy. Another time-triggered algorithm called MCEDF was proposed as an improvement over the OCBP-based algorithm. Here we propose an algorithm which directly constructs two scheduling tables without using a priority order. 
Furthermore, we show that our algorithm schedules a strict superset of instances which can be scheduled by the OCBP-based algorithm as well as by MCEDF. We show that our algorithm outperforms both the OCBP-based algorithm and MCEDF in terms of the number of instances scheduled in a randomly generated set of instances. We generalize our algorithm for jobs with m criticality levels. Subsequently, we extend our algorithm to find scheduling tables for periodic and dependent jobs. Finally, we show that our algorithm is also applicable to mixed-criticality synchronous programs upon uniprocessor platforms and schedules a bigger set of instances than the existing algorithm.", acknowledgement = ack-nhfb, articleno = "74", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2017:ILA, author = "Derong Liu and Bei Yu and Salim Chowdhury and David Z. Pan", title = "Incremental Layer Assignment for Timing Optimization", journal = j-TODAES, volume = "22", number = "4", pages = "75:1--75:??", month = jul, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3083727", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:32 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With VLSI technology nodes scaling into the nanometer regime, interconnect delay plays an increasingly critical role in timing. For layer assignment, most works deal with via counts or total net delays, ignoring critical paths of each net and resulting in potential timing issues. In this article, we propose an incremental layer assignment framework targeting delay optimization in timing the critical path of each net. 
A set of novel techniques are presented: self-adaptive quadruple partition based on K $ \times $ K division benefits the runtime; semidefinite programming is utilized for each partition; and the sequential mapping algorithm guarantees integer solutions while satisfying edge capacities; additionally, concurrent mapping offers a global view of assignment and post delay optimization reduces the path timing violations. The effectiveness of our work is verified by ISPD'08 benchmarks.", acknowledgement = ack-nhfb, articleno = "75", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bi:2017:OQE, author = "Zhaori Bi and Dian Zhou and Sheng-Guo Wang and Xuan Zeng", title = "Optimization and Quality Estimation of Circuit Design via Random Region Covering Method", journal = j-TODAES, volume = "23", number = "1", pages = "1:1--1:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084685", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Random region covering is a global optimization technique that explores the landscape by introducing multiple random starting points to initiate the local optimization solvers. This study applies the random region covering technique to circuit design automation and proposes a theory to explain why this technique is efficient at searching for the global optimum. In addition to analyzing the efficiency of the random region covering algorithm, the theory gives a probability-based estimation of the goodness of the optimization result. To enhance the efficiency of the random region covering technique, this work evaluates the boundary of top performance regions and proposes a modified random region covering method that only performs the global optimization on the top design region. 
The results from a large number of mathematical experiments verify the proposed methodology. The optimized designs of a class-E power amplifier and a wide load range operational amplifier outperform both manual designs and other state-of-the-art optimization techniques.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jeong:2017:CSP, author = "Jae Woong Jeong and Vishwanath Natarajan and Shreyas Sen and Tm Mak and Jennifer Kitchen and Sule Ozev", title = "A Comprehensive {BIST} Solution for Polar Transceivers Using On-Chip Resources", journal = j-TODAES, volume = "23", number = "1", pages = "2:1--2:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084689", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a Built-in self-test (BIST) solution for polar transceivers with low cost and high accuracy. Radio frequency (RF) Polar transceivers are desirable for portable devices due to higher power efficiency compared to traditional RF Cartesian transceivers. Unfortunately, their design is quite challenging due to substantially different signal paths that need to work coherently to ensure signal quality. In the receiver, phase and gain mismatches degrade sensitivity and error vector magnitude. In the transmitter, delay skew between the envelope and phase signals and the finite envelope bandwidth can create intermodulation distortion, which leads to violation of spectral mask requirements. Typically, these parameters are not directly measured but calibrated through spectral analysis using expensive RF equipment, leading to lengthy and costly measurement/calibration cycles. 
However, characterization and calibration of these parameters with analytical model would reduce the test time and cost considerably. In this article, we propose a technique to measure with the intent to calibrate impairments of the polar transceiver in the loop-back mode. Simulation and hardware measurement results show that the proposed technique can characterize the targeted impairments accurately.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vatanparvar:2017:EVO, author = "Korosh Vatanparvar and Mohammad Abdullah {Al Faruque}", title = "Electric Vehicle Optimized Charge and Drive Management", journal = j-TODAES, volume = "23", number = "1", pages = "3:1--3:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084686", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Electric vehicles (EVs) have been considered as a solution to the environmental issues caused by transportation, such as air pollution and greenhouse gas emission. However, limited energy capacity, scarce EV supercharging stations, and long recharging time have brought anxiety to drivers who use EVs as their main means of transportation. Furthermore, EV owners need to deal with a huge battery replacement cost when the battery capacity degrades. Yet in-house EV chargers affect the pattern of the power grid load, which is not favorable to the utilities. The driving route, departure/arrival time of daily trips, and electricity price influence the EV energy consumption, battery lifetime, electricity cost, and EV charger load on the power grid. The EV driving range and battery lifetime issues have been addressed by battery management systems and route optimization methodologies.
However, in this article, we are proposing an optimized charge and drive management (OCDM) methodology that selects the optimal driving route, schedules daily trips, and optimizes the EV charging process while considering the driver's timing preference. Our methodology will improve the EV driving range, extend the battery lifetime, reduce the recharging cost, and diminish the influence of EV chargers on the power grid. The performance of our methodology compared to the state of the art has been analyzed by experimenting on three benchmark EVs and three drivers. Our methodology has decreased EV energy consumption by 27\%, improved the battery lifetime by 24.8\%, reduced the electricity cost by 35\%, and diminished the power grid peak load by 17\% while increasing less than 20 minutes of daily driving time. Moreover, the scalability of our OCDM methodology for different parameters (e.g., time resolution and multiday cycles) in terms of execution time and memory usage has been analyzed.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2017:WPL, author = "Shuai Wang and Guangshan Duan and Yupeng Li and Qianhao Dong", title = "Word- and Partition-Level Write Variation Reduction for Improving Non-Volatile Cache Lifetime", journal = j-TODAES, volume = "23", number = "1", pages = "4:1--4:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084690", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Non-volatile memory technologies are among the most promising technologies for implementing the main memories and caches in future microprocessors and replacing the traditional DRAM and SRAM technologies.
However, one of the most challenging design issues of the non-volatile memory technologies is the limited write. In this article, we first propose to exploit the narrow-width values to improve the lifetime of non-volatile last-level caches with word-level write variation reduction. Leading zeros masking scheme is proposed to reduce the write stress to the upper half of the narrow-width data. To balance the write variations between the upper half and the lower half of the narrow-width data, two swapping schemes, the swap on write (SW) and swap on replacement (SRepl), are proposed. Two existing optimization schemes, the multiple dirty bit (MDB) and read before write (RBW), are adopted with our word-level swapping design. To further reduce the write variation on the partition level, we propose to exploit the cache partitioning design to improve the lifetime. Based on the observation that different applications demonstrate different cache access (write) behaviors, we propose to partition the last-level cache for different applications and balance the write variations by partition swapping. Both software-based and hardware-based partitioning and swapping schemes are proposed and evaluated for different situations. Our experimental results show that both our word- and partition-level designs can improve the lifetime of the non-volatile caches effectively with low performance and energy overheads.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Trinadh:2017:ODC, author = "A. Satya Trinadh and Seetal Potluri and Sobhan Babu Ch. and V. 
Kamakoti and Shiv Govind Singh", title = "Optimal Don't Care Filling for Minimizing Peak Toggles During At-Speed Stuck-At Testing", journal = j-TODAES, volume = "23", number = "1", pages = "5:1--5:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084684", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to the increase in manufacturing/environmental uncertainties in the nanometer regime, testing digital chips under different operating conditions becomes mandatory. Traditionally, stuck-at tests were applied at slow speed to detect structural defects and transition fault tests were applied at-speed to detect delay defects. Recently, it was shown that certain cell-internal defects can only be detected using at-speed stuck-at testing. Stuck-at test patterns are power hungry, thereby causing excessive voltage droop on the power grid, delaying the test response, and finally leading to false delay failures on the tester. This motivates the need for peak power minimization during at-speed stuck-at testing. In this article, we use input toggle minimization as a means to minimize a circuit's power dissipation during at-speed stuck-at testing under the Combinational State Preservation scan (CSP-scan) Design-For-Testability (DFT) scheme. For circuits whose test sets are dominated by don't cares, this article maps the problem of optimal X-filling for peak input toggle minimization to a variant of the interval coloring problem and proposes a Dynamic Programming (DP) algorithm (DP-fill) for the same along with a theoretical proof for its optimality. 
For circuits whose test sets are not dominated by don't cares, we propose a max scatter Hamiltonian path algorithm, which ensures that the ordering is done such that the don't cares are evenly distributed in the final ordering of test cubes, thereby leading to better input toggle savings than DP-fill. The proposed algorithms, when experimented on ITC99 benchmarks, produced peak power savings of up to 48\% over the best-known algorithms in literature. We have also pruned the solutions thus obtained using Greedy and Simulated Annealing strategies with iterative 1-bit neighborhood to validate our idea of optimal input toggle minimization as an effective technique for minimizing peak power dissipation during at-speed stuck-at testing.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2017:TSL, author = "Xingquan Li and Wenxing Zhu", title = "Two-Stage Layout Decomposition for Hybrid E-Beam and Triple Patterning Lithography", journal = j-TODAES, volume = "23", number = "1", pages = "6:1--6:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3084683", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hybrid e-beam lithography (EBL) and triple patterning lithography (TPL) are advanced technologies for the manufacture of integrated circuits. We propose a technology that combines the advantages of EBL and TPL, which is more promising for the pattern product industry. Layout decomposition is a crucial step in this technology. In this article, we propose a two-stage decomposition flow for the hybrid e-beam and triple patterning lithography of the general layout decomposition (HETLD) problem. 
At the first stage, we formulate two optimization problems: the e-beam and stitch-aware TPL mask assignment (ESTMA) problem and the extended minimum weight dominating set for R$_4$ mask assignment (MDS R$_4$ MA) problem. Binary linear program formulations of the two problems are solved by the cutting plane approach. At the second stage, solutions of the first stage problems are legalized to feasible solutions of the HETLD problem by stitch insertion and e-beam shot. To speed up decomposition, we reduce the problem size by removing some vertices and some minor conflict edges before decomposition. Experimental results show the effectiveness of our decomposition methods based on ESTMA and MDS R$_4$ MA.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Das:2017:VBP, author = "Sourav Das and Dongjin Lee and Wonje Choi and Janardhan Rao Doppa and Partha Pratim Pande and Krishnendu Chakrabarty", title = "{VFI}-Based Power Management to Enhance the Lifetime of High-Performance {$3$D} {NoCs}", journal = j-TODAES, volume = "23", number = "1", pages = "7:1--7:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3092843", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The emergence of 3D network-on-chip (NoC) has revolutionized the design of high-performance and energy-efficient manycore chips. However, the anticipated performance gain can be compromised due to the degradation and failure of vertical links (VLs). The Through-Silicon-Via (TSV)-enabled VLs may fail due to workload-induced stress; the failure of a VL can affect the neighboring VLs, thereby causing a cascade of failures and reducing the lifetime of the chip. 
To enhance the reliability of 3D NoC-enabled manycore chips, we propose to incorporate a voltage-frequency island (VFI)-based power management strategy that helps to reduce the energy consumption and hence, the workload-induced stress of the highly utilized VLs. The adopted power-management strategy relies on control decisions about the voltage/frequency (V/F) levels on VLs. We demonstrate that compared to the well-known spare TSV allocation and adaptive routing strategies, power management is more effective in enhancing the reliability of a 3D NoC. VFI-based power management improves the reliability of the 3D NoC by one order of magnitude compared to both adaptive routing and spare allocation while running popular SPLASH-2 and PARSEC benchmarks. The principal benefit of power management is that it is capable of reducing the operating temperature of the system, which in turn enhances the Mean-Time-To-Failure (MTTF) of the VLs and reliability of the overall 3D NoC.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Murugesan:2017:NRM, author = "Shanmugakumar Murugesan and Noor Mahammad Sk", title = "A Novel Range Matching Architecture for Packet Classification Without Rule Expansion", journal = j-TODAES, volume = "23", number = "1", pages = "8:1--8:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3105958", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The speed requirement for the routing table lookup and the packet classification is rapidly increasing due to the increase in the number of packets needed to be processed per second. 
The hardware-based packet classification relies on ternary content addressable memory (TCAM) to meet this speed requirement. However, TCAM consumes huge power and also supports only for longest prefix match and exact match, where the classification rule also has a range match (RM) field. Hence, it is mandatory to encode the RM into prefix match to accommodate the rule in TCAM. In the worst case, one rule is encoded into $ (2 W - 2)^2 $ rules (where $W$ is a number of bits to represent range). This work proposes a novel RM architecture, and a detailed analysis about the range field on the standard dataset and the real-life classifier rules are presented. In the literature, the existing RM architecture is used to avoid the range to prefix conversion, but due to the serial operation, it lacks in performance. For constant time lookup, TCAM is the best option, but it does not support RM. The proposed architecture takes one clock cycle for RM and does not require any encoding/conversion. Hence, there will be a single entry for every rule. It is observed that just 4\% of the two-dimensional range rules are present in this dataset, and it will increase the rule set size by 4 times in the best case and nearly 30 times in the worst case. The proposed RM circuit is operated in parallel with TCAM without compromising the speed, and this circuit saves huge power around 70\% and area around 61\%, where the range to prefix conversion/encoding is completely avoided. The proposed architecture is well suited for current IPv4- and IPv6-based networks, as well as in software-defined networks in the near future.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chithira:2017:HTS, author = "P. R.
Chithira and Vinita Vasudevan", title = "A Hierarchical Technique for Statistical Path Selection and Criticality Computation", journal = j-TODAES, volume = "23", number = "1", pages = "9:1--9:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3107030", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to process variations, every path in the circuit is associated with a probability of being critical and a measure of this probability is the criticality of the path. Identification of critical paths usually proceeds in two steps, namely, generation of a candidate path set followed by computation of path criticality. As criticality computation is expensive, the candidate path set is chosen using simpler metrics. However, these metrics are not directly related to path criticality and, often, the set also contains low criticality paths that do not need to be tested. In this article, we propose a hierarchical technique that directly gives all paths above a global criticality threshold. The circuit is divided into disjoint groups at various levels. We show that the criticality of a group at each level of hierarchy can be computed using criticality of the parent group and the local complementary delay within the group. Low criticality groups are pruned at every level, making the computation efficient. This recursive partitioning and group criticality computation is continued until the group criticality falls below a threshold. Beyond this, the path selection within the group is done using branch-and-bound algorithm with global criticality as the metric. This is possible, since our method for criticality computation is very efficient. 
Unlike other techniques, path selection and criticality computation are integrated together so that when the path selection is complete, path criticality is also obtained. The proposed algorithm is tested with ISCAS'85, ISCAS'89, and ITC'99 benchmark circuits and the results are verified using Monte Carlo simulation. The experimental results suggest that the proposed method gives better accuracy on average with around 90\% reduction in run-time.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Moon:2017:ASP, author = "Hyungon Moon and Jinyong Lee and Dongil Hwang and Seonhwa Jung and Jiwon Seo and Yunheung Paek", title = "Architectural Supports to Protect {OS} Kernels from Code-Injection Attacks and Their Applications", journal = j-TODAES, volume = "23", number = "1", pages = "10:1--10:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3110223", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The kernel code injection is a common behavior of kernel-compromising attacks where the attackers aim to gain their goals by manipulating an OS kernel. Several security mechanisms have been proposed to mitigate such threats, but they all suffer from non-negligible performance overhead. This article introduces a hardware reference monitor, called Kargos, which can detect the kernel code injection attacks with nearly zero performance cost. Kargos monitors the behaviors of an OS kernel from outside the CPU through the standard bus interconnect and debug interface available with most major microprocessors. 
By watching the execution traces and memory access events in the monitored target system, Kargos uncovers attempts to execute malicious code with the kernel privilege. On top of this, we also applied the architectural supports for Kargos to the detection of ROP attacks. KS-Stack is the hardware component that builds and maintains the shadow stacks using the existing supports to detect these ROP attacks. According to our experiments, Kargos detected all the kernel code injection attacks that we tested, yet just increasing the computational loads on the target CPU by less than 1\% on average. The performance overhead of the KS-Stack was also less than 1\%.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2017:ELD, author = "Yunfeng Yang and Wai-Shing Luk and Hai Zhou and David Z. Pan and Dian Zhou and Changhao Yan and Xuan Zeng", title = "An Effective Layout Decomposition Method for {DSA} with Multiple Patterning in Contact-Hole Generation", journal = j-TODAES, volume = "23", number = "1", pages = "11:1--11:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3131847", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Directed self-assembly (DSA) complemented with multiple patterning (MP) is an attractive next generation lithography (NGL) technique for contact-hole generation. Nevertheless, a high-quality DSA-aware layout decomposer is required to enable the technology. In this article, we introduce an efficient method which incorporates a set packing for generating DSA template candidates and a local search method. Besides, a multi-start strategy is integrated into the framework to prevent the local minima.
Our framework encourages the reuse of existing coloring solvers. Hence, the development cost can significantly be reduced. In addition, for DSA multiple patterning where the number of masks is larger than two, we present an efficient iterative partition based method. Experimental results show that compared with the state-of-the-art work, our methods can achieve roughly 100$ \times $ speedup for double patterning, and 78.8\% conflict reduction with 5$ \times $ speedup for triple patterning on the dense graphs.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2017:AMM, author = "Chao Chen and Giovanni Beltrame", title = "An Adaptive {Markov} Model for the Timing Analysis of Probabilistic Caches", journal = j-TODAES, volume = "23", number = "1", pages = "12:1--12:??", month = oct, year = "2017", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3123877", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 22 09:03:33 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate timing prediction for real-time embedded software execution is becoming a problem due to the increasing complexity of computer architecture, and the presence of mixed-criticality workloads. Probabilistic caches were proposed to set bounds to Worst Case Execution Time (WCET) estimates and help designers improve real-time embedded system resource use. Static Probabilistic Timing Analysis (SPTA) for probabilistic caches is nevertheless difficult to perform, because cache accesses depend on execution history, and the computational complexity of SPTA makes it intractable for calculation as the number of accesses increases. In this paper, we explore and improve SPTA for caches with evict-on-miss random replacement policy using a state space modeling technique. 
A nonhomogeneous Markov model is employed for single-path programs in discrete-time finite state space representation. To make this Markov model tractable, we limit the number of states and use an adaptive method for state modification. Experiments show that compared to the state-of-the-art methodology, the proposed adaptive Markov chain approach provides better results at the occurrence probability of 10$^{-15}$: in terms of accuracy, the state-of-the-art SPTA results are more conservative, by 11\% more on average. In terms of computation time, our approach is not significantly different from the state-of-the-art SPTA.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kritikakou:2018:DDS, author = "Angeliki Kritikakou and Thibaut Marty and Matthieu Roy", title = "{DYNASCORE}: {DYNAmic Software COntroller to Increase REsource} Utilization in Mixed-Critical Systems", journal = j-TODAES, volume = "23", number = "2", pages = "13:1--13:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3110222", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In real-time mixed-critical systems, Worst-Case Execution Time (WCET) analysis is required to guarantee that timing constraints are respected---at least for high-criticality tasks. However, the WCET is pessimistic compared to the real execution time, especially for multicore platforms. As WCET computation considers the worst-case scenario, it means that whenever a high-criticality task accesses a shared resource in multicore platforms, it is considered that all cores use the same resource concurrently.
This pessimism in WCET computation leads to a dramatic underutilization of the platform resources, or even failing to meet the timing constraints. In order to increase resource utilization while guaranteeing real-time guarantees for high-criticality tasks, previous works proposed a runtime control system to monitor and decide when the interferences from low-criticality tasks cannot be further tolerated. However, in the initial approaches, the points where the controller is executed were statically predefined. In this work, we propose a dynamic runtime control which adapts its observations to online temporal properties, further increasing the dynamism of the approach, and mitigating the unnecessary overhead implied by existing static approaches. Our dynamic adaptive approach allows one to control the ongoing execution of tasks based on runtime information, and further increases the gains in terms of resource utilization compared with static approaches.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Boukhobza:2018:ENS, author = "Jalil Boukhobza and St{\'e}phane Rubini and Renhai Chen and Zili Shao", title = "Emerging {NVM}: a Survey on Architectural Integration and Research Challenges", journal = j-TODAES, volume = "23", number = "2", pages = "14:1--14:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3131848", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "There has been a surge of interest in Non-Volatile Memory (NVM) in recent years. With many advantages, such as density and power consumption, NVM is carving out a place in the memory hierarchy and may eventually change our view of computer architecture. 
Many NVMs have emerged, such as Magnetoresistive random access memory (MRAM), Phase Change random access memory (PCM), Resistive random access memory (ReRAM), and Ferroelectric random access memory (FeRAM), each with its own peculiar properties and specific challenges. The scientific community has carried out a substantial amount of work on integrating those technologies in the memory hierarchy. As many companies are announcing the imminent mass production of NVMs, we think that it is time to have a step back and discuss the body of literature related to NVM integration. This article surveys state-of-the-art work on integrating NVM into the memory hierarchy. Specially, we introduce the four types of NVM, namely, MRAM, PCM, ReRAM, and FeRAM, and investigate different ways of integrating them into the memory hierarchy from the horizontal or vertical perspectives. Here, horizontal integration means that the new memory is placed at the same level as an existing one, while vertical integration means that the new memory is interleaved between two existing levels. 
In addition, we describe challenges and opportunities with each NVM technique.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gao:2018:ECI, author = "Congming Gao and Liang Shi and Yejia Di and Qiao Li and Chun Jason Xue and Kaijie Wu and Edwin Sha", title = "Exploiting Chip Idleness for Minimizing Garbage Collection-Induced Chip Access Conflict on {SSDs}", journal = j-TODAES, volume = "23", number = "2", pages = "15:1--15:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3131850", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Solid state drives (SSDs) are normally constructed with a number of parallel-accessible flash chips, where host I/O requests are processed in parallel. In addition, there are many internal activities in SSDs, such as garbage collection and wear leveling induced read, write, and erase operations, to solve the issues of inability of in-place updates and limited lifetime. When internal activities are triggered on a chip, the chip will be blocked. Our preliminary studies on several workloads show that when internal activities are frequently triggered, the host I/O performance will be significantly impacted because of the access conflict between them. In this work, in order to improve the access conflict induced performance degradation, a novel access conflict minimization scheme is proposed. The basic idea of the scheme is motivated by an interesting observation in SSDs: several chips are idle when other chips are busy with internal activities and host I/O requests. 
Based on this observation, we propose to schedule internal activities induced operations for minimized access conflict by exploiting the idleness of the multiple chips of SSDs. This approach is realized by two steps: First, read internal activities accessed data to the controller; second, by exploiting the idle chips during internal activities, write internal activities accessed data back to these idle chips. With this scheme, the internal activities can be processed with minimized access conflict to the host requests. Simulation results show that the proposed approach significantly reduces the access conflict, and in turn leads to a significant performance improvement of SSDs.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jun:2018:RBD, author = "Jaeyung Jun and Kyu Hyun Choi and Hokwon Kim and Sang Ho Yu and Seon Wook Kim and Youngsun Han", title = "Recovering from Biased Distribution of Faulty Cells in Memory by Reorganizing Replacement Regions through Universal Hashing", journal = j-TODAES, volume = "23", number = "2", pages = "16:1--16:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3131241", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recently, scaling down dynamic random access memory (DRAM) has become more of a challenge, with more faults than before and a significant degradation in yield. To improve the yield in DRAM, a redundancy repair technique with intra-subarray replacement has been extensively employed to replace faulty elements (i.e., rows or columns with defective cells) with spare elements in each subarray. 
Unfortunately, such technique cannot efficiently handle a biased distribution of faulty cells because each subarray has a fixed number of spare elements. In this article, we propose a novel redundancy repair technique that uses a hashing method to solve this problem. Our hashing technique reorganizes replacement regions by changing the way in which their replacement information is referred, thus making faulty cells become evenly distributed to the regions. We also propose a fast repair algorithm to find the best hash function among all possible candidates. Even if our approach requires little hardware overhead, it significantly improves the yield when compared with conventional redundancy techniques. In particular, the results of our experiment show that our technique saves spare elements by about 57\% and 55\% for a yield of 99\% at BER 1e-6 and 5e-7, respectively.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhou:2018:RRD, author = "Hongxia Zhou and Chiu-Wing Sham and Hailong Yao", title = "Revisiting Routability-Driven Placement for Analog and Mixed-Signal Circuits", journal = j-TODAES, volume = "23", number = "2", pages = "17:1--17:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3131849", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The exponential increase in scale and complexity of very large-scale integrated circuits (VLSIs) poses a great challenge to current electronic design automation (EDA) techniques. As an essential step in the whole EDA layout synthesis, placement is attracting more and more attention, especially for analog and mixed-signal integrated circuits. 
Recently, experts in this field have observed a variety of analog-specific layout constraints to obtain high-performance placement solutions. These constraints include symmetry, alignment, boundary, preplace, abutment, range and maximum separation, and routability of the placement solutions. In this article, the effectiveness of slicing and nonslicing representation is investigated. Additionally, the technique of congestion-based virtual sizing is proposed. Experimental results show that the routability can be improved significantly by applying congestion-based virtual sizing. Results also show that the slicing representation can improve the regularity of the placement solutions and hence improve the routability with higher efficiency compared to the nonslicing representation.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2018:ACS, author = "Shao-Chung Wang and Li-Chen Kan and Chao-Lin Lee and Yuan-Shin Hwang and Jenq-Kuen Lee", title = "Architecture and Compiler Support for {GPUs} Using Energy-Efficient Affine Register Files", journal = j-TODAES, volume = "23", number = "2", pages = "18:1--18:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3133218", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "A modern GPU can simultaneously process thousands of hardware threads. These threads are grouped into fixed-size SIMD batches executing the same instruction on vectors of data in a lockstep to achieve high throughput and performance. The register files are huge due to each SIMD group accessing a dedicated set of vector registers for fast context switching, and consequently the power consumption of register files has become an important issue. 
One proposed solution is to replace some of the vector registers by scalar registers, as different threads in a same SIMD group operate on scalar values and so the redundant computations and accesses of these scalar values can be eliminated. However, it has been observed that a significant number of registers containing affine vectors $ \upsilon $ such that $ \upsilon [i] = b + i \times s $ can be represented by base $b$ and stride $s$. Therefore, this article proposes an affine register file design for GPUs that is energy efficient due to it reducing the redundant executions of both the uniform and affine vectors. This design uses a pair of registers to store the base and stride of each affine vector and provides specific affine ALUs to execute affine instructions. A method of compiler analysis has been developed to detect scalars and affine vectors and annotate instructions for facilitating their corresponding scalar and affine computations. Furthermore, a priority-based register allocation scheme has been implemented to assign scalars and affine vectors to appropriate scalar and affine register files. Experimental results show that this design was able to dispatch 43.56\% of the computations to scalar and affine ALUs when using eight scalar and four affine registers per warp. 
This resulted in the current design also reducing the energy consumption of the register files and ALUs to 21.86\% and 26.54\%, respectively, and it reduced the overall energy consumption of the GPU by an average of 5.18\%.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pereira-Santos:2018:RFB, author = "Leonardo Pereira-Santos and Gabriel Luca Nazar and Luigi Carro", title = "Repair of {FPGA}-Based Real-Time Systems With Variable Slacks", journal = j-TODAES, volume = "23", number = "2", pages = "19:1--19:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3144533", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Field-programmable gate arrays (FPGAs) based on SRAM cells are an attractive alternative for real-time system designers, as they offer high density, low cost, and high performance. The use of SRAM cells in the FPGA's configuration memory, while enabling these desirable characteristics, also creates a reliability hazard as RAM cells are susceptible to single-event upsets (SEUs). The usual approach is the use of double or triple redundancy allied with a correction mechanism, such as periodic scrubbing. Although scrubbing is an effective technique to remove SEU-induced errors, the repair of real-time systems presents specific challenges, such as avoiding failures by missing real-time deadlines. In this article, a novel approach is proposed to use a deadline-aware scrubbing scheme with negligible area costs that dynamically chooses the scrubbing starting position. Such a scheme allows us to avoid missing real-time deadlines while maximizing the repair probability given a bounded repair time. 
Our approach reduces the failure rate, considering the probability of missing deadlines due to faults, by 33.39\% on average, with an average area cost of 1.23\%.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2018:CMD, author = "Chen-Hsuan Lin and Lu Wan and Deming Chen", title = "{C-Mine}: Data Mining of Logic Common Cases for Improved Timing Error Resilience with Energy Efficiency", journal = j-TODAES, volume = "23", number = "2", pages = "20:1--20:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3144534", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The better-than-worst-case (BTW) design methodology can achieve higher circuit energy efficiency, performance, or reliability by allowing timing errors for rare cases and rectifying them with error correction mechanisms. Therefore, the performance of BTW design heavily depends on the correctness of common cases, which are frequent input patterns in a workload. However, most existing methods do not provide sufficiently scalable solutions and also overlook the whole picture of the design. Thus, we propose a new technique, common-case mining method (C-Mine), which combines two scalable techniques, data mining and Boolean satisfiability (SAT) solving, to overcome these limitations. Data mining can efficiently extract patterns from an enormous dataset, and SAT solving is famous for its scalable verification. In this article, we present two versions of C-Mine, C-Mine-DCT and C-Mine-APR, which aim at faster runtime and better energy saving, respectively. 
The experimental results show that, compared to a recent publication, C-Mine-DCT can achieve compatible performance with an additional 8\% energy savings and 54$ \times $ speedup for bigger benchmarks on average. Furthermore, C-Mine-APR can achieve up to 13\% more energy saving than C-Mine-DCT while confronting designs with more common cases.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Rosvall:2018:FTA, author = "Kathrin Rosvall and Ingo Sander", title = "Flexible and Tradeoff-Aware Constraint-Based Design Space Exploration for Streaming Applications on Heterogeneous Platforms", journal = j-TODAES, volume = "23", number = "2", pages = "21:1--21:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3133210", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Due to its complexity, the problem of mapping and scheduling streaming applications on heterogeneous MPSoCs under real-time and performance constraints has traditionally been tackled by incomplete heuristic algorithms. In recent years, approaches based on Constraint Programming (CP) have shown promising results as complete methods for finding optimal mappings, in particular concerning throughput. However, so far none of the available CP approaches consider the tradeoff between throughput and buffer requirements or throughput and power consumption. This article integrates tradeoff awareness into the CP model and introduces a two-step solving approach that utilizes the advantages of heuristics, while still keeping the completeness property of CP.
With a number of experiments considering several streaming applications and different platform models, the article illustrates not only the efficiency of the presented model but also its suitability for solving different problems with various combinations of performance constraints.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Knechtel:2018:MOF, author = "Johann Knechtel and Jens Lienig and Ibrahim (Abe) M. Elfadel", title = "Multi-Objective {$3$D} Floorplanning with Integrated Voltage Assignment", journal = j-TODAES, volume = "23", number = "2", pages = "22:1--22:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3149817", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Voltage assignment is a well-known technique for circuit design, which has been applied successfully to reduce power consumption in classical 2D integrated circuits (ICs). Its usage in the context of 3D ICs has not been fully explored yet although reducing power in 3D designs is of crucial importance, for example, to tackle the ever-present challenge of thermal management. In this article, we investigate the effective and efficient partitioning of 3D designs into multiple voltage domains during the floorplanning step of physical design. In particular, we introduce, implement, and evaluate novel algorithms for effective integration of voltage assignment into the inner floorplanning loops. Our algorithms are compatible not only with the traditional objectives of 2D floorplanning but also with the additional objectives and constraints of 3D designs, including the planning of through-silicon vias (TSVs) and the thermal management of stacked dies. 
We test our 3D floorplanner extensively on the GSRC benchmarks as well as on an augmented version of the IBM-HB+ benchmarks. The 3D floorplans are shown to achieve effective trade-offs for power and delays throughout different configurations---our results surpass na{\"\i}ve low-power and high-performance voltage assignment by 17\% and 10\%, on average. Finally, we release our 3D floorplanning framework as open-source code.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2018:HEP, author = "Kun Yang and Haoting Shen and Domenic Forte and Swarup Bhunia and Mark Tehranipoor", title = "Hardware-Enabled Pharmaceutical Supply Chain Security", journal = j-TODAES, volume = "23", number = "2", pages = "23:1--23:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3144532", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The pharmaceutical supply chain is the pathway through which prescription and over-the-counter (OTC) drugs are delivered from manufacturing sites to patients. Technological innovations, price fluctuations of raw materials, as well as tax, regulatory, and market demands are driving change and making the pharmaceutical supply chain more complex. Traditional supply chain management methods struggle to protect the pharmaceutical supply chain, maintain its integrity, enhance customer confidence, and aid regulators in tracking medicines. To develop effective measures that secure the pharmaceutical supply chain, it is important that the community is aware of the state-of-the-art capabilities available to the supply chain owners and participants.
In this article, we will be presenting a survey of existing hardware-enabled pharmaceutical supply chain security schemes and their limitations. We also highlight the current challenges and point out future research directions. This survey should be of interest to government agencies, pharmaceutical companies, hospitals and pharmacies, and all others involved in the provenance and authenticity of medicines and the integrity of the pharmaceutical supply chain.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Noltsis:2018:RSC, author = "Michail Noltsis and Dimitrios Rodopoulos and Nikolaos Zompakis and Francky Catthoor and Dimitrios Soudris", title = "Runtime Slack Creation for Processor Performance Variability using System Scenarios", journal = j-TODAES, volume = "23", number = "2", pages = "24:1--24:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3152158", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern microprocessors contain a variety of mechanisms used to mitigate errors in the logic and memory, referred to as Reliability, Availability, and Serviceability (RAS) techniques. Many of these techniques, such as component disabling, come at a performance cost. With the aggressive downscaling of device dimensions, it is reasonable to expect that chip-wide error rates will intensify in the future and perhaps vary throughout system lifetime. As a result, it is important to reclaim the temporal RAS overheads in a systematic way and enable dependable performance. The current article presents a closed-loop control scheme that actuates processor's frequency based on detected timing interference to ensure performance dependability. 
The concepts of slack and deadline vulnerability factor are introduced to support the formulation of a discrete time control problem. Default application timing is derived using the system scenario methodology, the applicability of which is demonstrated through simulations. Additionally, the proposed concept is demonstrated on a real platform and application: a Proportional-Integral-Differential controller, implemented within the application, actuates the Dynamic Voltage and Frequency Scaling (DVFS) framework of the Linux kernel to effectively reclaim temporal overheads injected at runtime. The current article discusses the responsiveness and energy efficiency of the proposed performance dependability scheme. Finally, additional formulation is introduced to predict the upper bound of timing interference that can be absorbed by actuating the DVFS of any processor and is also validated on a representative reduction to practice.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shafiee:2018:DFB, author = "M. Shafiee and N. Beohar and P. Bakliwal and S. Roy and D. Mandal and B. Bakkaloglu and S. Ozev", title = "A Disturbance-Free Built-In Self-Test and Diagnosis Technique for {DC--DC} Converters", journal = j-TODAES, volume = "23", number = "2", pages = "25:1--25:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3152157", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Complex electronic systems include multiple power domains and drastically varying dynamic power consumption patterns, requiring the use of multiple power conversion and regulation units. 
High-frequency switching converters have been gaining prominence in the DC-DC converter market due to their high efficiency and smaller form factor. Unfortunately, they are also subject to higher process variations, and faster in-field degradation, jeopardizing stable operation of the power supply. This article presents a technique to track changes in the dynamic loop characteristics of DC-DC converters without disturbing the normal mode of operation using a white noise-based excitation and correlation. Using multiple points for injection and analysis, we show that the degraded part can be diagnosed to take remedial action. White noise excitation is generated via a pseudo-random disturbance at reference, load current, and pulse-width modulation (PWM) nodes of the converter with the test signal energy being spread over a wide bandwidth, without significantly affecting the converter noise and ripple floor. The impulse response is extracted by correlating the random input sequence with the disturbed output generated. Test signal analysis is achieved by correlating the pseudo-random input sequence with the output response and thereby accumulating the desired behavior over time and pulling it above the noise floor of the measurement set-up. An off-the-shelf power converter, LM27402, is used as the device-under-test (DUT) for experimental verification. Experimental results show that the proposed technique can estimate converter natural frequency and quality factor ($Q$-factor) within $ \pm 2.5$ \% and $ \pm 0.7$ \% error margin respectively, over changes in load inductance and capacitance. 
For the diagnosis purpose, a measure of inductor's DC resistance (DCR) value, which is the inductor's series resistance and indicative of the degradation in inductor's $Q$-factor, is estimated within less than $ \pm 1.6$ \% error margin.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Emeretlis:2018:SMA, author = "Andreas Emeretlis and George Theodoridis and Panayiotis Alefragis and Nikolaos Voros", title = "Static Mapping of Applications on Heterogeneous Multi-Core Platforms Combining Logic-Based {Benders} Decomposition with Integer Linear Programming", journal = j-TODAES, volume = "23", number = "2", pages = "26:1--26:??", month = jan, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3133219", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The proper mapping of an application on a multi-core platform and the scheduling of its tasks are key elements to achieve the maximum performance. In this article, a novel hybrid approach based on integrating the Logic-Based Benders Decomposition (LBBD) principle with a pure Integer Linear Programming (ILP) model is introduced for mapping applications described by Directed Acyclic Graphs (DAGs) on platforms consisting of heterogeneous cores. The LBBD approach combines two optimization techniques with complementary strengths, namely ILP and Constraint Programming (CP), and is employed as a cut generation scheme. The generated constraints are utilized by the ILP model to cut possible assignment combinations aiming at improving the solution or proving the optimality of the best-found one. The introduced approach was applied both on synthetic DAGs and on DAGs derived from real applications. 
Through the proposed approach, many problems were optimally solved that could not be solved by any of the above methods (ILP, LBBD) alone within a time limit of 2 hours, while the overall solution time was also significantly decreased. Specifically, the hybrid method exhibited speedups equal to $ 4.2 \times $ for the synthetic instances and $ 10 \times $ for the real-application DAGs over the LBBD approach and two orders of magnitude over the ILP model.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gomez:2018:SCP, author = "Andres F. Gomez and Victor Champac", title = "Selection of Critical Paths for Reliable Frequency Scaling under {BTI}-Aging Considering Workload Uncertainty and Process Variations Effects", journal = j-TODAES, volume = "23", number = "3", pages = "27:1--27:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177864", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Conventional clock guardbanding to assure a circuit's reliable operation under device aging due to NBTI/PBTI and process variations introduce significant performance loss in modern nanometer circuits. Dynamic Frequency Scaling (DFS) is a more efficient technique that allows us to adjust the system clock frequency according to the process condition and aging deterioration of the circuit. At the design phase, the DFS technique requires the identification of the logic paths to be monitored to introduce the required circuitry to monitor their delay. 
However, critical path identification is a complex problem due to three major challenges: (1) The critical paths of the circuit depend on the stress duty cycle of the devices, which are unknown in advance at design phase; (2) the critical paths of the circuit depend on the process parameters variations, whose impact on delay depend on the spatial correlation due to proximity at the circuit layout; and (3) the critical paths reordering probability may change over time due to aging. This article presents a methodology for efficient selection of the critical paths to be monitored under a DFS framework, addressing the aforementioned challenges. Experimental results on ISCAS 85/89 benchmark circuits show the feasibility of the proposed approach to select a restricted path set while providing reliable aging monitoring.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2018:PSC, author = "Sheng-Min Huang and Li-Pin Chang", title = "Providing {SLO} Compliance on {NVMe SSDs} Through Parallelism Reservation", journal = j-TODAES, volume = "23", number = "3", pages = "28:1--28:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3174867", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Non-Volatile Memory Express (NVMe) is a specification for next-generation solid-state disks (SSDs). Benefited from the massive internal parallelism and the high-speed PCIe bus, NVMe SSDs achieve extremely high data transfer rates, and they are an ideal solution of shared storage in virtualization environments. 
Providing virtual machines with Service Level Objective (SLO) compliance on NVMe SSDs is a challenging task, because garbage collection activities inside of NVMe SSDs globally affect the I/O performance of all virtual machines. In this study, we introduce a novel approach, called parallelism reservation, which is inspired by the rich internal parallelism of NVMe SSDs. The degree of parallelism stands for how many flash chips are concurrently active. Our basic idea is to reserve sufficient degrees of parallelism for read, write, and garbage collection operations, making sure that an NVMe SSD delivers stable read and write throughput and reclaims free space at a constant rate. The stable read and write throughput are proportionally distributed among virtual machines for SLO compliance. Our experimental results show that our parallelism reservation approach delivered satisfiable throughput and highly predictable response to virtual machines.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2018:RRE, author = "Kun Yang and Domenic Forte and Mark Tehranipoor", title = "{ReSC}: an {RFID-Enabled} Solution for Defending {IoT} Supply Chain", journal = j-TODAES, volume = "23", number = "3", pages = "29:1--29:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3174850", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Internet of Things (IoT), an emerging global network of uniquely identifiable embedded computing devices within the existing Internet infrastructure, is transforming how we live and work by increasing the connectedness of people and things on a scale that was once unimaginable. 
In addition to facilitated information and service exchange between connected objects, enhanced computing power and analytic capabilities of individual objects, and increased interaction between objects and their environments, the IoT also raises new security and privacy challenges. Hardware trust across the IoT supply chain is the foundation of IoT security and privacy. Two major supply chain issues --- disappearance/theft of authentic IoT devices and appearance of unauthentic ones --- have to be addressed to secure the IoT supply chain and lay the foundation for further security and privacy-defensive measures. Comprehensive solutions that enable IoT device authentication and traceability across the entire supply chain (i.e., during distribution and after being provisioned) need to be established. Existing hardware, software, and network protection methods, however, do not address IoT supply chain issues. To mitigate this shortcoming, we propose an RFID-enabled solution called ReSC that aims at defending the IoT supply chain. By incorporating three techniques --- one-to-one mapping between RFID tag identity and control chip identity; unique tag trace, which records tag provenance and history information; and neighborhood attestation of IoT devices --- ReSC is resistant to split attacks (i.e., separating tag from product, swapping tags), counterfeit injection, product theft throughout the entire supply chain, device recycling, and illegal network service access (e.g., Internet, cable TV, online games, remote firmware updates). Simulations, theoretical analysis, and experimental results based on a printed circuit board (PCB) prototype demonstrate the effectiveness of ReSC.
Finally, we evaluate the security of our proposed scheme against various attacks.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2018:LBF, author = "Dongwook Lee and Andreas Gerstlauer", title = "Learning-Based, Fine-Grain Power Modeling of System-Level Hardware {IPs}", journal = j-TODAES, volume = "23", number = "3", pages = "30:1--30:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177865", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate power and performance models are needed to enable rapid, early system-level analysis and optimization. There is, however, a lack of fast yet fine-grain power models of hardware components at such high levels of abstraction. In this article, we present novel learning-based approaches for extending fast functional simulation models of accelerators and other hardware intellectual property components (IPs) with accurate cycle-, block-, and invocation-level power estimates. Our proposed power modeling approach is based on annotating functional hardware descriptions with capabilities that, depending on observability, allow capturing data-dependent resource, block, or input and output (I/O) activity without a significant loss in simulation speed. We further leverage advanced machine learning techniques to synthesize abstract power models using novel decomposition techniques that reduce model complexities and increase estimation accuracy. 
Results of applying our approach to various industrial-strength design examples show that our power models can predict cycle-, basic block-, and invocation-level power consumption to within 10\%, 9\%, and 3\% of a commercial gate-level power estimation tool, respectively, all while running at several orders of magnitude faster speeds of 1--10 Mcycles/sec. Model training and synthesis takes less than 34 minutes in all cases, including up to 30 minutes for training data and trace generation using gate-level simulations.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Naderan-Tahan:2018:DCE, author = "Mahmood Naderan-Tahan and Hamid Sarbazi-Azad", title = "{Domino Cache}: an Energy-Efficient Data Cache for Modern Applications", journal = j-TODAES, volume = "23", number = "3", pages = "31:1--31:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3174848", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The energy consumption for processing modern workloads is challenging in data centers. Due to the large datasets of cloud workloads, the miss rate of the L1 data cache is high, and with respect to the energy efficiency concerns, such misses are costly for memory instructions because lower levels of memory hierarchy consume more energy per access than the L1. Moreover, large last-level caches are not performance effective, in contrast to traditional scientific workloads. The aim of this article is to propose a large L1 data cache, called Domino, to reduce the number of accesses to lower levels in order to improve the energy efficiency.
In designing Domino, we focus on two components that use the on-chip area and are not energy efficient, which makes them good candidates to use their area for enlarging the L1 data cache. Domino is a highly associative cache that extends the conventional cache by borrowing the prefetcher and last-level-cache storage budget and using it as additional ways for data cache. In Domino, the additional ways are separated from the conventional cache ways; hence, the critical path of the first access is not altered. On a miss in the conventional part, it searches the added ways in a mix of parallel-sequential fashion to compromise the latency and energy consumption. Results on the Cloudsuite benchmark suite show that read and write misses are reduced by 30\%, along with a 28\% reduction in snoop messages. The overall energy consumption per access is then reduced by 20\% on average (maximum 38\%) as a result of filtering accesses to the lower levels.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Abolmaali:2018:EFP, author = "Sheis Abolmaali and Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram", title = "An Efficient False Path-Aware Heuristic Critical Path Selection Method with High Coverage of the Process Variation Space", journal = j-TODAES, volume = "23", number = "3", pages = "32:1--32:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177866", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present a critical path selection method that efficiently finds true (sensitizable) critical paths of a circuit in the presence of process variations. 
The method, which is based on the viability analysis, tries to select the least number of true critical paths that cover all of circuit critical gates. Critical gates are those that make a path critical with a probability higher than a predefined threshold value. Selecting fewer critical paths leads to less computation time for the algorithm and shorter test time of fabricated chips. For this purpose, an efficient Statistical Static Timing Analysis- (SSTA) based technique is suggested. This technique tries to find circuit-critical gates whose process parameter variations cover a major part of the process space. Improving the process space coverage using fewer paths is achieved by considering both spatial (proximity of gates) and structural (having common gates) correlations in the analysis of choosing the critical paths. In the selection process, paths with low similarities in their characteristics are preferred. In addition, only true paths whose delays affect the maximum delay of the circuit are included. The selected paths can be used in the test process of the fabricated chips to determine if the chip meets its timing requirements. Also, a modified viability analysis that incorporates statistical computations is used in the SSTA. The efficacy of the proposed method is evaluated by comparing its results for combinational and sequential ISCAS benchmarks with those obtained by exhaustive search. 
Results indicate although, on average, only 4.38\% of all the critical paths found by the exhaustive search are selected by the proposed method, the maximum probability of criticality for the paths that are not considered in our method is, on average, less than 4\%.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jalili:2018:ERM, author = "Majid Jalili and Hamid Sarbazi-Azad", title = "Express Read in {MLC} Phase Change Memories", journal = j-TODAES, volume = "23", number = "3", pages = "33:1--33:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177876", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the era of big data, the capability of computer systems must be enhanced to support 2.5 quintillion byte/day data delivery. Among the components of a computer system, main memory has a great impact on overall system performance. DRAM technology has been used over the past four decades to build main memories. However, the scalability of DRAM technology has faced serious challenges. To keep pace with the ever-increasing demand for larger main memory, some new alternative technologies have been introduced. Phase change memory (PCM) is considered as one of such technologies for substituting DRAM. PCM offers some noteworthy properties such as low static power consumption, nonvolatility, and capability of storing more than one bit per cell (multilevel cell, or MLC). However, the short lifetime and long access latency of PCM (specifically MLC PCM) require feasible and efficient solutions. 
In this article, based on the observation that applications access a significant number of read-friendly data blocks, we propose Express Read to prevent the MLC PCM read circuit to spend unnecessary time sensing the cells of a memory block. A read-friendly data block (RFDB) is composed of only ``11'' and ``00'' bit pairs, and thus upon sensing the most significant bit of a cell, the read operation can be early terminated to reduce the MLC read time and power consumption. Moreover, we increase the number of RFDBs using two simple techniques to better exploit the benefits of Express Read. Results obtained from full-system simulation show near 6\% performance improvement and 21\% energy gain, on average, over the baseline system.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yan:2018:DCR, author = "Jin-Tai Yan", title = "Direction-Constrained Rectangle Escape Routing", journal = j-TODAES, volume = "23", number = "3", pages = "34:1--34:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3178047", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Given a set of buses with available escape directions inside a chip, a two-phase algorithm is proposed to assign one feasible escape direction onto any bus such that the number of used layers is minimized and to allocate the pin rectangle and the projection rectangle of any escape bus onto the minimized layers in direction-constrained rectangle escape routing.
In our proposed algorithm, based on the concept of two-dimensional maximum density inside a chip, the escape directions of the buses can be first assigned to minimize the number of the used layers by iteratively eliminating unnecessary escape directions for any bus inside a chip. Furthermore, based on the construction of the represented intervals and the assignment constraints for the escape buses, a modified left-edge algorithm can be used to allocate all the escape buses onto the minimized layers. Compared with Ma's integer linear program (ILP)-based algorithm [10] using lp\_solve and Gurobi in rectangle escape routing, the experimental results show that our proposed algorithm obtains the same results but reduces CPU time by 94.2\% and 35.7\% when using lp\_solve and Gurobi for 16 tested examples with no direction constraint on average, respectively. Compared with the modified algorithm from Ma's ILP-based algorithm [10] using lp\_solve and Gurobi in direction-constrained rectangle escape routing, the experimental results show that our proposed algorithm obtains the same results but reduces CPU time by 94.3\% and 37.7\% when using lp\_solve and Gurobi for 16 tested examples with direction constraints on average, respectively. Besides that, compared with Yan's iterative algorithm, the experimental results show that our proposed algorithm increases CPU time by 1.0\% to reduce the number of used layers by 11.1\% for 16 tested examples on average.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2018:MTI, author = "Shengcheng Wang and Ran Wang and Krishnendu Chakrabarty and Mehdi B.
Tahoori", title = "Multicast Testing of Interposer-Based {$ 2.5 $D} {ICs}: Test-Architecture Design and Test Scheduling", journal = j-TODAES, volume = "23", number = "3", pages = "35:1--35:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177879", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Interposer-based 2.5D integrated circuits (ICs) are seen today as a precursor to 3D ICs based on through-silicon vias (TSVs). All the dies in a 2.5D IC must be adequately tested for product qualification. However, due to the limited number of package pins, it is a major challenge to test 2.5D ICs using conventional methods. Moreover, due to higher integration levels, test-application time and test power consumption for 2.5D ICs are also increased compared to their 2D counterparts. Therefore, it is imperative to take these issues into account during 2.5D IC testing. In this article, we present an efficient multicast test architecture for targeting defects in dies, in which multiple dies can be tested simultaneously to reduce the test-application time under constraints on test power and fault coverage. We also propose a test scheduling and optimization technique that can be utilized with the multicast test architecture. By considering the trade-off between test-application time, test-power budget, and test quality, the proposed technique provides test schedules with minimum test-application time under constraints on power consumption and fault coverage. Compared to previous work, the proposed technique can reduce test-application time by up to 53.4\% for benchmark designs while achieving higher fault coverage.
Since the loss in fault coverage due to multicast testing is extremely small, we can use top-off patterns to achieve full fault coverage for the dies at negligible additional cost.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhai:2018:ENG, author = "Jinyuan Zhai and Changhao Yan and Sheng-Guo Wang and Dian Zhou and Hai Zhou and Xuan Zeng", title = "An Efficient Non-{Gaussian} Sampling Method for High Sigma {SRAM} Yield Analysis", journal = j-TODAES, volume = "23", number = "3", pages = "36:1--36:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3174866", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Yield analysis of SRAM is a challenging issue, because the failure rates of SRAM cells are extremely small. In this article, an efficient non-Gaussian sampling method of cross entropy optimization is proposed for estimating the high sigma SRAM yield. Instead of sampling with the Gaussian distribution in existing methods, a non-Gaussian distribution, i.e., a joint one-dimensional generalized Pareto distribution and $ (n - 1) $-dimensional Gaussian distribution, is taken as the function family of practical distribution, which is proved to be more suitable to fit the ideal distribution in the view of extreme failure event. To minimize the cross entropy between practical and ideal distributions, a sequential quadratic programming solver with multiple starting points strategy is applied for calculating the optimal parameters of practical distributions.
Experimental results show that the proposed non-Gaussian sampling is a $ 2.2$--$ 4.1 \times $ speedup over the Gaussian sampling; on the whole, it is about a $ 1.6$--$ 2.3 \times $ speedup over state-of-the-art methods with low- and high-dimensional cases without loss of accuracy.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lu:2018:FDR, author = "Guan-Ruei Lu and Chun-Hao Kuo and Kuen-Cheng Chiang and Ansuman Banerjee and Bhargab B. Bhattacharya and Tsung-Yi Ho and Hung-Ming Chen", title = "Flexible Droplet Routing in Active Matrix-Based Digital Microfluidic Biochips", journal = j-TODAES, volume = "23", number = "3", pages = "37:1--37:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3184388", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The active matrix (AM)-based architecture offers many advantages over conventional digital electrowetting-on-dielectric (EWOD) microfluidic biochips, such as the capability of handling variable-size droplets, more flexible droplet movement, and precise control over droplet navigation. However, a major challenge in choosing the routing paths is to decide when the droplets are to be reshaped depending on the congestion of the intended path, or split and route subdroplets, and merging them at their respective destinations. As the number of microelectrodes in AM-EWOD chips is large, the path selection problem becomes further complicated.
In this article, we propose a negotiation-guided flow based on routing of subdroplets that obviates the explicit need for deciding when the droplets are to be manipulated, yet fully utilizing the power of droplet reshaping, splitting, and merging them to facilitate their journey. The proposed algorithm reduces routing cost and provides more freedom in deadlock avoidance in the presence of multiple routing tasks by assigning certain congestion penalty for sibling subdroplets and fluidic penalty for heterogeneous droplets. Compared to existing techniques, it reduces latest arrival time by an average of 29\% for several benchmark and random test suites. Furthermore, our method is observed to provide 100\% routability of nets for all test cases, whereas existing and baseline routers fail to produce feasible solutions in many instances. We also propose a reliable mode droplet routing strategy where the number of unreliable splitting operations can be reduced by paying a small penalty on latest arrival time.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xie:2018:ADI, author = "Mimi Xie and Chen Pan and Mengying Zhao and Yongpan Liu and Chun Jason Xue and Jingtong Hu", title = "Avoiding Data Inconsistency in Energy Harvesting Powered Embedded Systems", journal = j-TODAES, volume = "23", number = "3", pages = "38:1--38:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3182170", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Energy harvesting is becoming a favorable alternative to power future generation embedded systems, as it is more environmentally and user friendly. 
However, energy harvesting powered embedded systems suffer from frequent execution interruption due to unstable energy supply. To tackle this problem, nonvolatile memory has been deployed to save the whole volatile state for computation. When power resumes, the processor can restore the state back to volatile memories and continue execution. However, without careful consideration, the process of checkpointing and resuming could cause inconsistency between volatile and nonvolatile memories, which leads to irreversible errors. In this article, we propose a consistency-aware adaptive checkpointing scheme that ensures correctness for all checkpoints. The proposed technique efficiently identifies all possible inconsistency positions in programs and inserts auxiliary code to ensure correctness by offline analysis. In addition, adaptive checkpointing assisted register file profiling and online tracking techniques further reduce the overhead of each checkpoint. Evaluation results show that the proposed checkpointing strategy can successfully eliminate inconsistency errors and greatly reduce the checkpointing overhead.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Arcaro:2018:RTG, author = "Lu{\'\i}s Fernando Arcaro and Karila {Palma Silva} and R{\^o}mulo {Silva De Oliveira}", title = "On the Reliability and Tightness of {GP} and Exponential Models for Probabilistic {WCET} Estimation", journal = j-TODAES, volume = "23", number = "3", pages = "39:1--39:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3185154", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As computer architectures evolve, guaranteeing that Real-Time Systems' (RTSs') timing requirements 
are met through Worst Case Execution Time (WCET) upper bounds becomes increasingly difficult. Techniques such as Measurement-Based Probabilistic Timing Analysis (MBPTA) have emerged that estimate WCET bounds exceeded only with arbitrarily low probabilities (i.e., pWCETs) through Extreme Value Theory (EVT). The Peaks Over Threshold (POT) approach for applying EVT involves adjusting a tail-shaped distribution, e.g., Generalized Pareto (GP) or Exponential, to the values that exceed a carefully selected high threshold. Several works suggest that GP should be used within POT for best representing different tail shapes, while others consider the Exponential model more adequate for providing upper bounds with increased reliability. This work presents empirical reliability and tightness evaluations of the pWCET estimates yielded by the GP and Exponential models while applying MBPTA through the POT approach. It mainly provides counter-evidence to the GP model reliability and evidence of the Exponential model adequacy in this context.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jassi:2018:GGB, author = "Munish Jassi and Yong Hu and Daniel Mueller-Gritschneder and Ulf Schlichtmann", title = "Graph-Grammar-Based {IP}-Integration ({GRIP}) --- An {EDA} Tool for Software-Defined {SoCs}", journal = j-TODAES, volume = "23", number = "3", pages = "40:1--40:??", month = apr, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3139381", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In modern system-on-chip (SoC) designs, IP-reuse is considered a driving force to increase productivity. 
To support various designs, a huge amount of Intellectual Property (IP) hardware blocks have been developed. The integration of those IPs into an SoC may require significant effort --- up to days or weeks depending on experience and complexity. This article presents a novel approach to significantly reduce the design effort to bring up a working SoC design by automatic IP integration as part of a library-based Software-defined SoC flow. In detail, the IP-supplier prepares a HW-accelerated software library (HASL) for the SoC architect, who wants to use the IP in an SoC design. As a key point of our approach, integration knowledge is encoded in the library as a set of integration rules. These rules are defined in the machine-readable standardized IP-XACT format by the IP supplier, who has a good knowledge of the IP's hardware details. The library preparation step on the IP supplier's side is also partly automated in the proposed flow, including a partial generation of configurable HW drivers, schedulers, and the software library functions. For the SoC architect, we have developed the graph-grammar-based IP-integration (GRIP) tool. The software application is developed using the functions supplied in the HASL. According to the calls to the HASL functions, the GRIP tool automatically integrates IP-blocks using the rule information supplied with the library and runs a full Design Space Exploration. For this, the SoC architecture and rules are transformed into the graph domain to apply graph rewriting methods. The GRIP tool is model-driven and based on the Eclipse Modeling Framework. With code generation techniques, SoC candidate architectures can be transformed to hardware descriptions for the target platform. The HW/SW interfaces between SW library functions and IP blocks can be automatically generated for bare-metal or Linux-based applications.
The approach is demonstrated with two case-studies on the Xilinx Zynq-based ZedBoard evaluation board using a HASL for computer vision. It can yield $ 10 \times $--$ 150 \times $ performance improvement for the bare-metal application versions and $ 4 \times $--$ 7 \times $ performance improvement for the Linux-based application versions, when executed on an optimized HW-accelerated SoC architecture compared to a non HW-accelerated SoC. The effort for IP integration is comparable to using a software library, hence, providing a significant advantage over a manual IP integration.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chu:2018:ISS, author = "Chris Chu and Mustafa Ozdal", title = "Introduction to the Special Section on Advances in Physical Design Automation", journal = j-TODAES, volume = "23", number = "4", pages = "41:1--41:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3199220", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2018:UHP, author = "Wuxi Li and Yibo Lin and Meng Li and Shounak Dhar and David Z. 
Pan", title = "{UTPlaceF 2.0}: a High-Performance Clock-Aware {FPGA} Placement Engine", journal = j-TODAES, volume = "23", number = "4", pages = "42:1--42:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3174849", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern field-programmable gate array (FPGA) devices contain complex clock architectures on top of configurable logics. Unlike application specific integrated circuits (ASICs), the physical structure of clock networks in an FPGA is pre-manufactured and cannot be adjusted to different applications. Furthermore, clock routing resources are typically limited for high-utilization designs. Consequently, clock architectures impose extra clock constraints and further complicate physical implementation tasks such as placement. Traditional ASIC placement techniques only optimize conventional design metrics such as wirelength, routability, power, and timing without clock legality consideration. It is imperative to have new techniques to honor clock constraints during placement for FPGAs. In this article, we propose a high-performance FPGA placement engine, UTPlaceF 2.0, that optimizes wirelength and routability while honoring complex clock constraints. Our proposed approaches consist of an iterative minimum-cost-flow-based cell assignment as well as a clock-aware packing for producing clock-legal yet high-quality placement solutions. 
UTPlaceF 2.0 won first place in the ISPD'17 clock-aware FPGA placement contest organized by Xilinx, outperforming the second- and the third-place winners by 4.0\% and 10.0\%, respectively, in routed wirelength with competitive runtime, on a set of industry benchmarks.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Darav:2018:ELH, author = "Nima Karimpour Darav and Ismail S. Bustany and Andrew Kennings and David Westwick and Laleh Behjat", title = "{Eh?Legalizer}: a High Performance Standard-Cell Legalizer Observing Technology Constraints", journal = j-TODAES, volume = "23", number = "4", pages = "43:1--43:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3158215", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The legalization step is performed after global placement where wire length and routability are optimized or during timing optimization where buffer insertion or gate sizing are applied to meet timing requirements. Therefore, an ideal legalization approach must preserve the quality of the input placement in terms of routability, wire length, and timing constraints. These requirements indirectly impose maximum and average cell movement constraints during legalization. In addition, the legalization step should effectively manage white space availability with a highly efficient runtime in order to be used in an iterative process such as timing optimization. In this article, a robust and fast legalization method called Eh?Legalizer for standard-cell placement is presented. Eh?Legalizer legalizes input placements while minimizing the maximum and average cell movements using a highly efficient novel network flow-based approach. 
In contrast to the traditional network flow-based legalizers, areas with high cell utilizations are effectively legalized by finding several candidate paths and there is no need for a post-process step. The experimental results conducted on several benchmarks show that Eh?Legalizer results in 2.5 times and 3.3 times less the maximum and average cell movement, respectively, while its runtime is significantly ($ 18 \times $) lower compared to traditional legalizers. In addition, the experimental results illustrate the scalability and robustness of Eh?Legalizer with respect to the floorplan complexity. Finally, the detailed-routing results show detailed-routing violations are reduced on average by 23\% when Eh?Legalizer is used to generate legal solutions.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Wang:2018:VAG, author = "Chen Wang and Yanan Sun and Shiyan Hu and Li Jiang and Weikang Qian", title = "Variation-Aware Global Placement for Improving Timing-Yield of Carbon-Nanotube Field Effect Transistor Circuit", journal = j-TODAES, volume = "23", number = "4", pages = "44:1--44:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3175500", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the conventional silicon-based CMOS technology marches toward the sub-10nm region, the problem of high power density becomes increasingly serious. Under this circumstance, the carbon-nanotube field effect transistors (CNFETs) emerge as a promising alternative to the conventional silicon-based CMOS devices. 
However, they experience a much larger variation than the silicon-based CMOS devices, which results in a large circuit delay variation and hence, a significant timing yield loss. One of the main variation sources is the carbon-nanotube (CNT) density variation. However, it shows a special property not existing for silicon-based CMOS devices, namely the asymmetric spatial correlation. In this work, we propose novel global placement algorithms to reduce the timing yield loss caused by the CNT density variation. To effectively reduce the statistical circuit delay, we first develop a statistical delay measure for a segment of gates. Based on this measure, we further develop a segment-based strategy and a path-based placement strategy to reduce the delays of the statistically critical paths. Experimental results demonstrated that both of our approaches effectively improve the timing yield.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2018:MRB, author = "Kuen-Wey Lin and Yeh-Sheng Lin and Yih-Lang Li and Rung-Bin Lin", title = "A Maze Routing-Based Methodology With Bounded Exploration and Path-Assessed Retracing for Constrained Multilayer Obstacle-Avoiding Rectilinear {Steiner} Tree Construction", journal = j-TODAES, volume = "23", number = "4", pages = "45:1--45:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177878", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Owing to existing intellectual properties, prerouted nets, and power/ground wires, the routing of a system on chip design demands to detour around multilayer obstacles. 
Traditional approaches for the multilayer obstacle-avoiding rectilinear Steiner tree (ML-OARST) problem are thus nonmaze routing-based approaches for runtime issues, yet they cannot be directly applied to deal with additional constraints such as variant edge weights on a routing layer. In this article, we propose the maze routing-based methodology with bounded exploration and path-assessed retracing to reduce runtime and routing cost for the constrained ML-OARST construction problem. The exploration of maze routing is bounded to reduce the runtime; the costs of connecting pins are computed to select Steiner points in the retracing phase. To further reduce the routing cost, we develop a Steiner point-based ripping-up and rebuilding scheme for altering tree topology. Experimental results on industrial and randomly generated benchmarks demonstrate that the proposed methodology can provide a solution with good quality in terms of routing cost and has a significant speedup compared to traditional maze routing. A commercial tool is also used to show the effectiveness of the proposed methodology.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jiao:2018:OER, author = "Fengxian Jiao and Sheqin Dong", title = "Ordered Escape Routing with Consideration of Differential Pair and Blockage", journal = j-TODAES, volume = "23", number = "4", pages = "46:1--46:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3185783", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Ordered escape routing is a critical issue in high-speed PCB routing. 
Differential pair and thermal-blockage-avoided are useful in PCB design to obtain high noise immunity and low electromagnetic interference. In this article, a Min-cost Multi-commodity Flow (MMCF) approach is proposed to solve the ordered escape routing. First, the characteristic of grid pin array and staggered pin array is analyzed and then a basic network model is used to convert ordered escape routing to MMCF model. To satisfy the constraints of ordered escape routing, three novel transformations, such as non-crossing transformation, ordering transformation, and capacity transformation, are used to convert the basic network model to the final correct MMCF model. After that, the differential pair in ordered escape routing is discussed. Finally, a method to deal with the blockage issue is proposed. Experimental results show that our method achieves 100\% routability for all the test cases. The method can get both a feasible solution and an optimal solution for ordered escape routing. Compared to published approaches, our method improves in both wire length and CPU time remarkably. 
At the same time, the proposed method can effectively avoid the blockage and deal with the differential pair.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Liu:2018:RML, author = "Bo Liu and Gong Chen and Bo Yang and Shigetoshi Nakatake", title = "Routable and Matched Layout Styles for Analog Module Generation", journal = j-TODAES, volume = "23", number = "4", pages = "47:1--47:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3182169", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Two novel automatic generation methods for analog layout --- a symmetrical twin-row method for MOS transistors and a twisted common-centroid method for capacitor arrays --- are introduced. Based on the proposed layout styles and the corresponding algorithms, the symmetry and common-centroid placement patterns for analog devices are realized to guarantee matching properties. On this basis, as the most prominent contribution of this article, channel routing-based algorithms for the proposed layout styles are presented and could achieve 100\% routability due to well-arranged devices and corresponding low routing complexity. The algorithms' benefits include a small layout area that maximizes the diffusion-sharing of MOS transistors and less routing layer usage for common-centroid device arrays. Moreover, we successfully applied our algorithms to the layout designs of two typical analog modules including a two-stage operating amplifier and a Successive Approximation Register Analog-to-Digital Converter (SAR-ADC). The generated layouts and the circuit simulation results demonstrate the effectiveness of our algorithms in terms of their routability and matching properties.
Our algorithms can also be extended to apply to a variety of essential MOS analog circuits.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2018:ICA, author = "Pei-Yu Lee and Iris Hui-Ru Jiang", title = "{iTimerM}: a Compact and Accurate Timing Macro Model for Efficient Hierarchical Timing Analysis", journal = j-TODAES, volume = "23", number = "4", pages = "48:1--48:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3149818", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As designs continue to grow in size and complexity, EDA paradigm shifts from flat to hierarchical timing analysis. In this article, we present compact and accurate timing macro modeling, which is the key to efficient and accurate hierarchical timing analysis. Our goal is to contain only a minimal amount of interface logic in our timing macro model. The main idea is to separate the interface logic into variant and constant timing regions. Then, the variant timing region is reserved for accuracy, while the constant timing region is reduced for compactness. For reducing the constant timing region, we propose anchor pin insertion and deletion by generalizing existing timing graph reduction techniques. Furthermore, we devise a lookup table index selection technique to achieve high model accuracy over the possible operating condition range. Compared with two common models used in industry, extracted timing model and interface logic model, our model has high model accuracy and small model size. 
Based on the TAU 2016 and 2017 timing macro modeling contest benchmark suites, our results show that our algorithm delivers superior efficiency and accuracy: Hierarchical timing analysis using our model can significantly reduce runtime and memory compared with flat timing analysis on the original design. Moreover, our algorithm outperforms TAU 2016 and 2017 contest winners in model accuracy, model size, model generation performance, and model usage performance.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sadat:2018:OAL, author = "Sayed Abdullah Sadat and Mustafa Canbolat and Sel{\c{c}}uk K{\"o}se", title = "Optimal Allocation of {LDOs} and Decoupling Capacitors within a Distributed On-Chip Power Grid", journal = j-TODAES, volume = "23", number = "4", pages = "49:1--49:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3177877", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Parallel on-chip voltage regulation, where multiple regulators are connected to the same power grid, has recently attracted significant attention with the proliferation of small on-chip voltage regulators. In this article, the number, size, and location of parallel low-dropout (LDO) regulators and intentional decoupling capacitors are optimized using mixed integer non-linear programming formulation. The proposed optimization function concurrently considers multiple objectives such as area, power noise, and overall power consumption. Certain objectives are optimized by putting constraints on the other objectives with the proposed technique. Additional constraints have been added to avoid the overlap of LDOs and decoupling capacitors in the optimization process. 
The results of an optimized LDO allocation in the POWER8 chip is compared with the recent LDO allocation in the same IBM chip in a case study where a 20\% reduction in the noise is achieved. The results of the proposed multi-criteria objective function under a different area, power, and noise constraints are also evaluated with a sample ISPD'11 benchmark circuits in another case study.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Cakir:2018:RED, author = "Burcin Cakir and Sharad Malik", title = "Reverse Engineering Digital {ICs} through Geometric Embedding of Circuit Graphs", journal = j-TODAES, volume = "23", number = "4", pages = "50:1--50:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3193121", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Outsourcing of design and manufacturing processes makes integrated circuits (ICs) vulnerable to adversarial changes and raises concerns about their integrity. Reverse engineering the manufactured netlist helps identify malicious insertions. In this article, we present an automated approach that, given a reference design description with high-level blocks, infers these blocks in an untrusted gate-level (test) implementation. Using the graph connectivity of the netlists, we compute a geometric embedding for each wire in the circuits, which, then, is used to compute a bipartite matching between the nodes of the two designs and identify high-level blocks in the test circuit. 
Experiments to evaluate the efficacy of the proposed technique on various-sized designs, including the multi-core processor OpenSparc T1, show that it can correctly match over 90\% of gates in the test circuit to their corresponding block in the reference model.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Ittershagen:2018:IFM, author = "Philipp Ittershagen and Kim Gr{\"u}ttner and Wolfgang Nebel", title = "An Integration Flow for Mixed-Critical Embedded Systems on a Flexible Time-Triggered Platform", journal = j-TODAES, volume = "23", number = "4", pages = "51:1--51:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3190837", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The rise of mixed-critical embedded systems imposes novel challenges on the specification, development, and functional validation in a design flow. In the emerging dynamic scheduling context of mixed-criticality platforms, the system behaviour needs to be estimated in an early step in the design flow to assess the integration impact, especially for quality of service-driven, low-critical subsystems. We provide a modelling and integration flow for specifying, estimating, and evaluating software functions, ranging from an initial executable specification to an implementation candidate on an MPSoC. Based on a data-driven model to evaluate dynamic resource consumption effects of high-critical subsystems and the scheduling overhead, we propose a systematic method for constructing workload models of high-critical software components on the target. 
Our proxies provide an integration environment for low-critical functions by mimicking the high-critical temporal behaviour on the target. By integrating a low-critical video encoding subsystem with a benchmark suite as the high-critical subsystem we show that the performance model allows for evaluating end-to-end execution times in the low-critical function with an average error of 0.37\% and the application proxy only introduces a maximum error of 1.14\% in a performance evaluation.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2018:ESA, author = "Yung-Chih Chen", title = "Enhancements to {SAT} Attack: Speedup and Breaking Cyclic Logic Encryption", journal = j-TODAES, volume = "23", number = "4", pages = "52:1--52:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3190853", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Logic encryption is an IC protection technique for preventing an IC design from overproduction and unauthorized use. It hides a design's functionality by inserting key gates and key inputs, such that a secret key is required to activate the design and make it function correctly. The security of a logic encryption algorithm is evaluated according to the difficulty of cracking the secret key. The state-of-the-art attack method identifies a secret key with a series of SAT-solving calls to prune all the incorrect keys. Although it can break most of the existing logic encryption algorithms within a few hours, we observe that there exist two enhancements for increasing its efficiency. 
First, we introduce a preprocess to identify and eliminate redundant key inputs and simplify SAT problems. Second, we present a key checking process for increasing the pruned incorrect keys in each SAT-solving iteration. We conducted the experiments on a set of benchmark circuits encrypted by six different logic encryption algorithms. The simulation results show that the enhanced method can successfully unlock 10 benchmark circuits which originally could not be cracked within 1 hour. For all the benchmark circuits, the average speedup is approximately 2.2x in terms of simulation time. Furthermore, a recent logic encryption method locks a design by creating cyclic paths, which can invalidate the SAT-based attack method. We analyze the impact of cyclic paths and propose an enhancement to break the cyclic logic encryption method.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2018:PIP, author = "Irith Pomeranz", title = "Partially Invariant Patterns for {LFSR}-Based Generation of Close-to-Functional Broadside Tests", journal = j-TODAES, volume = "23", number = "4", pages = "53:1--53:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3201405", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Close-to-functional scan-based tests are expected to create close-to-functional operation conditions in order to avoid overtesting of delay faults. Existing metrics for the proximity to functional operation conditions are based on the scan-in state. For example, they consider the distance between the scan-in state and a reachable state (a state that the circuit can visit during functional operation). 
However, the deviation from functional operation conditions can increase during a test beyond the deviation that is measured by the scan-in state. To ensure that the deviation does not increase, this article introduces the concept of a partially invariant pattern. The article describes a procedure for extracting partially invariant patterns from functional broadside tests whose scan-in states are reachable states. Being partially specified, partially invariant patterns are suitable for test data compression. The article studies the use of partially invariant patterns for linear-feedback shift-register (LFSR) based test data compression. Noting that a seed may not exist for a given partially invariant pattern with a given LFSR, the procedure described in this article uses an iterative process that not only matches a seed to a partially invariant pattern, but also adjusts the partially invariant pattern based on the test that the seed produces. The article also addresses the selection of LFSRs for the generation of close-to-functional broadside tests based on partially invariant patterns. Experimental results are presented to demonstrate the feasibility of the procedure.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zhao:2018:TSB, author = "Hengyang Zhao and Qi Hua and Hai-Bao Chen and Yaoyao Ye and Hai Wang and Sheldon X.-D.
Tan and Esteban Tlelo-Cuautle", title = "Thermal-Sensor-Based Occupancy Detection for Smart Buildings Using Machine-Learning Methods", journal = j-TODAES, volume = "23", number = "4", pages = "54:1--54:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3200904", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we propose a novel approach to detect the occupancy behavior of a building through the temperature and/or possible heat source information. The new method can be used for energy reduction and security monitoring for emerging smart buildings. Our work is based on a building simulation program, EnergyPlus, from the Department of Energy. EnergyPlus can model various time-series inputs to a building such as ambient temperature; heating, ventilation, and air-conditioning (HVAC) inputs; power consumption of electronic equipment; lighting; and number of occupants in a room, sampled each hour, and produce resulting temperature traces of zones (rooms). Two machine-learning-based approaches for detecting human occupancy of a smart building are applied herein, namely support vector regression (SVR) and recurrent neural network (RNN). Experimental results with SVR show that the four-feature model provides accurate detection rates, giving a 0.638 average error and 5.32\% error rate, and the five-feature model delivers a 0.317 average error and 2.64\% error rate. This indicates that SVR is a viable option for occupancy detection. In the RNN method, Elman's RNN can estimate occupancy information of each room of a building with high accuracy. It has local feedback in each layer and, for a five-zone building, it is very accurate for occupancy behavior estimation. 
The error level, in terms of number of people, can be as low as 0.0056 on average and 0.288 at maximum, considering ambient, room temperatures, and HVAC powers as detectable information. Without knowing HVAC powers, the estimation error can still be 0.044 on average, and only 0.71\% estimated points have errors greater than 0.5. Our article further shows that both methods deliver similar accuracy in the occupancy detection. But the SVR model is more stable for adding or removing features of the system, while the RNN method can deliver more accuracy when the features used in the model do not change a lot.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Shalu:2018:DDS, author = "Shalu and Srijan Kumar and Ananya Singla and Sudip Roy and Krishnendu Chakrabarty and Partha P. Chakrabarti and Bhargab B. Bhattacharya", title = "Demand-Driven Single- and Multitarget Mixture Preparation Using Digital Microfluidic Biochips", journal = j-TODAES, volume = "23", number = "4", pages = "55:1--55:??", month = jul, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3200903", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:39 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent studies in algorithmic microfluidics have led to the development of several techniques for automated solution preparation using droplet-based digital microfluidic (DMF) biochips. A major challenge in this direction is to produce a mixture of several reactants with a desired ratio while optimizing reactant cost and preparation time. The sequence of mix-split operations that are to be performed on the droplets is usually represented as a mixing tree (or graph). 
In this article, we present an efficient mixing algorithm, namely, Mixing Tree with Common Subtrees (MTCS), for preparing single-target mixtures. MTCS attempts to best utilize intermediate droplets, which were otherwise wasted, and uses morphing based on permutation of leaf nodes to further reduce the graph size. The technique can be generalized to produce multitarget ratios, and we present another algorithm, namely, Multiple Target Ratios (MTR). Additionally, in order to enhance the output load, we also propose an algorithm for droplet streaming called Multitarget Multidemand (MTMD). Simulation results on a large set of target ratios show that MTCS can reduce the mean values of the total number of mix-split steps ($ T_{ms} $) and waste droplets ($W$) by 16\% and 29\% over Min-Mix (Thies et al. 2008) and by 22\% and 34\% over RMA (Roy et al. 2015), respectively. Experimental results also suggest that MTR can reduce the average values of $ T_{ms} $ and $W$ by 23\% and 44\% over the repeated version of Min-Mix, by 30\% and 49\% over the repeated version of RMA, and by 9\% and 22\% over the repeated version of MTCS, respectively. It is observed that MTMD can reduce the mean values of $ T_{ms} $ and $W$ by 64\% and 85\%, respectively, over MTR.
Thus, the proposed multitarget techniques MTR and MTMD provide efficient solutions to multidemand, multitarget mixture preparation on a DMF platform.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2018:DML, author = "Hantao Huang and Hang Xu and Yuehua Cai and Rai Suleman Khalid and Hao Yu", title = "Distributed Machine Learning on Smart-Gateway Network toward Real-Time Smart-Grid Energy Management with Behavior Cognition", journal = j-TODAES, volume = "23", number = "5", pages = "56:1--56:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3209888", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Real-time data analytics for smart-grid energy management is challenging with consideration of both occupant behavior profiles and energy profiles. This article proposes a distributed and networked machine-learning platform on smart-gateway-based smart-grid in residential buildings. It can analyze occupant behaviors, provide short-term load forecasting, and allocate renewable energy resources. First, occupant behavior profile is captured by real-time indoor positioning system with WiFi data analytics; and the energy profile is extracted by real-time meter system with electricity load data analytics. Then, the 24-hour occupant behavior profile and energy profile are fused with prediction using an online distributed machine-learning algorithm with real-time data update. Based on the forecasted occupant behavior profile and energy profile, solar energy source is allocated to reduce peak demand on the main electricity power-grid.
The whole management flow can be operated on the distributed smart-gateway network with limited computational resources but with a supported general machine-learning engine. Experimental results on occupant behavior extraction show that the proposed algorithm can achieve 91.2\% positioning accuracy within 3.64m. Moreover, $ 50 \times $ and $ 38 \times $ speed-up is obtained during data testing and training, respectively, when compared to traditional support vector machine (SVM) method. For short-term load forecasting, it is 14.83\% more accurate when compared to SVM-based data analytics. Based on the predicted occupant behavior profile and energy profile, our proposed energy management system can achieve 19.66\% more peak load reduction and 26.41\% more cost saving as compared to the SVM-based method.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zoni:2018:CSC, author = "Davide Zoni and Alessandro Barenghi and Gerardo Pelosi and William Fornaciari", title = "A Comprehensive Side-Channel Information Leakage Analysis of an In-Order {RISC CPU} Microarchitecture", journal = j-TODAES, volume = "23", number = "5", pages = "57:1--57:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3212719", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Side-channel attacks are a prominent threat to the security of embedded systems. To perform them, an adversary evaluates the goodness of fit of a set of key-dependent power consumption models to a collection of side-channel measurements taken from an actual device, identifying the secret key value as the one yielding the best-fitting model. 
In this work, we analyze for the first time the microarchitectural components of a 32-bit in-order RISC CPU, showing which one of them is accountable for unexpected side-channel information leakage. We classify the leakage sources, identifying the data serialization points in the microarchitecture and providing a set of hints that can be fruitfully exploited to generate implementations resistant against side-channel attacks, either writing or generating proper assembly code.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Seo:2018:NIS, author = "Minjun Seo and Roman Lysecky", title = "Non-Intrusive In-Situ Requirements Monitoring of Embedded System", journal = j-TODAES, volume = "23", number = "5", pages = "58:1--58:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3206213", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accounting for all operating conditions of a system at the design stage is typically infeasible for complex systems. Monitoring and verifying system requirements at runtime enable a system to continuously and introspectively ensure the system is operating correctly in the presence of dynamic execution scenarios. In this article, we present a requirements-driven methodology enabling efficient runtime monitoring of embedded systems. The proposed approach extracts a runtime monitoring graph from system requirements specified using UML sequence diagrams. Non-intrusive, on-chip hardware dynamically monitors the system execution, verifies the execution adheres to the requirements model, and in the event of a failure provides detailed information that can be analyzed to determine the root cause. 
Using case studies of an autonomous vehicle and pacemaker prototypes, we analyze the relationship between event coverage, detection rate, and hardware requirements.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2018:DDP, author = "Irith Pomeranz", title = "Dynamically Determined Preferred Values and a Design-for-Testability Approach for Multiplexer Select Inputs under Functional Test Sequences", journal = j-TODAES, volume = "23", number = "5", pages = "59:1--59:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3219778", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Earlier works observed that certain primary inputs have preferred values, which help increase the gate-level fault coverage when they appear in a functional test sequence. This article observes that multiplexers present additional opportunities for increasing the fault coverage of a functional test sequence, which are not captured by preferred primary input values. Because multiplexers are prevalent, their effect on the fault coverage can be significant. A static analysis that is independent of any functional test sequence is performed in this article to identify preferred values for the outputs of multiplexers. This is followed by a dynamic analysis that adjusts the select inputs of the multiplexers for a given functional test sequence to ensure that the preferred values appear on the outputs of the multiplexers more often. The analysis yields design-for-testability logic for the select inputs of the multiplexers that have preferred values.
The logic is independent of the functional test sequence, and it allows the fault coverage to be increased when the select inputs are not primary inputs, or when the same select inputs are used for different multiplexers. Experimental results are presented to demonstrate that this approach has a significant effect on the fault coverage of functional test sequences.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lee:2018:PTT, author = "Dongjin Lee and Sourav Das and Janardhan Rao Doppa and Partha Pratim Pande and Krishnendu Chakrabarty", title = "Performance and Thermal Tradeoffs for Energy-Efficient Monolithic {$3$D} Network-on-Chip", journal = j-TODAES, volume = "23", number = "5", pages = "60:1--60:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3223046", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Three-dimensional (3D) integration enables the design of high-performance and energy-efficient network on chip (NoC) architectures as communication backbones for manycore chips. To exploit the benefits of the vertical dimension of 3D integration, through-silicon-via (TSV) has been predominantly used in state-of-the-art manycore chip design. However, for TSV-based systems, high power density and the resultant thermal hotspot remain major concerns from the perspectives of chip functionality and overall reliability. The power consumption and thermal profiles of 3D NoCs can be improved by incorporating a Voltage-Frequency-Island (VFI)-based power management strategy. However, due to inherent thermal constraints of a TSV-based 3D system, we are unable to fully exploit the benefits offered by the power management methodology. 
In this context, emergence of monolithic 3D (M3D) integration has opened up new possibility of designing ultra-low-power and high-performance circuits and systems. The smaller dimensions of the inter-layer dielectric (ILD) and monolithic inter-tier vias (MIVs) offer high-density integration, flexibility of partitioning logic blocks across multiple tiers, and significant reduction of total wire-length. In this work, we present the first-ever study of the performance-thermal tradeoffs for energy efficient monolithic 3D manycore chips. In particular, we present a comparative performance evaluation of M3D NoCs with respect to their conventional TSV-based counterparts. We demonstrate that the proposed M3D-based NoC architecture incorporating VFI-based power management achieves a maximum of 29.4\% lower energy-delay-product (EDP) compared to the TSV-based designs for a large set of benchmarks. We also demonstrate that the M3D-based NoC shows up to 29.1\% lower maximum temperature than the TSV-based counterpart for these benchmarks.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Han:2018:FCS, author = "Inhak Han and Youngsoo Shin", title = "Folded Circuit Synthesis: Min-Area Logic Synthesis Using Dual-Edge-Triggered Flip-Flops", journal = j-TODAES, volume = "23", number = "5", pages = "61:1--61:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3229082", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The area required by combinational logic of a sequential circuit based on standard flip-flops can be reduced by identifying subcircuits that are identical. 
Pairs of matching subcircuits can then be replaced by circuits in which dual-edge-triggered flip-flops operate on multiplexed data at the rising and falling edges of the clock signal. We show how to modify the Boolean network describing a combinational logic to increase the opportunities for folding, without affecting its function. Experiments with benchmark circuits achieved an average reduction in circuit area of 18\%.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Elmandouh:2018:GFV, author = "Eman M. Elmandouh and Amr G. Wassal", title = "Guiding Formal Verification Orchestration Using Machine Learning Methods", journal = j-TODAES, volume = "23", number = "5", pages = "62:1--62:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3224206", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Typical modern HW designs include many blocks associated with thousands of design properties. Having today's commercial formal verifiers utilize a complementary set of state-of-art formal algorithms is a key in enabling the formal verification tools to successfully cope with verification problems of different sizes, types, and complexities. Formal engines orchestration is the methodology used to pick the most appropriate formal engine for a specific verification problem. It assures proper scheduling of the formal engines to minimize the time consumed to solve individual design verification problems, hence highly impacts the time required to verify the overall design properties. 
This work proposes the utilization of supervised machine learning classification techniques to guide the orchestration step by predicting the formal engines that should be assigned to a design property. Up to 16,500 formal verification runs on RTL designs and their properties are used to train the classifier to create a prediction model. The classifier assigns any new verification problem to an appropriate list of formal engines associated with a probability distribution over the set of engines classes. Our results indicate how the proposed model is able to improve the formal suite total run-time by up to 59\% of its maximum allowable time improvement using multi-classification-based orchestration and to nominate with 88\% accuracy the appropriate formal engines for new-to-verify HW designs.", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{K:2018:AAF, author = "Keerthi K and Chester Rebeiro and Aritra Hazra", title = "An Algorithmic Approach to Formally Verify an {ECC} Library", journal = j-TODAES, volume = "23", number = "5", pages = "63:1--63:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3224205", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The weakest link in cryptosystems is quite often due to the implementation rather than the mathematical underpinnings. A vast majority of attacks in the recent past have targeted programming flaws and bugs to break security systems. Due to the complexity, empirically verifying such systems is practically impossible, while manual verification as well as testing do not provide adequate guarantees. 
In this article, we leverage model checking techniques to prove the functional correctness of an elliptic curve cryptography (ECC) library with respect to its formal specification. We demonstrate how the huge state space of the C library can be aptly verified using a hierarchical assume-guarantee verification strategy. To test the scalability of this approach, we verify the correctness of five NIST-specified elliptic curve implementations. We also verify the newer curve25519 elliptic curve, which is finding multiple applications, due to its higher security and simpler implementation. The 192-bit NIST elliptic curve took 1 day to verify. This was the smallest curve we verified. The largest curve with a 521-bit prime field took 26 days to verify. Curve25519 took 1.5 days to verify.", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2018:EFM, author = "Tseng-Yi Chen and Yuan-Hao Chang and Yuan-Hung Kuan and Ming-Chang Yang and Yu-Ming Chang and Pi-Cheng Hsiu", title = "Enhancing Flash Memory Reliability by Jointly Considering Write-back Pattern and Block Endurance", journal = j-TODAES, volume = "23", number = "5", pages = "64:1--64:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3229192", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Owing to high cell density caused by the advanced manufacturing process, the reliability of flash drives turns out to be rather challenging in flash system designs. To enhance the reliability of flash drives, error-correcting code (ECC) has been widely utilized in flash drives to correct error bits during programming/reading data to/from flash drives. 
Although ECC can effectively enhance the reliability of flash drives by correcting error bits, the capability of ECC would degrade while the program/erase (P/E) cycles of flash blocks is increased. Finally, ECC could not correct a flash page, because a flash page contains too many error bits. As a result, reducing error bits is an effective solution to further improve the reliability of flash drives when a specific ECC is adopted in the flash drive. This work focuses on how to reduce the probability of producing error bits in a flash page. Thus, we propose a pattern-aware write strategy for flash reliability enhancement. The proposed write strategy considers both the P/E cycle of blocks and the pattern of written data while a flash block is allocated to store the written data. Since the proposed write strategy allocates young blocks (respectively, old blocks) for hot data (respectively, cold data) and flips the bit pattern of the written data to the appropriate bit pattern, the proposed strategy can effectively improve the reliability of flash drives. The experimental results show that the proposed strategy can reduce the number of error pages by up to 50\%, compared with the well-known DFTL solution. 
Moreover, the proposed strategy is orthogonal with all ECC mechanisms so that the reliability of the flash drives with ECC mechanisms can be further improved by the proposed strategy.", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xie:2018:TER, author = "Guoqi Xie and Zhetao Li and Na Yuan and Renfa Li and Keqin Li", title = "Toward Effective Reliability Requirement Assurance for Automotive Functional Safety", journal = j-TODAES, volume = "23", number = "5", pages = "65:1--65:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3230620", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Automotive functional safety requirement includes response time and reliability requirements learning from the functional safety standard ISO 26262. These two requirements must be simultaneously satisfied to assure automotive functional safety requirement. However, increasing reliability increases the response time intuitively. This study proposes a method to find the solution with the minimum response time while assuring reliability requirement. Pre-assigning reliability values to unassigned tasks by transferring the reliability requirement of the function to each task is a useful reliability requirement assurance approach proposed in recent years. However, the pre-assigned reliability values in state-of-the-art studies have unbalanced distribution of the reliability of all tasks, thereby resulting in a limited reduction in response time. 
This study presents the geometric mean-based non-fault-tolerant reliability pre-assignment (GMNRP) and geometric mean-based fault-tolerant reliability pre-assignment (GMFRP) approaches, in which geometric mean-based reliability values are pre-assigned to unassigned tasks. Geometric mean can make the pre-assigned reliability values of unassigned tasks to the central tendency, such that it can distribute the reliability requirements in a more balanced way. Experimental results show that GMNRP and GMFRP can effectively reduce the response time compared with their individual state-of-the-art counterparts.", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Abuowaimer:2018:GRD, author = "Ziad Abuowaimer and Dani Maarouf and Timothy Martin and Jeremy Foxcroft and Gary Gr{\'e}wal and Shawki Areibi and Anthony Vannelli", title = "{GPlace3.0}: Routability-Driven Analytic Placer for {UltraScale FPGA} Architectures", journal = j-TODAES, volume = "23", number = "5", pages = "66:1--66:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3233244", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Optimizing for routability during FPGA placement is becoming increasingly important, as failure to spread and resolve congestion hotspots throughout the chip, especially in the case of large designs, may result in placements that either cannot be routed or that require the router to work excessively hard to obtain success. In this article, we introduce a new, analytic routability-aware placement algorithm for Xilinx UltraScale FPGA architectures. The proposed algorithm, called GPlace3.0, seeks to optimize both wirelength and routability. 
Our work contains several unique features including a novel window-based procedure for satisfying legality constraints in lieu of packing, an accurate congestion estimation method based on modifications to the pathfinder global router, and a novel detailed placement algorithm that optimizes both wirelength and external pin count. Experimental results show that compared to the top three winners at the recent ISPD'16 FPGA placement contest, GPlace3.0 is able to achieve (on average) a 7.53\%, 15.15\%, and 33.50\% reduction in routed wirelength, respectively, while requiring less overall runtime. As well, an additional 360 benchmarks were provided directly from Xilinx Inc. These benchmarks were used to compare GPlace3.0 to the most recently improved versions of the first- and second-place contest winners. Subsequent experimental results show that GPlace3.0 is able to outperform the improved placers in a variety of areas including number of best solutions found, fewest number of benchmarks that cannot be routed, runtime required to perform placement, and runtime required to perform routing.", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Fallahzadeh:2018:TPC, author = "Ramin Fallahzadeh and Hassan Ghasemzadeh", title = "Trading Off Power Consumption and Prediction Performance in Wearable Motion Sensors: an Optimal and Real-Time Approach", journal = j-TODAES, volume = "23", number = "5", pages = "67:1--67:??", month = oct, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3198457", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Power consumption is identified as one of the main complications in designing practical wearable systems, mainly due to their stringent resource 
limitations. When designing wearable technologies, several system-level design choices, which directly contribute to the energy consumption of these systems, must be considered. In this article, we propose a computationally lightweight system optimization framework that trades off power consumption and performance in connected wearable motion sensors. While existing approaches exclusively focus on one or a few hand-picked design variables, our framework holistically finds the optimal power-performance solution with respect to the specified application need. Our design tackles a multi-variant non-convex optimization problem that is theoretically hard to solve. To decrease the complexity, we propose a smoothing function that reduces this optimization to a convex problem. The reduced optimization is then solved in linear time using a devised derivative-free optimization approach, namely cyclic coordinate search. We evaluate our framework against several holistic optimization baselines using a real-world wearable activity recognition dataset. 
We minimize the energy consumption for various activity-recognition performance thresholds ranging from 40\% to 80\% and demonstrate up to 64\% energy savings.", acknowledgement = ack-nhfb, articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Daboul:2018:AAT, author = "Siad Daboul and Stephan Held and Jens Vygen and Sonja Wittke", title = "An Approximation Algorithm for Threshold Voltage Optimization", journal = j-TODAES, volume = "23", number = "6", pages = "68:1--68:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3232538", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a primal-dual approximation algorithm for minimizing the leakage power of an integrated circuit by assigning gate threshold voltages. While most existing techniques do not provide a performance guarantee, we prove an upper bound on the power consumption. The algorithm is practical and works with an industrial sign-off timer. It can be used for post-routing power reduction or for optimizing leakage power throughout the design flow. We demonstrate the practical performance on recent microprocessor units. Our implementation obtains significant leakage power reductions of up to 8\% on top of one of the most successful algorithms for gate sizing and threshold voltage optimization. 
After timing-aware global routing, we achieve leakage power reductions of up to 34\%.", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Delledonne:2018:CDA, author = "Lorenzo Delledonne and Vittorio Zaccaria and Ruggero Susella and Guido Bertoni and Filippo Melzani", title = "{CASCA}: a Design Automation Approach for Designing Hardware Countermeasures Against Side-Channel Attacks", journal = j-TODAES, volume = "23", number = "6", pages = "69:1--69:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3241047", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Implementing a cryptographic circuit poses challenges not always acknowledged in the backing mathematical theory. One of them is the vulnerability against side-channel attacks. A side-channel attack is a procedure that uses information leaked by the circuit through, for example, its own power consumption or electromagnetic emissions, to derive sensitive data (e.g., the secret key used for encryption). Nowadays, we design circuitry to keep this sensitive information from leaking (i.e., a countermeasure), but the path from specification down to implementation is far from being fully automatic. As we know, manual refinement steps can be error prone and the sheer potential of these errors can be devastating in a scenario such as the one we are dealing with. In this article, we investigate whether a single embedded domain specific language (EDSL) can, at the same time, help us in specifying and enforcing the functionality of the circuit as well as its protection against side-channel attacks.
The EDSL is a fundamental block of an original design flow (named Countermeasure Against Side-Channel Attacks, i.e., CASCA) whose aim is to complement an existing industrial scenario and to provide the necessary guarantee that a secure primitive is not vulnerable up to a first-order attack. As a practical case study, we will show how we applied the proposed tools to ensure both functional and extra-functional correctness of a composite-field Advanced Encryption Standard (AES) S-Box. To ensure the reproducibility of this research, this article is accompanied by an open source release of the EDSL$^1$ that contains the presented S-Box implementation and an additional 3-Shares threshold implementation of the Keccak $ \chi $ function [7].", acknowledgement = ack-nhfb, articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chang:2018:DMU, author = "Doohwang Chang and Ganapati Bhat and Umit Ogras and Bertan Bakkaloglu and Sule Ozev", title = "Detection Mechanisms for Unauthorized Wireless Transmissions", journal = j-TODAES, volume = "23", number = "6", pages = "70:1--70:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3241046", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With increasing diversity of supply chains from design to delivery, there is an increasing risk that unauthorized changes can be made within an IC. One of the motivations for this type of change is to learn important information (such as encryption keys, spreading codes) from the hardware, and transmit this information to a malicious party. To evade detection, such unauthorized communication can be hidden within legitimate bursts of transmit signal. 
In this article, we present several signal processing techniques to detect unauthorized transmissions which can be hidden within the legitimate signal. We employ a scheme where the legitimate transmission is configured to emit a single sinusoidal waveform. We use time and spectral domain analysis techniques to explore the transmit spectrum. Since every transmission, no matter how low the signal power is, must have a spectral signature, we identify unauthorized transmission by eliminating the desired signal from the spectrum after capture. Experiment results show that when spread spectrum techniques are used, the presence of an unauthorized signal can be determined without the need for decoding the malicious signal. The proposed detection techniques need to be used as enhancements to the regular testing and verification procedures if hardware security is a concern.", acknowledgement = ack-nhfb, articleno = "70", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Dong:2018:PAA, author = "Xuan Dong and Lihong Zhang", title = "{PV}-Aware Analog Sizing for Robust Analog Layout Retargeting with Optical Proximity Correction", journal = j-TODAES, volume = "23", number = "6", pages = "71:1--71:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3236624", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "For analog integrated circuits (ICs) in nanometer technology nodes, process variation (PV) induced by lithography may not only cause serious wafer pattern distortion, but also result in device mismatch, which can readily ruin circuit performance. 
Although the conventional optical proximity correction (OPC) operations can effectively improve the wafer image fidelity, an analog circuit without robust device sizes is still highly vulnerable to such a mismatch effect. In this article, a PV-aware sizing-inclusive analog layout retargeting framework, which encloses an efficient hybrid OPC scheme for yield enhancement, is proposed. The device sizes are tuned during the layout retargeting process by using a deterministic circuit-sizing algorithm considering PV conditions. Our hybrid OPC method combines global rule-based OPC with local model-based OPC functions to boost the wafer image quality improvement but without degrading the computational efficiency. The experimental results show that our proposed framework can achieve the best wafer image quality and circuit performance preservation compared to any other alternative approaches.", acknowledgement = ack-nhfb, articleno = "71", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Eslami:2018:RTC, author = "Fatemeh Eslami and Steven J. E. Wilton", title = "Rapid Triggering Capability Using an Adaptive Overlay during {FPGA} Debug", journal = j-TODAES, volume = "23", number = "6", pages = "72:1--72:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3241045", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Field Programmable Gate Array (FPGA) technology is rapidly gaining traction in a wide range of applications. Nonetheless, FPGAs still require long design and debug cycles. To debug hardware circuits, trace-based instrumentation is inserted into the design that enables capturing data during the circuit execution into on-chip memories for later offline analysis. 
Since on-chip memories are limited, a trigger circuitry is used to only record data related to specific events during the execution. However, during debugging, a circuit recompilation is required on modifying these instruments. This can be very slow, reducing debug productivity. In this article, we propose a non-intrusive and rapid triggering solution with a tailored overlay fabric and mapping algorithm that seeks to enable fast debug iterations without performing a recompilation. This overlay is specialized for small combinational and sequential circuits with a single output; such circuits are typical of common trigger functions. We present an adaptive strategy to construct the overlay fabric using spare FPGA resources at compile time. At debug time, our proposed trigger mapping algorithms adapt to this specialized overlay to rapidly implement combinational and sequential trigger circuits. Our results show that the overlay fabric can be reconfigured to map different triggering scenarios in less than 40s instead of recompiling the circuit during debug iterations, increasing debug productivity.", acknowledgement = ack-nhfb, articleno = "72", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Xiang:2018:FTU, author = "Dong Xiang and Krishnendu Chakrabarty and Hideo Fujiwara", title = "Fault-Tolerant Unicast-Based Multicast for Reliable Network-on-Chip Testing", journal = j-TODAES, volume = "23", number = "6", pages = "73:1--73:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3243214", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a unified test technique that targets faults in links, routers, and cores of a network-on-chip design based on test sessions. 
We call an entire procedure, that delivers test packets to the subset of routers/cores, a test session. Test delivery for router/core testing is formulated as two fault-tolerant multicast algorithms. Test packet delivery for routers is implemented as a fault-tolerant unicast-based multicast scheme via the fault-free links and routers that were identified in the previous test sessions to avoid packet corruption. A new fault-tolerant routing algorithm is also proposed for the unicast-based multicast core test delivery in the whole network. Identical cores share the same test set, and they are tested within the same test session. Simulation results highlight the effectiveness of the proposed method in reducing test time.", acknowledgement = ack-nhfb, articleno = "73", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Topaloglu:2018:ETS, author = "Rasit O. Topaloglu and Farinaz Koushanfar", title = "Editorial for {TODAES} Special Issue on {Internet of Things} System Performance, Reliability, and Security", journal = j-TODAES, volume = "23", number = "6", pages = "74:1--74:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3276908", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, articleno = "74e", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Yang:2018:UUE, author = "Kun Yang and Ulbert Botero and Haoting Shen and Damon L. Woodard and Domenic Forte and Mark M. 
Tehranipoor", title = "{UCR}: an Unclonable Environmentally Sensitive Chipless {RFID} Tag For Protecting Supply Chain", journal = j-TODAES, volume = "23", number = "6", pages = "74:1--74:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3264658", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Chipless Radio Frequency Identification (RFID) tags that do not include an integrated circuit (IC) in the transponder are more appropriate for supply-chain management of low-cost commodities and have been gaining extensive attention due to their relatively lower price. However, existing chipless RFID tags consume considerable tag area and manufacturing time/cost because of complex fabrication process (e.g., requiring removing or shorting some resonators on the tag substrate to encode data). Worse still, their identifiers (IDs) are deterministic, clonable, and small in terms of bitwidth. To address these shortcomings and help preserve the cold chain for commodities (e.g., vaccines, pharmaceuticals, etc.) sensitive to temperature, we develop a novel unclonable environmentally sensitive chipless RFID (UCR) tag that intrinsically generates a unique ID from both manufacturing variations and ambient temperature variation. 
A UCR tag consists of two parts: (i) a certain number of concentric ring slot resonators integrated on a certain laminate (e.g., TACONIC TLX-0), whose resonance frequencies rely on geometric parameters of slot resonators and dielectric constant of substrate material that are sensitive to manufacturing variations, and (ii) a stand-alone circular ring slot resonator integrated on a particular substrate (e.g., grease) that will be melted at a high temperature, whose resonance frequency relies on geometric parameters of slot resonator, dielectric constant of substrate material, and ambient temperature. UCR tags have the capability to track commodities and their temperatures in the supply chain. The area of UCR tag is comparable to regular quick response (QR) code. Experimental results based on UCR tag prototypes have verified their uniqueness and reliability.", acknowledgement = ack-nhfb, articleno = "74", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hussain:2018:SSH, author = "Siam Umar Hussain and M. Sadegh Riazi and Farinaz Koushanfar", title = "{SHAIP}: {Secure Hamming Distance for Authentication of Intrinsic PUFs}", journal = j-TODAES, volume = "23", number = "6", pages = "75:1--75:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3274669", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we present SHAIP, a secure Hamming distance-based mutual authentication protocol. It allows an unlimited number of authentications by employing an intrinsic Physical Unclonable Function (PUF). PUFs are being increasingly employed for remote authentication of devices. Most of these devices have limited resources. 
Therefore, the intrinsic PUFs are most suitable for this task as they can be built with little or no modification to the underlying hardware platform. One major drawback of the current authentication schemes is that they expose the PUF response. This makes the intrinsic PUFs, which have a limited number of challenge-response pairs, unusable after a certain number of authentication sessions. Moreover, these schemes are one way in the sense that they only allow one party, the prover, to authenticate herself to the verifier. We propose a symmetric mutual authentication scheme based on secure (privacy-preserving) computation of the Hamming distance between the PUF response from the remote device and reference response stored at the verifier end. This allows both parties to authenticate each other without revealing their respective sets of inputs. We show that our scheme is effective with all state-of-the-art intrinsic PUFs. The proposed scheme is lightweight and does not require any modification to the underlying hardware.", acknowledgement = ack-nhfb, articleno = "75", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Winograd:2018:PGU, author = "Ted Winograd and Gaurav Shenoy and Hassan Salmani and Hamid Mahmoodi and Setareh Rafatirad and Houman Homayoun", title = "Programmable Gates Using Hybrid {CMOS--STT} Design to Prevent {IC} Reverse Engineering", journal = j-TODAES, volume = "23", number = "6", pages = "76:1--76:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3236622", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents a rigorous step towards design-for-assurance by introducing a new class of logically reconfigurable design resilient to design reverse engineering. 
Based on the non-volatile spin transfer torque (STT) magnetic technology, we introduce a basic set of non-volatile reconfigurable Look-Up-Table (LUT) logic components (NV-STT-based LUTs). An STT-based LUT with a significantly different set of characteristics compared to CMOS provides new opportunities to enhance design security yet makes it challenging to remain highly competitive with custom CMOS or even SRAM-based LUT in terms of power, performance, and area. To address these challenges, we propose several algorithms to select and replace custom CMOS gates with reconfigurable STT-based LUTs during design implementation such that the functionality of STT-based components and therefore the entire design cannot be determined in any manageable time, rendering any design reverse engineering attack ineffective. Our study, conducted on a large number of standard circuit benchmarks, concludes significant resiliency of hybrid STT-CMOS circuits against various types of attacks. Furthermore, the selection algorithms on average have a small impact on the performance of the circuit. We also tested these techniques against satisfiability attacks developed recently and show that these techniques also render more advanced reverse-engineering techniques computationally infeasible.", acknowledgement = ack-nhfb, articleno = "76", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Truong:2018:LSE, author = "Anh Truong and S. 
Rasoul Etesami and Negar Kiyavash", title = "Learning From Sleeping Experts: Rewarding Informative, Available, and Accurate Experts", journal = j-TODAES, volume = "23", number = "6", pages = "77:1--77:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3236617", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We consider a generalized model of learning from expert advice in which experts could abstain from participating at some rounds. Our proposed online algorithm falls into the class of weighted average predictors and uses a time-varying multiplicative weight update rule. This update rule changes the weight of an expert based on his or her relative performance compared to the average performance of available experts at the current round. This makes the algorithm suitable for recommendation systems in the presence of an adversary with many potential applications in the new emerging area of the Internet of Things. We prove the convergence of our algorithm to the best expert, defined in terms of both availability and accuracy, in the stochastic setting. In particular, we show the applicability of our definition of best expert through convergence analysis of another well-known algorithm in this setting. 
Finally, through simulation results on synthetic and real datasets, we justify the out-performance of our proposed algorithms compared to the existing ones in the literature.", acknowledgement = ack-nhfb, articleno = "77", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chopra:2018:OAC, author = "Abhimanyu Chopra and Hakan Aydin and Setareh Rafatirad and Houman Homayoun", title = "Optimal Allocation of Computation and Communication in an {IoT} Network", journal = j-TODAES, volume = "23", number = "6", pages = "78:1--78:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3236623", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Internet of things (IoT) is being developed for a wide range of applications from home automation and personal fitness to smart cities. With the extensive growth in adaptation of IoT devices comes the uncoordinated and substandard designs aimed at promptly making products available to the end consumer. This substandard approach restricts the growth of IoT in the near future and necessitates that studies understand requirements for an efficient design. A particular area where IoT applications have grown significantly is surveillance and monitoring. Applications of IoT in this domain are relying on distributed sensors, each equipped with a battery, capable of collecting images, processing images, and communicating the raw or processed data to the nearest node until it reaches the base station for decision making. In such an IoT network where processing can be distributed over the network, the important research question is how much of data each node should process and how much it should communicate for a given objective. 
This work answers this question and provides a deeper understanding of energy and delay tradeoffs in an IoT network with three different target metrics.", acknowledgement = ack-nhfb, articleno = "78", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hussain:2018:PPP, author = "Siam Umar Hussain and Farinaz Koushanfar", title = "{P3}: Privacy Preserving Positioning for Smart Automotive Systems", journal = j-TODAES, volume = "23", number = "6", pages = "79:1--79:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3236625", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article presents the first privacy-preserving localization method based on provably secure primitives for smart automotive systems. Using this method, a car that is lost due to unavailability of GPS can compute its location with assistance from three nearby cars, while the locations of all the participating cars including the lost car remain private. Technological enhancement of modern vehicles, especially in navigation and communication, necessitates parallel enhancement in security and privacy. Previous approaches to maintaining user location privacy suffered from one or more of the following drawbacks: trade-off between accuracy and privacy, one-sided privacy, and the need of a trusted third party that presents a single point to attack. The localization method presented here is one of the very first location-based services that eliminates all these drawbacks. 
Two protocols for computing the location are presented here based on two Secure Function Evaluation (SFE) techniques that allow multiple parties to jointly evaluate a function on inputs that are encrypted to maintain privacy. The first one is based on the two-party protocol named Yao's Garbled Circuit (GC). The second one is based on the Beaver-Micali-Rogaway (BMR) protocol that allows inputs from more than two parties. The two secure localization protocols exhibit trade-offs between performance and resilience against collusion. Along with devising the protocols, we design and optimize netlists for the functions required for location computation by leveraging conventional logic synthesis tools with custom libraries optimized for SFE. Proof-of-concept implementation of the protocol shows that the complete operation can be performed within only 355ms. The fast computing time enables localization of even moving cars.", acknowledgement = ack-nhfb, articleno = "79", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Muztoba:2018:IAI, author = "Md Muztoba and Rohit Voleti and Fatih Karabacak and Jaehyun Park and Umit Y. Ogras", title = "Instinctive Assistive Indoor Navigation using Distributed Intelligence", journal = j-TODAES, volume = "23", number = "6", pages = "80:1--80:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3212720", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Cyber-physical systems (CPS) and the Internet of Things (IoT) offer a significant potential to improve the effectiveness of assistive technologies for those with physical disabilities. Practical assistive technologies should minimize the number of inputs from users to reduce their cognitive and physical effort.
This article presents an energy-efficient framework and algorithm for assistive indoor navigation with multi-modal user input. The goal of the proposed framework is to simplify the navigation tasks and make them more instinctive for the user. Our framework automates indoor navigation using only a few user commands captured through a wearable device. The proposed methodology is evaluated using both a virtual smart building and a prototype. The evaluations for three different floorplans show one order of magnitude reduction in user effort and communication energy required for navigation, when compared to conventional navigation methodologies that require continuous user inputs.", acknowledgement = ack-nhfb, articleno = "80", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Karabacak:2018:RDU, author = "Fatih Karabacak and Umit Ogras and Sule Ozev", title = "Remote Detection of Unauthorized Activity via Spectral Analysis", journal = j-TODAES, volume = "23", number = "6", pages = "81:1--81:??", month = dec, year = "2018", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3276770", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Unauthorized hardware or firmware modifications, known as trojans, can steal information, drain the battery, or damage IoT devices. Since trojans may be triggered in the field at an unknown instance, it is important to detect their presence at runtime. However, it is difficult to run sophisticated detection algorithms on these devices due to limited computational power and energy and, in some cases, lack of accessibility. This article presents a stand-off self-referencing technique for detecting unauthorized activity. 
The proposed technique processes involuntary electromagnetic emissions on a separate hardware, which is physically decoupled from the device under test. When the device enters the test mode, a predefined test application is run on the device repetitively for a known period. The periodicity ensures that the spectral electromagnetic power of the test application concentrates at known frequencies, leaving the remaining frequencies within the operating bandwidth at the noise level. Any deviations from the noise level for these unoccupied frequency locations indicate the presence of unknown (unauthorized) activity. Hence, we are able to differentiate trojan activity without using a golden reference, or any knowledge of the attributes of the trojan activity. Experiments based on hardware measurements show that the proposed technique achieves close to 100\% detection accuracy at up to 120cm distance.", acknowledgement = ack-nhfb, articleno = "81", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Lin:2019:QEO, author = "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu", title = "Quality-Enhanced {OLED} Power Savings on Mobile Devices", journal = j-TODAES, volume = "24", number = "1", pages = "1:1--1:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3243215", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the future, mobile systems will increasingly feature more advanced organic light-emitting diode (OLED) displays. The power consumption of these displays is highly dependent on the image content. However, existing OLED power-saving techniques either change the visual experience of users or degrade the visual quality of images in exchange for a reduction in the power consumption. 
Some techniques attempt to enhance the image quality by employing a compound objective function. In this article, we present a win-win scheme that always enhances the image quality while simultaneously reducing the power consumption. We define metrics to assess the benefits and cost for potential image enhancement and power reduction. We then introduce algorithms that ensure the transformation of images into their quality-enhanced power-saving versions. Next, the win-win scheme is extended to process videos at a justifiable computational cost. All the proposed algorithms are shown to possess the win-win property without assuming accurate OLED power models. Finally, the proposed scheme is realized through a practical camera application and a video camcorder on mobile devices. The results of experiments conducted on a commercial tablet with a popular image database and on a smartphone with real-world videos are very encouraging and provide valuable insights for future research and practices.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Amir:2019:SPC, author = "Maral Amir and Frank Vahid and Tony Givargis", title = "Switching Predictive Control Using Reconfigurable State-Based Model", journal = j-TODAES, volume = "24", number = "1", pages = "2:1--2:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3267126", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Advanced control methodologies have helped the development of modern vehicles that are capable of path planning and path following. For instance, Model Predictive Control (MPC) employs a predictive model to predict the behavior of the physical system for a specific time horizon in the future. 
An optimization problem is solved to compute optimal control actions while handling model uncertainties and nonlinearities. However, these prediction routines are computationally intensive and the computational overhead grows with the complexity of the model. Switching MPC addresses this issue by combining multiple predictive models, each with a different precision granularity. In this article, we proposed a novel switching predictive control method based on a model reduction scheme to achieve various model granularities for path following in autonomous vehicles. A state-based model with tunable parameters is proposed to operate as a reconfigurable predictive model of the vehicle. A runtime switching algorithm is presented that selects the best model using machine learning. We employed a metric that formulates the tradeoff between the error and computational savings due to model reduction. Our simulation results show that the use of the predictive model in the switching scheme as opposed to single granularity scheme, yields a 45\% decrease in execution time in tradeoff for a small 12\% loss in accuracy in prediction of future outputs and no loss of accuracy in tracking the reference trajectory.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Erol:2019:KSB, author = "Osman Emir Erol and Sule Ozev", title = "Knowledge- and Simulation-Based Synthesis of Area-Efficient Passive Loop Filter Incremental {Zoom-ADC} for Built-In Self-Test Applications", journal = j-TODAES, volume = "24", number = "1", pages = "3:1--3:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3266227", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We propose a fully differential, 
synthesizable zoom-ADC architecture with a passive loop filter for low-frequency Built-In Self-Test (BIST) applications, along with a synthesis tool that can target various design specifications. We present the detailed ADC architecture and a step-by-step process for designing the zoom-ADC. The design flow does not rely on the extensive knowledge of an experienced ADC designer. Two ADCs have been synthesized with different performance requirements in the 65nm CMOS process. The first ADC achieves a 90.4dB Signal-to-Noise Ratio (SNR) in 512 $ \mu $ s measurement time and consumes 17 $ \mu $ W power. The second design achieves a 78.2dB SNR in 31.25 $ \mu $ s measurement time and consumes 63 $ \mu $ W power.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2019:SAT, author = "Yukai Chen and Sara Vinco and Enrico Macii and Massimo Poncino", title = "{SystemC-AMS} Thermal Modeling for the Co-simulation of Functional and Extra-Functional Properties", journal = j-TODAES, volume = "24", number = "1", pages = "4:1--4:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3267125", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Temperature is a critical property of smart systems, due to its impact on reliability and to its inter-dependence with power consumption. Unfortunately, the current design flows evaluate thermal evolution ex-post on offline power traces. This does not allow to consider temperature as a dimension in the design loop, and it misses all the complex inter-dependencies with design choices and power evolution. 
In this article, by adopting the functional language SystemC-AMS (Analog Mixed Signal), we propose a method to enable thermal/power/functional co-simulation. The system thermal model is built by using state-of-the-art circuit equivalent models, by exploiting the support for electrical linear networks intrinsic of SystemC-AMS. The experimental results will show that the choice of SystemC-AMS is a winning strategy for building a simultaneous simulation of multiple functional and extra-functional properties of a system. The generated code exposes an accuracy comparable to that of the reference thermal simulator HotSpot. Additionally, the initial overhead due to the general purpose nature of SystemC-AMS is compensated by the surprisingly high performance of transient simulation, with speedups as high as two orders of magnitude.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Song:2019:HRB, author = "Yang Song and Olivier Alavoine and Bill Lin", title = "Harvesting Row-Buffer Hits via Orchestrated Last-Level Cache and {DRAM} Scheduling for Heterogeneous Multicore Systems", journal = j-TODAES, volume = "24", number = "1", pages = "5:1--5:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3269982", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In heterogeneous multicore systems, the memory subsystem, including the last-level cache and DRAM, is widely shared among the CPU, the GPU, and the real-time cores. Due to their distinct memory traffic patterns, heterogeneous cores result in more frequent cache misses at the last-level cache. 
As cache misses travel through the memory subsystem, two schedulers are involved for the last-level cache and DRAM, respectively. Prior studies treated the scheduling of the last-level cache and DRAM as independent stages. However, with no orchestration and limited visibility of memory traffic, neither scheduling stage is able to ensure optimal scheduling decisions for memory efficiency. Unnecessary precharges and row activations happen in DRAM when the memory scheduler is ignorant of incoming cache misses, and DRAM row-buffer states are invisible to the last-level cache. In this article, we propose a unified memory controller for the last-level cache and DRAM with orchestrated schedulers. The memory scheduler harvests row-buffer hit opportunities in cache request buffers during spare time without inducing significant implementation cost. We further introduce a dynamic orchestrated scheduling policy to improve memory efficiency while achieving target CPU IPC. Extensive evaluations show that the proposed controller improves the total memory bandwidth of DRAM by 16.8\% on average and saves DRAM energy by up to 29.7\% while achieving comparable CPU IPCs. With the dynamic scheduling policy, the unified controller achieves the same IPC as the conventional design and increases DRAM bandwidth by 9.2\%.
In addition, we explore the potential of the proposed memory controller to attain improvements on both memory bandwidth and CPU IPC.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Choi:2019:OFT, author = "Junchul Choi and Hoeseok Yang and Soonhoi Ha", title = "Optimization of Fault-Tolerant Mixed-Criticality Multi-Core Systems with Enhanced {WCRT} Analysis", journal = j-TODAES, volume = "24", number = "1", pages = "6:1--6:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3275154", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article proposes a novel optimization technique of fault-tolerant mixed-criticality multi-core systems with worst-case response time (WCRT) guarantees. Typically, in fault-tolerant multi-core systems, tasks can be replicated or re-executed in order to enhance the reliability. In addition, based on the policy of mixed-criticality scheduling, low-criticality tasks can be dropped at runtime. Such uncertainties caused by hardening and mixed-criticality scheduling make WCRT analysis very difficult. We show that previous analysis techniques are pessimistic as they consider avoidably extreme cases that can be safely ignored within the given reliability constraint. We improve the analysis in order to tighten the pessimism of WCRT estimates by considering the maximum number of faults to be tolerated. Further, we improve the mixed-criticality scheduling by allowing partial dropping of low-criticality tasks. On top of those, we explore the design space of hardening, task-to-core mapping, and quality-of-service of the multi-core mixed-criticality systems. 
The effectiveness of the proposed technique is verified by extensive experiments with synthetic and real-life benchmarks.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2019:BFB, author = "Irith Pomeranz", title = "Boundary-Functional Broadside and Skewed-Load Tests", journal = j-TODAES, volume = "24", number = "1", pages = "7:1--7:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3276976", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Close-to-functional broadside tests are used for avoiding overtesting of delay faults that can result from non-functional operation conditions, while avoiding test escapes because of faults that cannot be detected under functional operation conditions. When a close-to-functional broadside test deviates from functional operation conditions, the deviation can affect the entire circuit. This article defines the concept of a boundary-functional broadside test where non-functional operation conditions are prevented from crossing a preselected boundary. Using the procedure described in this article, the boundary maintains the same values under a boundary-functional broadside test as under a functional broadside test from which it is derived. Indirectly, this ensures that the deviations from functional operation conditions throughout the entire circuit are limited. The concept of a boundary-functional broadside test is extended to skewed-load tests, and to partial-boundary-functional tests. 
Experimental results are presented for benchmark circuits to demonstrate the fault coverage improvements that can be achieved using boundary-functional broadside and skewed-load tests as well as partial-boundary-functional tests of both types.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2019:SEA, author = "Jiajun Li and Guihai Yan and Wenyan Lu and Shijun Gong and Shuhao Jiang and Jingya Wu and Xiaowei Li", title = "{SynergyFlow}: an Elastic Accelerator Architecture Supporting Batch Processing of Large-Scale Deep Neural Networks", journal = j-TODAES, volume = "24", number = "1", pages = "8:1--8:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3275243", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Neural networks (NNs) have achieved great success in a broad range of applications. As NN-based methods are often both computation and memory intensive, accelerator solutions have been proved to be highly promising in terms of both performance and energy efficiency. Although prior solutions can deliver high computational throughput for convolutional layers, they could incur severe performance degradation when accommodating the entire network model, because there exist very diverse computing and memory bandwidth requirements between convolutional layers and fully connected layers and, furthermore, among different NN models. To overcome this problem, we proposed an elastic accelerator architecture, called SynergyFlow, which intrinsically supports layer-level and model-level parallelism for large-scale deep neural networks. 
SynergyFlow boosts the resource utilization by exploiting the complementary effect of resource demanding in different layers and different NN models. SynergyFlow can dynamically reconfigure itself according to the workload characteristics, maintaining a high performance and high resource utilization among various models. As a case study, we implement SynergyFlow on a P395-AB FPGA board. Under 100MHz working frequency, our implementation improves the performance by 33.8\% on average (up to 67.2\% on AlexNet) compared to comparable provisioned previous architectures.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Smirnov:2019:AOV, author = "Fedor Smirnov and Felix Reimann and J{\"u}rgen Teich and Michael Gla{\ss}", title = "Automatic Optimization of the {VLAN} Partitioning in Automotive Communication Networks", journal = j-TODAES, volume = "24", number = "1", pages = "9:1--9:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3278120", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Dividing the communication network into so-called Virtual Local Area Networks (VLANs), i.e., subnetworks that are isolated at the data link layer (OSI layer 2), is a promising approach to address the increasing security challenges in automotive networks. The automation of the VLAN partitioning is a well-researched problem in the domain of local or metropolitan area networks. 
However, the approaches used there are hardly applicable for the design of automotive networks as they mainly focus on reducing the amount of broadcast traffic and cannot capture the many design objectives of automotive networks like the message timing or the link load, which are affected by the VLAN partitioning. As a remedy, this article proposes an approach based on a set of Pseudo-Boolean constraints to generate a message routing which is feasible with respect to the VLAN-related routing restrictions in automotive networks. This approach can be used for a design space exploration to optimize not only the VLAN partitioning but also other routing-related objectives. We demonstrate both the efficiency of our message routing approach and the now accessible optimization potential for the complete Electric/Electronic architecture with a mixed-criticality system from the automotive domain. There we thoroughly investigate the impact of the VLAN partitioning on the message timing and the link loads by optimizing these design objectives concurrently. 
During the exploration of the huge design space, where each resource can be assigned to one of four VLANs, our approach requires less than 40ms for the creation of a valid solution and ensures that all messages satisfy their deadlines and link load bounds.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Huang:2019:ILA, author = "Bo-Yuan Huang and Hongce Zhang and Pramod Subramanyan and Yakir Vizel and Aarti Gupta and Sharad Malik", title = "Instruction-Level Abstraction {(ILA)}: a Uniform Specification for System-on-Chip {(SoC)} Verification", journal = j-TODAES, volume = "24", number = "1", pages = "10:1--10:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3282444", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern Systems-on-Chip (SoC) designs are increasingly heterogeneous and contain specialized semi-programmable accelerators in addition to programmable processors. In contrast to the pre-accelerator era, when the ISA played an important role in verification by enabling a clean separation of concerns between software and hardware, verification of these ``accelerator-rich'' SoCs presents new challenges. From the perspective of hardware designers, there is a lack of a common framework for formal functional specification of accelerator behavior. From the perspective of software developers, there exists no unified framework for reasoning about software/hardware interactions of programs that interact with accelerators. This article addresses these challenges by providing a formal specification and high-level abstraction for accelerator functional behavior. 
It formalizes the concept of an Instruction Level Abstraction (ILA), developed informally in our previous work, and shows its application in modeling and verification of accelerators. This formal ILA extends the familiar notion of instructions to accelerators and provides a uniform, modular, and hierarchical abstraction for modeling software-visible behavior of both accelerators and programmable processors. We demonstrate the applicability of the ILA through several case studies of accelerators (for image processing, machine learning, and cryptography), and a general-purpose processor (RISC-V). We show how the ILA model facilitates equivalence checking between two ILAs, and between an ILA and its hardware finite-state machine (FSM) implementation. Further, this equivalence checking supports accelerator upgrades using the notion of ILA compatibility, similar to processor upgrades using ISA compatibility.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Carpent:2019:RAS, author = "Xavier Carpent and Norrathep Rattanavipanon and Gene Tsudik", title = "Remote Attestation via Self-Measurement", journal = j-TODAES, volume = "24", number = "1", pages = "11:1--11:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3279950", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Remote attestation (RA) is a popular means of detecting malware in embedded and IoT devices. RA is usually realized as an interactive protocol, whereby a trusted party (verifier) measures software integrity of a potentially compromised remote device (prover). 
Early work focused on purely software-based and fully hardware-based techniques, neither of which is ideal for low-end embedded devices. More recent results yielded hybrid (SW/HW) architectures with a minimal set of features to support efficient and secure RA on low-end devices. All prior techniques require on-demand operation, i.e., RA is performed in real time. We identify some drawbacks of this general approach in the context of unattended devices: First, it fails to detect mobile malware that enters and leaves prover between successive RA instances. Second, it requires prover to engage in a potentially expensive (in terms of time and energy) computation, which can be harmful for mission-critical or real-time devices. To address these drawbacks, we introduce the concept of self-measurement, whereby prover periodically and securely measures and records its own software state, based on a pre-established schedule. A (possibly untrusted) verifier occasionally collects and verifies these measurements. We present the design of a concrete technique, called Efficient Remote Attestation via Self-Measurement for Unattended Settings (ERASMUS), justify its features and evaluate its performance. In the process, we also define a new metric, Quality of Attestation (QoA). We believe that ERASMUS is well suited for time-sensitive and/or safety-critical applications that are not served well by on-demand RA. 
Finally, we show that ERASMUS is a promising stepping stone toward handling attestation of multiple devices (i.e., a group or swarm) with high mobility.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Tan:2019:EMI, author = "Jingweijia Tan and Kaige Yan", title = "Efficiently Managing the Impact of Hardware Variability on {GPUs}' Streaming Processors", journal = j-TODAES, volume = "24", number = "1", pages = "12:1--12:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3287308", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Graphics Processing Units (GPUs) are widely used in general-purpose high-performance computing fields due to their highly parallel architecture. In recent years, a new era with the nanometer scale integrated circuit manufacture process has come. As a consequence, GPUs' computation capability gets even stronger. However, as process technology scales down, hardware variability, e.g., process variations (PVs) and negative bias temperature instability (NBTI), has a higher impact on the chip quality. The parallelism of GPU desires high consistency of hardware units on chip; otherwise, the worst unit will inevitably become the bottleneck. So the hardware variability becomes a pressing concern to further improve GPUs' performance and lifetime, not only in integrated circuit fabrication, but more in GPU architecture design. Streaming Processors (SPs) are the key units in GPUs, which perform most of parallel computing operations. Therefore, in this work, we focus on mitigating the impact of hardware variability in GPU SPs. We first model and analyze SPs' performance variations under hardware variability. 
Then, we observe that both PV and NBTI have a large impact on SPs' performance. We further observe unbalanced SP utilization, e.g., some SPs are idle when others are active, during program execution. Leveraging this observation, we propose a Hardware Variability-aware SPs' Management policy (HVSM), which dynamically dispatches computation in appropriate SPs to balance the utilizations. In addition, we find that a large portion of compute operations are duplicate. We also propose an Operation Compression (OC) technique to minimize the unnecessary computations to further mitigate the hardware variability effects. Our experimental results show the combined HVSM and OC technique effectively reduces the impact of hardware variability, which can translate to 37\% performance improvement or 18.3\% lifetime extension for a GPU chip.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Kang:2019:TDF, author = "Ilgweon Kang and Fang Qiao and Dongwon Park and Daniel Kane and Evangeline Fung Yu Young and Chung-Kuan Cheng and Ronald Graham", title = "Three-dimensional Floorplan Representations by Using Corner Links and Partial Order", journal = j-TODAES, volume = "24", number = "1", pages = "13:1--13:??", month = jan, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3289179", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Three-dimensional integrated circuit (3D IC) technology offers a potential breakthrough to enable a paradigm-shift strategy, called ``more than Moore,'' with novel features and advantages over the conventional 2D process technology. 
By having three-dimensional interconnections, 3D IC provides substantial wirelength reduction and a massive amount of bandwidth, which gives significant performance improvement to overcome many of the nontrivial challenges in semiconductor industry. Moreover, 3D integration technology enables to stack disparate technologies with various functionalities into a single system-in-package (SiP), introducing ``true 3D IC'' design. As the first physical design (PD) step, IC floorplanning takes a crucial role to determine IC's overall design qualities such as footprint area, timing closure, power distribution, thermal management, and so on. However, lack of efficient 3D floorplanning algorithms that practically implement advantages of 3D integration technology is a critical bottleneck for PD automation of 3D IC design and implementation. 3D floorplanning (or packing, block partitioning) is a well-known NP-hard problem, and most of 3D floorplanning algorithms rely on heuristics and iterative improvements. Thus, developing complete and efficient 3D floorplan representations is important, since floorplan representation provides the foundation of data structure to search the solution space for 3D IC floorplanning. A well-defined floorplan representation provides a well-organized and cost-effective methodology to design high-performance 3D IC. We propose a new 3D IC floorplan representation methodology using corner links and partial order. Given a fixed number of cuboidal blocks and their volume, algorithmic 3D floorplan representations describe topological structure and physical positions/orientations of each block relative to the origin in the 3D floorplan space. 
In this article, (1) we introduce our novel 3D floorplan representation, called corner links representation, (2) we analyze the equivalence relation between the corner links representation and its corresponding partial order representation, and (3) we discuss several key properties of the corner links representation and partial order representation. The corner links representation provides a complete and efficient structure to assemble the original 3D mosaic floorplan. Also, the corner links representation for the non-degenerate 3D mosaic floorplan can be equivalently expressed by the four trees representation. The partial order representation defines the topological structure of the 3D floorplan with three transitive closure graphs (TCG) for each direction and captures all stitching planes in the 3D floorplan in the order of their respective directions. We demonstrate that the corner links representation can be reduced to its corresponding partial order representation, indicating that the corner links representation shares well-defined and -studied features/properties of 3D TCG-based floorplan representation. If the partial order representation describes relations between any pairs of blocks in the 3D floorplan, then the floorplan is a valid floorplan. 
We show that the partial order representation can restore the absolute coordinates of all blocks in the 3D mosaic floorplan by using the given physical dimensions of blocks.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gong:2019:PEH, author = "Yanping Gong and Fengyu Qian and Lei Wang", title = "Probabilistic Evaluation of Hardware Security Vulnerabilities", journal = j-TODAES, volume = "24", number = "2", pages = "14:1--14:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3290405", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3290405", abstract = "Various design techniques can be applied to implement the finite state machine (FSM) functions in order to optimize timing, performance, power, and to reduce overhead. Recently, malicious attacks to hardware systems have emerged as a critical problem. Fault injection attacks, in particular, alter the function or reveal the critical information of a hardware system through precisely controlled fault injection processes. Attackers can utilize the loopholes and vulnerabilities of FSM functions to access the states that are under protection. A probabilistic model is developed in this article to evaluate the potential vulnerabilities of FSM circuits at the design stage. Analysis based on the statistical behaviors of FSM also shows that the induced circuit errors can be exploited to access the protected states. An effective solution based on state re-encoding is proposed to minimize the risk of unauthorized transitions. 
Simulation results demonstrate that vulnerable transition paths can be protected with small hardware overheads.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Zheng:2019:HEB, author = "Jianwei Zheng and Chao Lu and Jiefeng Guo and Deming Chen and Donghui Guo", title = "A Hardware-Efficient Block Matching Algorithm and Its Hardware Design for Variable Block Size Motion Estimation in Ultra-High-Definition Video Encoding", journal = j-TODAES, volume = "24", number = "2", pages = "15:1--15:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3290408", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3290408", abstract = "Variable block size motion estimation has contributed greatly to achieving an optimal interframe encoding, but involves high computational complexity and huge memory access, which is the most critical bottleneck in ultra-high-definition video encoding. This article presents a hardware-efficient block matching algorithm with an efficient hardware design that is able to reduce the computational complexity of motion estimation while providing a sustained and steady coding performance for high-quality video encoding. A three-level memory organization is proposed to reduce memory bandwidth requirement while supporting a predictive common search window. By applying multiple search strategies and early termination, the proposed design provides 1.8 to 3.7 times higher hardware efficiency than other works. Furthermore, on-chip memory has been reduced by 96.5\% and off-chip bandwidth requirement has been reduced by 39.4\% thanks to the proposed three-level memory organization. 
The corresponding power consumption is only 198mW at the highest working frequency of 500MHz. The proposed design is attractive for high-quality video encoding in real-time applications with low power consumption.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bakhshalipour:2019:RWT, author = "Mohammad Bakhshalipour and Aydin Faraji and Seyed Armin Vakil Ghahani and Farid Samandi and Pejman Lotfi-Kamran and Hamid Sarbazi-Azad", title = "Reducing Writebacks Through In-Cache Displacement", journal = j-TODAES, volume = "24", number = "2", pages = "16:1--16:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3289187", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3289187", abstract = "Non-Volatile Memory (NVM) technology is a promising solution to fulfill the ever-growing need for higher capacity in the main memory of modern systems. Despite having many great features, however, NVM's poor write performance remains a severe obstacle, preventing it from being used as a DRAM alternative in the main memory. Most of the prior work targeted optimizing writes at the main memory side and neglected the decisive role of upper-level cache management policies on reducing the number of writes. In this article, we propose a novel cache management policy that attempts to maximize write-coalescing in the on-chip SRAM last-level cache (LLC) for the sake of reducing the number of costly writes to the off-chip NVM. We decouple a few physical ways of the LLC to have a dedicated and exclusive storage for the dirty blocks after being evicted from the cache and before being sent to the off-chip memory. 
By displacing dirty blocks in exclusive storage, they are kept in the cache based on their rewrite distance and are evicted when they are unlikely to be reused shortly. To maximize the effectiveness of exclusive storage, we manage it as a Cuckoo Cache to offer associativity based on the various applications' demands. Through detailed evaluations targeting various single- and multi-threaded applications, we show that our proposal reduces the number of writebacks by 21\%, on average, over the state-of-the-art method and enhances both performance and energy efficiency.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Bhowmik:2019:PAT, author = "Biswajit Bhowmik and Jatindra Kumar Deka and Santosh Biswas and Bhargab B. Bhattacharya", title = "Performance-Aware Test Scheduling for Diagnosing Coexistent Channel Faults in Topology-Agnostic Networks-on-Chip", journal = j-TODAES, volume = "24", number = "2", pages = "17:1--17:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3291532", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3291532", abstract = "High-performance multiprocessor SoCs used in practice require a complex network-on-chip (NoC) as communication architecture, and the channels therein often suffer from various manufacturing defects. Such physical defects cause a multitude of system-level failures and subsequent degradation of reliability, yield, and performance of the computing platform. Most of the existing test approaches consider mesh-based NoC channels only and do not perform well for other regular topologies such as octagons or spidergons, with regard to test time and overhead issues. 
This article proposes a topology-agnostic test mechanism that is capable of diagnosing on-line, coexistent channel-short, and stuck-at faults in these special NoCs as well as in traditional mesh architectures. We introduce a new test model called Damaru to decompose the network and present an efficient scheduling scheme to reduce test time without compromising resource utilization during testing. Additionally, the proposed scheduling scheme scales well with network size, channel width, and topological diversity. Simulation results show that the method achieves nearly 92\% fault coverage and improves area overhead by almost 60\% and test time by 98\% compared to earlier approaches. As a sequel, packet latency and energy consumption are also improved by 67.05\% and 54.69\%, respectively, and they are further improved with increasing network size.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pourshirazi:2019:WAL, author = "Bahareh Pourshirazi and Majed Valad Beigi and Zhichun Zhu and Gokhan Memik", title = "Writeback-Aware {LLC} Management for {PCM-Based} Main Memory Systems", journal = j-TODAES, volume = "24", number = "2", pages = "18:1--18:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3292009", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3292009", abstract = "With the increase in the number of data-intensive applications on today's workloads, DRAM-based main memories are struggling to satisfy the growing data demand capacity. 
Phase Change Memory (PCM) is a type of non-volatile memory technology that has been explored as a promising alternative for DRAM-based main memories due to its better scalability and lower leakage energy. Despite its many advantages, PCM also has shortcomings such as long write latency, high write energy consumption, and limited write endurance, which are all related to the write operations. In this article, we propose a novel writeback-aware Last Level Cache (LLC) management scheme named WALL to reduce the number of LLC writebacks and consequently improve performance, energy efficiency, and lifetime of a PCM-based main memory system. First, we investigate the writeback behavior of LLC sets and show that writebacks are not uniformly distributed among sets; some sets observe much higher writeback rates than others. We then propose a writeback-aware set-balancing mechanism, which employs the underutilized LLC sets with few writebacks as an auxiliary storage for the evicted dirty lines from sets with frequent writebacks. We also propose a simple and effective writeback-aware replacement policy to avoid the eviction of the dirty blocks that are highly reused after being evicted from the cache. Our experimental results show that WALL achieves an average of 30.9\% reduction in the total number of LLC writebacks, compared to the baseline scheme, which uses the LRU replacement policy. As a result, WALL can reduce the memory energy consumption by 23.1\% and enhance PCM lifetime by $ 1.29 \times $, on average, on an 8-core system with a 4GB PCM main memory, running memory-intensive applications.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Muhammad:2019:RBS, author = "Shaheer Muhammad and M. 
Usman Rafique and Shuai Li and Zili Shao and Qixin Wang and Xue Liu", title = "Reconfigurable Battery Systems: a Survey on Hardware Architecture and Research Challenges", journal = j-TODAES, volume = "24", number = "2", pages = "19:1--19:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3301301", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301301", abstract = "In a reconfigurable battery pack, the connections among cells can be changed during operation to form different configurations. This can lead a battery, a passive two-terminal device, to a smart battery that can reconfigure itself according to the requirement to enhance operational performance. Several hardware architectures with different levels of complexities have been proposed. Some researchers have used existing hardware and demonstrated improved performance on the basis of novel optimization and scheduling algorithms. The possibility of software techniques to benefit the energy storage systems is exciting, and it is the perfect time for such methods as the need for high-performance and long-lasting batteries is on the rise. This novel field requires new understanding, principles, and evaluation metrics of proposed schemes. In this article, we systematically discuss and critically review the state of the art. This is the first effort to compare the existing hardware topologies in terms of flexibility and functionality. We provide a comprehensive review that encompasses all existing research works, starting from the details of the individual battery including modeling and properties as well as fixed-topology traditional battery packs. 
To stimulate further research in this area, we highlight key challenges and open problems in this domain.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Sahoo:2019:FMV, author = "Debiprasanna Sahoo and Swaraj Sha and Manoranjan Satpathy and Madhu Mutyam and S. Ramesh and Partha Roop", title = "Formal Modeling and Verification of a Victim {DRAM} Cache", journal = j-TODAES, volume = "24", number = "2", pages = "20:1--20:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3306491", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306491", abstract = "The emerging Die-stacking technology enables DRAM to be used as a cache to break the ``Memory Wall'' problem. Recent studies have proposed to use DRAM as a victim cache in both CPU and GPU memory hierarchies to improve performance. DRAM caches are large in size and, hence, when realized as a victim cache, non-inclusive design is preferred. This non-inclusive design adds significant differences to the conventional DRAM cache design in terms of its probe, fill, and writeback policies. Design and verification of a victim DRAM cache can be much more complex than that of a conventional DRAM cache. Hence, without rigorous modeling and formal verification, ensuring the correctness of such a system can be difficult. The major focus of this work is to show how formal modeling is applied to design and verify a victim DRAM cache. In this approach, we identify the agents in the victim DRAM cache design and model them in terms of interacting state machines. We derive a set of properties from the specifications of a victim cache and encode them using Linear Temporal Logic. 
The properties are then proven using symbolic and bounded model checking. Finally, we discuss how these properties are related to the dataflow paths in a victim DRAM cache.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Gupta:2019:DAD, author = "Ankur Gupta and Juinn-Dar Huang and Shigeru Yamashita and Sudip Roy", title = "Design Automation for Dilution of a Fluid Using Programmable Microfluidic Device-Based Biochips", journal = j-TODAES, volume = "24", number = "2", pages = "21:1--21:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3306492", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306492", abstract = "Microfluidic lab-on-a-chip has emerged as a new technology for implementing biochemical protocols on small-sized portable devices targeting low-cost medical diagnostics. Among various efforts of fabrication of such chips, a relatively new technology is a programmable microfluidic device (PMD) for implementation of flow-based lab-on-a-chip. A PMD chip is suitable for automation due to its symmetric nature. In order to implement a bioprotocol on such a reconfigurable device, it is crucial to automate a sample preparation on-chip as well. In this article, we propose a dilution PMD algorithm (namely DPMD) and its architectural mapping scheme (namely generalized architectural mapping algorithm (GAMA)) for addressing fluidic cells of such a device to perform dilution of a reagent fluid on-chip. We used an optimization function that first minimizes the number of mixing steps and then reduces the waste generation and further reagent requirement.
Simulation results show that the proposed DPMD scheme is comparative to the existing state-of-the-art dilution algorithm. The proposed design automation using the architectural mapping scheme reduces the required chip area and, hence, minimizes the valve switching that, in turn, increases the life span of the PMD-chip.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Jung:2019:ILP, author = "Jinwook Jung and Gi-Joon Nam and Woohyun Chung and Youngsoo Shin", title = "Integrated Latch Placement and Cloning for Timing Optimization", journal = j-TODAES, volume = "24", number = "2", pages = "22:1--22:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3301613", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301613", abstract = "This article presents an algorithm for integrated timing-driven latch placement and cloning. Given a circuit placement, the proposed algorithm relocates some latches while circuit timing is improved. Some latches are replicated to further improve the timing; the number of replicated latches along with their locations are automatically determined. After latch cloning, each of the replicated latches is set to drive a subset of the fanouts that have been driven by the original single latch. The proposed algorithm is then extended such that relocation and cloning are applied to some latches together with their neighbor logic gates. Experimental results demonstrate that the worst negative slack and the total negative slack are improved by 24\% and 59\%, respectively, on average of test circuits. 
The negative impacts on circuit area and power consumption are both marginal, at 0.7\% and 1.9\% respectively.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Pomeranz:2019:ITU, author = "Irith Pomeranz", title = "Incomplete Tests for Undetectable Faults to Improve Test Set Quality", journal = j-TODAES, volume = "24", number = "2", pages = "23:1--23:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3306493", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306493", abstract = "The presence of undetectable faults in a set of target faults implies that tests, which may be important for detecting defects, are missing from the test set. This article suggests an approach for addressing missing tests that fits with the rationale for computing an $n$-detection test set. The article defines the concept of an incomplete test that is relevant when a target fault is undetectable. An incomplete test activates the fault but fails to detect it because of one or more assignments that are missing from the test. The procedure described in this article improves the quality of a test set by attempting to ensure that every undetectable fault has $n$ incomplete tests with the smallest possible numbers of missing assignments, for a constant $ n \geq 1 $. The incomplete tests are expected to contribute to the detection of detectable defects around the site of the undetectable fault. The computation of missing assignments for a test is performed in linear time by avoiding fault simulation and considering all the undetectable faults simultaneously.
Experimental results demonstrate the extent to which a given test set can be improved without increasing the number of tests.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Hyun:2019:IAA, author = "Daijoon Hyun and Youngsoo Shin", title = "Integrated Approach of Airgap Insertion for Circuit Timing Optimization", journal = j-TODAES, volume = "24", number = "2", pages = "24:1--24:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3306494", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306494", abstract = "Airgap technology enables air to be introduced in inter-metal dielectric (IMD). Airgap between certain wires reduces coupling capacitance due to the reduced permittivity; this can be utilized to decrease circuit delay. We propose an integrated approach of airgap insertion with the goal of circuit timing optimization. It consists of three sub-problems. We first select the layers that employ airgap, called airgap layers, that maximize total negative slack (TNS) improvement; this yields TNS improvement of 7\% to 15\% and worst negative slack (WNS) improvement of 2\% to 8\%, compared to a simple assumption of airgap layers. Second, we reassign the layers of wires such that more wires on critical paths can be placed in airgap layers. This is formulated as integer linear programming (ILP), and a more practical heuristic algorithm is also proposed. It provides an additional 17\% TNS improvement and 6\% WNS improvement. Finally, we perform airgap insertion through ILP formulation, where a number of design rules are modeled with linear constraints. 
To reduce the heavy runtime of ILP, a layout partitioning technique is also applied. It implements a feasible airgap mask in a manageable time where the amount of inserted airgap is close to the optimal solution.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Li:2019:NRM, author = "Taozhong Li and Qin Wang and Yongxin Zhu and Jianfei Jiang and Guanghui He and Jing Jin and Zhigang Mao and Naifeng Jing", title = "A Novel Resistive Memory-based Process-in-memory Architecture for Efficient Logic and Add Operations", journal = j-TODAES, volume = "24", number = "2", pages = "25:1--25:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3306495", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306495", abstract = "The coming era of big data revives the Processing-in-memory (PIM) architecture to relieve the memory wall problem that embarrasses the modern computing system. However, most existing PIM designs just put computing units closer to memory, rather than a complete integration of them due to their incompatibility in CMOS manufacturing. Fortunately, the emerging Resistive-RAM (ReRAM) offers new hope to this dilemma owing to its inherent memory and computing capability using the same device. In this article, we propose a ReRAM memory structure with efficient PIM capability of both logic and add operations. It first leverages non-linearity to suppress sneak current and thus sustains high memory density. Using a differential bit cell, it also enables efficient processing of arbitrary logic functions using the same memory cells with non-destructive operations.
Then, a novel PIM adder is proposed, which customizes a sneak current path as the carry-chain for fast carry propagation and improves adder performance significantly. In the experiment, the proposed PIM demonstrates higher efficiency in both computing area and performance for logic and addition, which greatly increases the ReRAM PIM applicability for future computable architectures.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Nongpoh:2019:ESE, author = "Bernard Nongpoh and Rajarshi Ray and Moumita Das and Ansuman Banerjee", title = "Enhancing Speculative Execution With Selective Approximate Computing", journal = j-TODAES, volume = "24", number = "2", pages = "26:1--26:??", month = mar, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3307651", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Mar 22 16:58:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3307651", abstract = "Speculative execution is an optimization technique used in modern processors by which predicted instructions are executed in advance with an objective of overlapping the latencies of slow operations. Branch prediction and load value speculation are examples of speculative execution used in modern pipelined processors to avoid execution stalls. However, speculative executions incur a performance penalty as an execution rollback when there is a misprediction. In this work, we propose to aid speculative execution with approximate computing by relaxing the execution rollback penalty associated with a misprediction. 
We propose a sensitivity analysis method for data and branches in a program to identify the data load and branch instructions that can be executed without any rollback in the pipeline and yet can ensure a certain user-specified quality of service of the application with a probabilistic reliability. Our analysis is based on statistical methods, particularly hypothesis testing and Bayesian analysis. We perform an architectural simulation of our proposed approximate execution and report the benefits in terms of CPU cycles and energy utilization on selected applications from the AxBench, ACCEPT, and Parsec 3.0 benchmarks suite.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Vinco:2019:CLV, author = "Sara Vinco and Nicola Bombieri and Daniele Jahier Pagliari and Franco Fummi and Enrico Macii and Massimo Poncino", title = "A Cross-level Verification Methodology for Digital {IPs} Augmented with Embedded Timing Monitors", journal = j-TODAES, volume = "24", number = "3", pages = "27:1--27:23", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3308565", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3308565", abstract = "Smart systems are characterized by the integration in a single device of multi-domain subsystems of different technological domains, namely, analog, digital, discrete and power devices, MEMS, and power sources. 
Such challenges, emerging from the \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Oh:2019:TAS, author = "Deok Keun Oh and Mu Jun Choi and Ju Ho Kim", title = "Thermal-aware {$3$D} Symmetrical Buffered Clock Tree Synthesis", journal = j-TODAES, volume = "24", number = "3", pages = "28:1--28:22", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3313798", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3313798", abstract = "The semiconductor industry has accepted three-dimensional integrated circuits (3D ICs) as a possible solution to address speed and power management problems. In addition, 3D ICs have recently demonstrated a huge potential in reducing wire length and \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Schwarzer:2019:CDA, author = "Tobias Schwarzer and Joachim Falk and Simone M{\"u}ller and Martin Letras and Christian Heidorn and Stefan Wildermann and J{\"u}rgen Teich", title = "Compilation of Dataflow Applications for Multi-Cores using Adaptive Multi-Objective Optimization", journal = j-TODAES, volume = "24", number = "3", pages = "29:1--29:23", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3310249", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3310249", abstract = "State-of-the-art system synthesis techniques employ meta-heuristic optimization techniques for Design Space 
Exploration (DSE) to tailor application execution, e.g., defined by a dataflow graph, for a given target platform. Unfortunately, the performance \ldots{}", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tu:2019:AOS, author = "Chia-Heng Tu and Te-Sheng Lin", title = "Augmenting Operating Systems with {OpenCL} Accelerators", journal = j-TODAES, volume = "24", number = "3", pages = "30:1--30:29", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315569", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315569", abstract = "Heterogeneous computing leverages more than one kind of processors to boost the performance of user-space applications with the heterogeneous programming languages, e.g., OpenCL. 
While some works have been done to accelerate the computations required by \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2019:ESC, author = "Xiaolin Xu and Fahim Rahman and Bicky Shakya and Apostol Vassilev and Domenic Forte and Mark Tehranipoor", title = "Electronics Supply Chain Integrity Enabled by Blockchain", journal = j-TODAES, volume = "24", number = "3", pages = "31:1--31:25", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315571", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315571", abstract = "Electronic systems are ubiquitous today, playing an irreplaceable role in our personal lives, as well as in critical infrastructures such as power grids, satellite communications, and public transportation. 
In the past few decades, the security of \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Valencia:2019:CPA, author = "Juan Valencia and Dip Goswami and Kees Goossens", title = "Comparing Platform-aware Control Design Flows for Composable and Predictable {TDM}-based Execution Platforms", journal = j-TODAES, volume = "24", number = "3", pages = "32:1--32:26", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315572", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315572", abstract = "We compare three platform-aware feedback control design flows that are tailored for a composable and predictable Time Division Multiplexing (TDM)-based execution platform. The platform allows for independent execution of multiple applications. Using the \ldots{}", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lu:2019:DDA, author = "Sixing Lu and Roman Lysecky", title = "Data-driven Anomaly Detection with Timing Features for Embedded Systems", journal = j-TODAES, volume = "24", number = "3", pages = "33:1--33:27", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3279949", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3279949", abstract = "Malware is a serious threat to network-connected embedded systems, as evidenced by the continued and rapid growth of such devices, commonly referred to as the Internet of Things. 
Their ubiquitous use in critical applications require robust protection to \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Metwalli:2019:SAS, author = "Sara Ayman Metwalli and Yuko Hara-Azumi", title = "{SSA-AC}: Static Significance Analysis for Approximate Computing", journal = j-TODAES, volume = "24", number = "3", pages = "34:1--34:17", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3314575", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3314575", abstract = "Recently, the quest to reduce energy consumption in digital systems has been the subject of a number of ongoing studies. One of the most researched focuses is approximate computing (AC). AC is a new computing paradigm in both hardware and software \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Monteiro:2019:OCF, author = "Jucemar Monteiro and Marcelo Johann and Laleh Behjat", title = "An Optimized Cost Flow Algorithm to Spread Cells in Detailed Placement", journal = j-TODAES, volume = "24", number = "3", pages = "35:1--35:16", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3317575", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3317575", abstract = "Placement is an important and challenging step in VLSI physical design. The placement solution can significantly impact timing and routability. 
In sub-nanometric technology nodes, several restrictions have been imposed on the placement solutions. These \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Islam:2019:EIT, author = "Md Nazmul Islam and Sandip Kundu", title = "Enabling {IC} Traceability via Blockchain Pegged to Embedded {PUF}", journal = j-TODAES, volume = "24", number = "3", pages = "36:1--36:23", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315669", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315669", abstract = "Globalization of IC supply chain has increased the risk of counterfeit, tampered, and re-packaged chips in the market. Counterfeit electronics poses a security risk in safety critical applications like avionics, SCADA systems, and defense. 
It also \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wan:2019:DRP, author = "Bo Wan and Xi Li and Bo Zhang and Caixu Zhao and Xianglan Chen and Chao Wang and Xuehai Zhou", title = "{DCW}: a Reactive and Predictable Programming Framework for {LET}-Based Distributed Real-Time Systems", journal = j-TODAES, volume = "24", number = "3", pages = "37:1--37:35", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3317574", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3317574", abstract = "Real-time systems continuously interact with the physical environment and often have to satisfy stringent timing constraints imposed by their interactions. Those systems involve two main properties: reactivity and predictability. 
Reactivity allows the \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Basu:2019:CBA, author = "Kanad Basu and Samah Mohamed Saeed and Christian Pilato and Mohammed Ashraf and Mohammed Thari Nabeel and Krishnendu Chakrabarty and Ramesh Karri", title = "{CAD-Base}: an Attack Vector into the Electronics Supply Chain", journal = j-TODAES, volume = "24", number = "4", pages = "38:1--38:30", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315574", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315574", abstract = "Fabless semiconductor companies design system-on-chips (SoC) by using third-party intellectual property (IP) cores and fabricate them in offshore, potentially untrustworthy foundries. 
Owing to the globally distributed electronics supply chain, security \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rokni:2019:SLF, author = "Seyed Ali Rokni and Hassan Ghasemzadeh", title = "{Share-n-Learn}: a Framework for Sharing Activity Recognition Models in Wearable Systems With Context-Varying Sensors", journal = j-TODAES, volume = "24", number = "4", pages = "39:1--39:27", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3318044", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3318044", abstract = "Wearable sensors utilize machine learning algorithms to infer important events such as the behavioral routine and health status of their end users from time-series sensor data. 
A major obstacle in large-scale utilization of these systems is that the \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zimmermann:2019:ADL, author = "Thomas Zimmermann and Mathias Mora and Sebastian Steinhorst and Daniel Mueller-Gritschneder and Andreas Jossen", title = "Analysis of Dissipative Losses in Modular Reconfigurable Energy Storage Systems Using {SystemC TLM} and {SystemC-AMS}", journal = j-TODAES, volume = "24", number = "4", pages = "40:1--40:33", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3321387", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3321387", abstract = "Battery storage systems are becoming more popular in the automotive industry as well as in stationary applications. To fulfill the requirements in terms of power and energy, the literature is increasingly discussing electrically reconfigurable \ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sayed:2019:CAP, author = "Nour Sayed and Longfei Mao and Rajendra Bishnoi and Mehdi B. 
Tahoori", title = "Compiler-Assisted and Profiling-Based Analysis for Fast and Efficient {STT-MRAM} On-Chip Cache Design", journal = j-TODAES, volume = "24", number = "4", pages = "41:1--41:25", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3321693", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3321693", abstract = "Spin Transfer Torque Magnetic Random Access Memory (STT-MRAM) is a promising candidate for large on-chip memories as a zero-leakage, high-density and non-volatile alternative to the present SRAM technology. Since memories are the dominating component of \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2019:LRA, author = "Naixing Wang and Irith Pomeranz and Sudhakar M. Reddy and Arani Sinha and Srikanth Venkataraman", title = "Layout Resynthesis by Applying Design-for-manufacturability Guidelines to Avoid Low-coverage Areas of a Cell-based Design", journal = j-TODAES, volume = "24", number = "4", pages = "42:1--42:19", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3325066", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3325066", abstract = "Design-for-manufacturability (DFM) guidelines are recommended layout design practices intended to capture layout features that are difficult to manufacture correctly. Avoiding such features prevents the occurrence of potential systematic defects. 
Layout \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Burcea:2019:MIR, author = "Florin Burcea and Andreas Herrmann and Bing Li and Helmut Graeb", title = "{MEMS-IC} Robustness Optimization Considering Electrical and Mechanical Design and Process Parameters", journal = j-TODAES, volume = "24", number = "4", pages = "43:1--43:24", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3325068", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3325068", abstract = "MEMS-based sensor circuits are traditionally designed separately using CAD tools specific to each energy domain (electrical and mechanical). This article presents a complete approach for combined MEMS-IC robustness optimization. 
Advanced methods for \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Afacan:2019:CRC, author = "Engin Afacan and G{\"u}nhan D{\"u}ndar and Faik Baskaya and Ali Emre Pusane and Mustafa Berke Yelten", title = "On Chip Reconfigurable {CMOS} Analog Circuit Design and Automation Against Aging Phenomena: Sense and React", journal = j-TODAES, volume = "24", number = "4", pages = "44:1--44:22", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3325069", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3325069", abstract = "Performance of analog circuits degrades over time due to several time-dependent degradation mechanisms. Due to the increased aging problems in ever-shrinking dimensions, reliability of complementary metal-oxide-semiconductor analog circuits has become a \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2019:ATR, author = "Yanjun Li and Ender Yilmaz and Pete Sarson and Sule Ozev", title = "Adaptive Test for {RF}\slash Analog Circuit Using Higher Order Correlations among Measurements", journal = j-TODAES, volume = "24", number = "4", pages = "45:1--45:16", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3308566", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3325069", abstract = "As process variations increase and devices get more diverse in their behavior, using the
same test list for all devices is increasingly inefficient. Methodologies that adapt the test sequence with respect to lot, wafer, or even a device's own behavior \ldots{}", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2019:CPR, author = "Chengning Wang and Dan Feng and Wei Tong and Jingning Liu and Zheng Li and Jiayi Chang and Yang Zhang and Bing Wu and Jie Xu and Wei Zhao and Yilin Li and Ruoxi Ren", title = "Cross-point Resistive Memory: Nonideal Properties and Solutions", journal = j-TODAES, volume = "24", number = "4", pages = "46:1--46:37", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3325067", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3325067", abstract = "Emerging computational resistive memory is promising to overcome the challenges of scalability and energy efficiency that DRAM faces and also break through the memory wall bottleneck. 
However, cell-level and array-level nonideal properties of resistive \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jun:2019:FTT, author = "Jaeyung Jun and Yoonah Paik and Gyeong Il Min and Seon Wook Kim and Youngsun Han", title = "Fault Tolerance Technique Offlining Faulty Blocks by Heap Memory Management", journal = j-TODAES, volume = "24", number = "4", pages = "47:1--47:25", month = jul, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3329079", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:31 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3329079", abstract = "As dynamic random access memory (DRAM) cells continue to be scaled down for higher density and capacity, they have more faults. Thus, DRAM reliability becomes a major concern in computer systems. Previous studies have proposed many techniques preserving \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Vegesna:2019:NRM, author = "S. M. 
Srinivasavarma Vegesna and Ashok Chakravarthy Nara and Noor Mahammad Sk", title = "A Novel Rule Mapping on {TCAM} for Power Efficient Packet Classification", journal = j-TODAES, volume = "24", number = "5", pages = "48:1--48:23", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3328103", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3328103", abstract = "Packet Classification is the enabling function performed in commodity switches for providing various services such as access control, intrusion detection, load balancing, and so on. Ternary Content Addressable Memories (TCAMs) are the de facto standard \ldots{}", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2019:ITD, author = "Hongfei Wang and Kun He", title = "Improving Test and Diagnosis Efficiency through Ensemble Reduction and Learning", journal = j-TODAES, volume = "24", number = "5", pages = "49:1--49:26", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3328754", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3328754", abstract = "Machine learning is a powerful lever for developing, improving, and optimizing test methodologies to cope with the demand from the advanced nodes. Ensemble methods are a particular learning paradigm that uses multiple models to boost performance. 
In \ldots{}", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cakir:2019:RCH, author = "Burcin Cakir and Sharad Malik", title = "Revealing Cluster Hierarchy in Gate-level {ICs} Using Block Diagrams and Cluster Estimates of Circuit Embeddings", journal = j-TODAES, volume = "24", number = "5", pages = "50:1--50:19", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3329081", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3329081", abstract = "Contemporary integrated circuits (ICs) are increasingly being constructed using intellectual property blocks (IPs) obtained from third parties in a globalized supply chain. The increased vulnerability to adversarial changes during this untrusted supply \ldots{}", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2019:SIP, author = "Tengtao Li and Sachin S. Sapatnekar", title = "Stress-Induced Performance Shifts in {$3$D} {DRAMs}", journal = j-TODAES, volume = "24", number = "5", pages = "51:1--51:21", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3331527", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3331527", abstract = "3D-stacked DRAMs can significantly increase cell density and bandwidth while also lowering power consumption. 
However, 3D structures experience significant thermomechanical stress due to the differential rate of contraction of the constituent materials, \ldots{}", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chakraborty:2019:ERL, author = "Shounak Chakraborty and Hemangee K. Kapoor", title = "Exploring the Role of Large Centralised Caches in Thermal Efficient Chip Design", journal = j-TODAES, volume = "24", number = "5", pages = "52:1--52:28", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3339850", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3339850", abstract = "In the era of short channel length, Dynamic Thermal Management (DTM) has become a challenging task for the architects and designers engineering modern Chip Multi-Processors (CMPs). 
Ever-increasing demand of processing power along with the developed \ldots{}", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Choi:2019:RDR, author = "Kyu Hyun Choi and Jaeyung Jun and Minseong Kim and Seon Wook Kim", title = "Reducing {DRAM} Refresh Rate Using Retention Time Aware Universal Hashing Redundancy Repair", journal = j-TODAES, volume = "24", number = "5", pages = "53:1--53:31", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3339851", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3339851", abstract = "As the device capacity of Dynamic Random Access Memory (DRAM) increases, refresh operation becomes a significant contributory factor toward total power consumption and memory throughput of the device. To reduce the problems associated with the refresh \ldots{}", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2019:TMF, author = "Xiangwei Li and Douglas L. Maskell", title = "Time-Multiplexed {FPGA} Overlay Architectures: a Survey", journal = j-TODAES, volume = "24", number = "5", pages = "54:1--54:19", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3339861", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3339861", abstract = "This article presents a comprehensive survey of time-multiplexed (TM) FPGA overlays from the research literature. 
These overlays are categorized based on their implementation into two groups: processor-based overlays, as their implementation follows \ldots{}", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gade:2019:EEC, author = "Sri Harsha Gade and M. Meraj Ahmed and Sujay Deb and Amlan Ganguly", title = "Energy Efficient Chip-to-Chip Wireless Interconnection for Heterogeneous Architectures", journal = j-TODAES, volume = "24", number = "5", pages = "55:1--55:27", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3340109", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3340109", abstract = "Heterogeneous multichip architectures have gained significant interest in high-performance computing clusters to cater to a wide range of applications. 
In particular, heterogeneous systems with multiple multicore CPUs, GPUs, and memory have become \ldots{}", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Osawa:2019:ADR, author = "Hisashi Osawa and Yuko Hara-Azumi", title = "Approximate Data Reuse-based Accelerator Design for Embedded Processor", journal = j-TODAES, volume = "24", number = "5", pages = "56:1--56:25", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3342098", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342098", abstract = "Due to increasing diversity and complexity of applications in embedded systems, accelerator designs trading-off area/energy-efficiency and design-productivity are becoming a further crucial issue. Targeting applications in the category of Recognition, \ldots{}", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Raval:2019:III, author = "Rajkumar K. Raval and Atta Badii", title = "Investigating the Impact of Image Content on the Energy Efficiency of Hardware-accelerated Digital Spatial Filters", journal = j-TODAES, volume = "24", number = "5", pages = "57:1--57:34", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3341819", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3341819", abstract = "Battery-operated low-power portable computing devices are becoming an inseparable part of human daily life. 
One of the major goals is to achieve the longest battery life in such a device. Additionally, the need for performance in processing multimedia \ldots{}", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bonna:2019:MSD, author = "Ricardo Bonna and Denis S. Loubach and George Ungureanu and Ingo Sander", title = "Modeling and Simulation of Dynamic Applications Using Scenario-Aware Dataflow", journal = j-TODAES, volume = "24", number = "5", pages = "58:1--58:29", month = oct, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3342997", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342997", abstract = "The tradeoff between analyzability and expressiveness is a key factor when choosing a suitable dataflow model of computation (MoC) for designing, modeling, and simulating applications considering a formal base. 
A large number of techniques and analysis \ldots{}", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jiang:2019:EEQ, author = "Li Jiang and Zhuoran Song and Haiyue Song and Chengwen Xu and Qiang Xu and Naifeng Jing and Weifeng Zhang and Xiaoyao Liang", title = "Energy-Efficient and Quality-Assured Approximate Computing Framework Using a Co-Training Method", journal = j-TODAES, volume = "24", number = "6", pages = "59:1--59:25", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3342239", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342239", abstract = "Approximate computing is a promising design paradigm that introduces a new dimension---error---into the original design space. By allowing the inexact computation in error-tolerance applications, approximate computing can gain both performance and energy \ldots{}", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Charles:2019:ECR, author = "Subodha Charles and Alif Ahmed and Umit Y. 
Ogras and Prabhat Mishra", title = "Efficient Cache Reconfiguration Using Machine Learning in {NoC}-Based Many-Core {CMPs}", journal = j-TODAES, volume = "24", number = "6", pages = "60:1--60:23", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3350422", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3350422", abstract = "Dynamic cache reconfiguration (DCR) is an effective technique to optimize energy consumption in many-core architectures. While early work on DCR has shown promising energy saving opportunities, prior techniques are not suitable for many-core \ldots{}", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Song:2019:COR, author = "Youngsoo Song and Daijoon Hyun and Jingon Lee and Jinwook Jung and Youngsoo Shin", title = "Cut Optimization for Redundant Via Insertion in Self-Aligned Double Patterning", journal = j-TODAES, volume = "24", number = "6", pages = "61:1--61:21", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3355391", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3355391", abstract = "Redundant via (RV) insertion helps prevent via defects and hence leads to yield enhancement. However, RV insertion in self-aligned double patterning (SADP) processes is challenging since cut optimization has to be considered together. 
In SADP, parallel \ldots{}", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lee:2019:IEC, author = "Dongjin Lee and Sourav Das and Janardhan Rao Doppa and Partha Pratim Pande and Krishnendu Chakrabarty", title = "Impact of Electrostatic Coupling on Monolithic {$3$D}-enabled Network on Chip", journal = j-TODAES, volume = "24", number = "6", pages = "62:1--62:22", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3357158", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3357158", abstract = "Monolithic-3D-integration (M3D) improves the performance and energy efficiency of 3D ICs over conventional through-silicon-vias-based counterparts. The smaller dimensions of monolithic inter-tier vias offer high-density integration, the flexibility of \ldots{}", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kukkala:2019:JSF, author = "Vipin Kumar Kukkala and Sudeep Pasricha and Thomas Bradley", title = "{JAMS-SG}: a Framework for Jitter-Aware Message Scheduling for Time-Triggered Automotive Networks", journal = j-TODAES, volume = "24", number = "6", pages = "63:1--63:31", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3355392", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3355392", abstract = "Time-triggered automotive networks use time-triggered protocols (FlexRay, TTEthernet, etc.) 
for periodic message transmissions that often originate from safety and time-critical applications. One of the major challenges with time-triggered transmissions \ldots{}", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Asgarieh:2019:SHA, author = "Yashar Asgarieh and Bill Lin", title = "Smart-Hop Arbitration Request Propagation: Avoiding Quadratic Arbitration Complexity and False Negatives in {SMART NoCs}", journal = j-TODAES, volume = "24", number = "6", pages = "64:1--64:25", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3356235", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3356235", abstract = "SMART-based NoC designs achieve ultra-low latencies by enabling flits to traverse multiple hops within a single clock cycle. Notwithstanding the clear performance benefits, SMART-based NoCs suffer from several shortcomings: each router must arbitrate \ldots{}", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shamsi:2019:IPS, author = "Kaveh Shamsi and Meng Li and Kenneth Plaks and Saverio Fazzari and David Z. 
Pan and Yier Jin", title = "{IP} Protection and Supply Chain Security through Logic Obfuscation: a Systematic Overview", journal = j-TODAES, volume = "24", number = "6", pages = "65:1--65:36", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3342099", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342099", abstract = "The globalization of the semiconductor supply chain introduces ever-increasing security and privacy risks. Two major concerns are IP theft through reverse engineering and malicious modification of the design. The latter concern in part relies on \ldots{}", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2019:RTS, author = "Kankan Wang and Xu Jiang and Nan Guan and Di Liu and Weichen Liu and Qingxu Deng", title = "Real-Time Scheduling of {DAG} Tasks with Arbitrary Deadlines", journal = j-TODAES, volume = "24", number = "6", pages = "66:1--66:22", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3358603", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3358603", abstract = "Real-time and embedded systems are shifting from single-core to multi-core processors, on which the software must be parallelized to fully utilize the computation capacity of the hardware. 
Recently, much work has been done on real-time scheduling of \ldots{}", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2019:OTL, author = "Yung-Chih Chen and Li-Cheng Zheng and Fu-Lian Wong", title = "Optimization of Threshold Logic Networks with Node Merging and Wire Replacement", journal = j-TODAES, volume = "24", number = "6", pages = "67:1--67:18", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3358748", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3358748", abstract = "In this article, we present an optimization method for threshold logic networks (TLNs) based on observability don't-care-based node merging. To reduce gate count in a TLN, it iteratively merges two gates that are functionally equivalent or whose \ldots{}", acknowledgement = ack-nhfb, articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yan:2019:TSN, author = "Jin-Tai Yan", title = "Two-sided Net Untangling with Internal Detours for Single-layer Bus Routing", journal = j-TODAES, volume = "24", number = "6", pages = "68:1--68:23", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3363184", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3363184", abstract = "It is known that one-sided net untangling can be used to untangle the twisted nets inside a bus for single-layer bus routing. 
However, limited space behind one pin-row may make one-sided net untangling unsuccessful for single-layer bus routing. In this \ldots{}", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2019:RSE, author = "Hai Wang and Tao Xiao and Darong Huang and Lang Zhang and Chi Zhang and He Tang and Yuan Yuan", title = "Runtime Stress Estimation for Three-dimensional {IC} Reliability Management Using Artificial Neural Network", journal = j-TODAES, volume = "24", number = "6", pages = "69:1--69:29", month = nov, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3363185", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:32 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3363185", abstract = "Heat dissipation and the related thermal-mechanical stress problems are the major obstacles in the development of the three-dimensional integrated circuit (3D IC). 
Reliability management techniques can be used to alleviate such problems and enhance the \ldots{}", acknowledgement = ack-nhfb, articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mahfouzi:2020:SAR, author = "Rouhollah Mahfouzi and Amir Aminifar and Soheil Samii and Petru Eles and Zebo Peng", title = "Security-aware Routing and Scheduling for Control Applications on {Ethernet TSN} Networks", journal = j-TODAES, volume = "25", number = "1", pages = "1:1--1:26", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3358604", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3358604", abstract = "Today, it is common knowledge in the cyber-physical systems domain that the tight interaction between the cyber and physical elements provides the possibility of substantially improving the performance of these systems that is otherwise impossible. On \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shi:2020:ASF, author = "Guoyong Shi", title = "Automatic Stage-form Circuit Reduction for Multistage Opamp Design Equation Generation", journal = j-TODAES, volume = "25", number = "1", pages = "2:1--2:26", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3363499", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3363499", abstract = "An automatic stage-form circuit reduction method for multistage operational amplifiers (opamps) is proposed. 
A tool based on this method can reduce a multistage opamp into a condensed stage-form macromodel, from which design equations can be generated \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2020:IBT, author = "Chih-Hao Wang and Tong-Yu Hsieh", title = "An Implication-based Test Scheme for Both Diagnosis and Concurrent Error Detection Applications", journal = j-TODAES, volume = "25", number = "1", pages = "3:1--3:27", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3364681", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3364681", abstract = "This article describes a diagnosis-aware hybrid concurrent error detection (DAH-CED) scheme that can facilitate both off-line and on-line test applications. 
By using the proposed scheme, not only the probability of detecting errors (on-line) but also \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hoque:2020:HPO, author = "Tamzidul Hoque and Kai Yang and Robert Karam and Shahin Tajik and Domenic Forte and Mark Tehranipoor and Swarup Bhunia", title = "Hidden in Plaintext: an Obfuscation-based Countermeasure against {FPGA} Bitstream Tampering Attacks", journal = j-TODAES, volume = "25", number = "1", pages = "4:1--4:32", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3361147", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3361147", abstract = "Field Programmable Gate Arrays (FPGAs) have become an attractive choice for diverse applications due to their reconfigurability and unique security features. 
However, designs mapped to FPGAs are prone to malicious modifications or tampering of critical \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bhattacharjee:2020:BCA, author = "Sukanta Bhattacharjee and Jack Tang and Sudip Poddar and Mohamed Ibrahim and Ramesh Karri and Krishnendu Chakrabarty", title = "Bio-chemical Assay Locking to Thwart Bio-{IP} Theft", journal = j-TODAES, volume = "25", number = "1", pages = "5:1--5:20", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365579", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365579", abstract = "It is expected that as digital microfluidic biochips (DMFBs) mature, the hardware design flow will begin to resemble the current practice in the semiconductor industry: design teams send chip layouts to third-party foundries for fabrication. 
These \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Malekpour:2020:HTM, author = "Amin Malekpour and Roshan Ragel and Tuo Li and Haris Javaid and Aleksandar Ignjatovic and Sri Parameswaran", title = "Hardware {Trojan} Mitigation in Pipelined {MPSoCs}", journal = j-TODAES, volume = "25", number = "1", pages = "6:1--6:27", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365578", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365578", abstract = "Multiprocessor System-on-Chip (MPSoC) has become necessary due to the billions of transistors available to the designer, the need for fast design turnaround times, and the power wall. Thus, present embedded systems are designed with MPSoCs, and one \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pan:2020:ARP, author = "Renjian Pan and Jun Tao and Yangfeng Su and Dian Zhou and Xuan Zeng and Xin Li", title = "Analog\slash {RF} Post-silicon Tuning via {Bayesian} Optimization", journal = j-TODAES, volume = "25", number = "1", pages = "7:1--7:17", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365577", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365577", abstract = "Tunable analog/RF circuit has emerged as a promising technique to address the significant performance uncertainties caused by process variations. 
To optimize these tunable circuits after fabrication, most existing post-silicon programming methods are \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2020:MCM, author = "Qi Xu and Hao Geng and Song Chen and Bei Yu and Feng Wu", title = "Memristive Crossbar Mapping for Neuromorphic Computing Systems on {$3$D} {IC}", journal = j-TODAES, volume = "25", number = "1", pages = "8:1--8:19", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365576", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365576", abstract = "In recent years, neuromorphic computing systems based on memristive crossbar have provided a promising solution to enable acceleration of neural networks. However, most of the neural networks used in realistic applications are often sparse. If such \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Parane:2020:LDL, author = "Khyamling Parane and Prabhu Prasad B. M. 
and Basavaraj Talawar", title = "{LBNoC}: Design of Low-latency Router Architecture with Lookahead Bypass for Network-on-Chip Using {FPGA}", journal = j-TODAES, volume = "25", number = "1", pages = "9:1--9:26", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365994", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365994", abstract = "An FPGA-based Network-on-Chip (NoC) using a low-latency router with a look-ahead bypass (LBNoC) is discussed in this article. The proposed design targets the optimized area with improved network performance. The techniques such as single-cycle router \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Roy:2020:HGM, author = "Pushpita Roy and Ansuman Banerjee and Robert Wille and Bhargab B. Bhattacharya", title = "Harnessing the Granularity of Micro-Electrode-Dot-Array Architectures for Optimizing Droplet Routing in Biochips", journal = j-TODAES, volume = "25", number = "1", pages = "10:1--10:37", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365993", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365993", abstract = "In this article, we consider the problem of droplet routing for Microelectrode-Dot-Array (MEDA) biochips. 
MEDA biochips today provide a host of useful features for droplet movement by making it possible to manoeuvre droplets at a much finer granularity \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Esmaili:2020:EAS, author = "Amirhossein Esmaili and Mahdi Nazemi and Massoud Pedram", title = "Energy-aware Scheduling of Task Graphs with Imprecise Computations and End-to-end Deadlines", journal = j-TODAES, volume = "25", number = "1", pages = "11:1--11:21", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365999", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365999", abstract = "Imprecise computations allow scheduling algorithms developed for energy-constrained computing devices to trade off output quality with utilization of system resources. 
The goal of such scheduling algorithms is to utilize imprecise computations to find a \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2020:HER, author = "Hongfei Wang and Jianwen Li and Kun He", title = "Hierarchical Ensemble Reduction and Learning for Resource-constrained Computing", journal = j-TODAES, volume = "25", number = "1", pages = "12:1--12:21", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3365224", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:33 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365224", abstract = "Generic tree ensembles (such as Random Forest, RF) rely on a substantial amount of individual models to attain desirable performance. The cost of maintaining a large ensemble could become prohibitive in applications where computing resources are \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tseng:2020:MAU, author = "Tien-Hung Tseng and Chung-Han Chou and Kai-Chiang Wu", title = "Making Aging Useful by Recycling Aging-induced Clock Skew", journal = j-TODAES, volume = "25", number = "2", pages = "13:1--13:24", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3363186", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:34 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3363186", abstract = "Device aging, which causes significant loss on circuit performance and lifetime, has been a primary factor in reliability degradation of nanoscale designs. 
In this article, we propose to take advantage of aging-induced clock skews (i.e., make them \ldots{}).", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Richthammer:2020:SSD, author = "Valentina Richthammer and Fabian Fassnacht and Michael Gla{\ss}", title = "Search-space Decomposition for System-level Design Space Exploration of Embedded Systems", journal = j-TODAES, volume = "25", number = "2", pages = "14:1--14:32", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3369388", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:34 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369388", abstract = "The development of large-scale multi- and many-core platforms and the rising complexity of embedded applications have led to a significant increase in the number of implementation possibilities for a single application. 
Furthermore, rising demands on \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{He:2020:LHD, author = "Xu He and Yu Deng and Shizhe Zhou and Rui Li and Yao Wang and Yang Guo", title = "Lithography Hotspot Detection with {FFT}-based Feature Extraction and Imbalanced Learning Rate", journal = j-TODAES, volume = "25", number = "2", pages = "15:1--15:21", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3372044", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:34 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372044", abstract = "With the increasing gap between transistor feature size and lithography manufacturing capability, the detection of lithography hotspots becomes a key stage of physical verification flow to enhance manufacturing yield. Although machine learning \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tadros:2020:TFT, author = "Ramy N. Tadros and Peter A. Beerel", title = "A Theoretical Foundation for Timing Synchronous Systems Using Asynchronous Structures", journal = j-TODAES, volume = "25", number = "2", pages = "16:1--16:28", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3373355", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3373355", abstract = "Timing of synchronous systems is an everlasting stumbling block to the booming demands for lower power consumption and higher operation speeds in the electronics industry. 
This hardship is aggravated by the growing levels of variability in state-of-the-art \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liang:2020:SAE, author = "Tung-Che Liang and Mohammed Shayan and Krishnendu Chakrabarty and Ramesh Karri", title = "Secure Assay Execution on {MEDA} Biochips to Thwart Attacks Using Real-Time Sensing", journal = j-TODAES, volume = "25", number = "2", pages = "17:1--17:25", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3374213", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:34 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3374213", abstract = "Digital microfluidic biochips (DMFBs) have emerged as a promising platform for DNA sequencing, clinical chemistry, and point-of-care diagnostics. Recent research has shown that DMFBs are susceptible to various types of malicious attacks. Defenses \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2020:TFT, author = "Irith Pomeranz", title = "Target Faults for Test Compaction Based on Multicycle Tests", journal = j-TODAES, volume = "25", number = "2", pages = "18:1--18:14", month = jan, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3375278", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:34 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375278", abstract = "The use of multicycle tests, with several functional capture cycles between scan operations, contributes significantly to the ability to compact a test set.
Multicycle tests have the added benefit that they can contribute to the detection of defects \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Olney:2020:TFB, author = "Brooks Olney and Robert Karam", title = "Tunable {FPGA} Bitstream Obfuscation with {Boolean} Satisfiability Attack Countermeasure", journal = j-TODAES, volume = "25", number = "2", pages = "19:1--19:22", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3373638", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3373638", abstract = "Field Programmable Gate Arrays (FPGAs) are seeing a surge in usage in many emerging application domains, where the in-field reconfigurability is an attractive characteristic for diverse applications with dynamic design requirements, such as cloud \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2020:HSS, author = "Yajun Yang and Zhang Chen and Yuan Liu and Tsung-Yi Ho and Yier Jin and Pingqiang Zhou", title = "How Secure Is Split Manufacturing in Preventing Hardware {Trojan}?", journal = j-TODAES, volume = "25", number = "2", pages = "20:1--20:23", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3378163", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378163", abstract = "With the trend of outsourcing fabrication, split manufacturing is regarded as a promising way to both acquire the high-end 
nodes in untrusted external foundries and protect the design from potential attackers. However, in this article, we show that split manufacturing is not inherently secure, that a hardware Trojan attacker can still recover necessary information with a proximity-based or a simulated-annealing-based mapping approach together with a probability-based or net-based pruning method at the placement level. We further propose a defense approach by moving the insecure gates away from their easily attacked candidate locations. Results on benchmark circuits show the effectiveness of our proposed methods.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pui:2020:LRB, author = "Chak-Wa Pui and Evangeline F. Y. Young", title = "{Lagrangian} Relaxation-Based Time-Division Multiplexing Optimization for Multi-{FPGA} Systems", journal = j-TODAES, volume = "25", number = "2", pages = "21:1--21:23", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3377551", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377551", abstract = "To increase the resource utilization in multi-FPGA (field-programmable gate array) systems, time-division multiplexing (TDM) is a widely used technique to accommodate a large number of inter-FPGA signals.
However, with this technique, the delay \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yan:2020:SLO, author = "Jin-Tai Yan", title = "Single-Layer Obstacle-Aware Substrate Routing via Iterative Pin Reassignment and Wire Assignment", journal = j-TODAES, volume = "25", number = "2", pages = "22:1--22:21", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3378162", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378162", abstract = "It is known that single-layer obstacle-aware substrate routing is necessary for modern IC/Package designs. In this article, given a set of two-pin nets and a set of rectangular obstacles inside a single-layer routing plane, a two-phase routing algorithm \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sha:2020:FPT, author = "Shi Sha and Ajinkya S. Bankar and Xiaokun Yang and Wujie Wen and Gang Quan", title = "On Fundamental Principles for Thermal-Aware Design on Periodic Real-Time Multi-Core Systems", journal = j-TODAES, volume = "25", number = "2", pages = "23:1--23:23", month = mar, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3378063", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Mar 18 07:50:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378063", abstract = "With the exponential rise of the transistor count in one chip, the thermal problem has become a pressing issue in computing system design. 
While there have been extensive methods and techniques published for design optimization with thermal awareness, \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nath:2020:RDB, author = "Arijit Nath and Sukarn Agarwal and Hemangee K. Kapoor", title = "Reuse Distance-based Victim Cache for Effective Utilisation of Hybrid Main Memory System", journal = j-TODAES, volume = "25", number = "3", pages = "24:1--24:32", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3380732", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380732", abstract = "Hybrid main memories comprising DRAM and Non-volatile memories (NVM) are projected as potential replacements of the traditional DRAM-based memories. However, traditional cache management policies designed for improving the hit rate lack awareness of the \ldots{}", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kamal:2020:ADF, author = "Nishant Kamal and Ankur Gupta and Ananya Singla and Shubham Tiwari and Parth Kohli and Sudip Roy and Bhargab B. 
Bhattacharya", title = "Architectural Design of Flow-Based Microfluidic Biochips for Multi-Target Dilution of Biochemical Fluids", journal = j-TODAES, volume = "25", number = "3", pages = "25:1--25:34", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3357604", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3357604", abstract = "Microfluidic technologies enable replacement of time-consuming and complex steps of biochemical laboratory protocols with a tiny chip. Sample preparation (i.e., dilution or mixing of fluids) is one of the primary tasks of any bioprotocol. In real-life \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nahiyan:2020:SCF, author = "Adib Nahiyan and Jungmin Park and Miao He and Yousef Iskander and Farimah Farahmandi and Domenic Forte and Mark Tehranipoor", title = "{SCRIPT}: a {CAD} Framework for Power Side-channel Vulnerability Assessment Using Information Flow Tracking and Pattern Generation", journal = j-TODAES, volume = "25", number = "3", pages = "26:1--26:27", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3383445", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383445", abstract = "Power side-channel attacks (SCAs) have been proven to be effective at extracting secret keys from hardware implementations of cryptographic algorithms. 
Ideally, the power side-channel leakage (PSCL) of hardware designs of a cryptographic algorithm \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2020:SMB, author = "Huili Chen and Seetal Potluri and Farinaz Koushanfar", title = "Security of Microfluidic Biochip: Practical Attacks and Countermeasures", journal = j-TODAES, volume = "25", number = "3", pages = "27:1--27:29", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3382127", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3382127", abstract = "With the advancement of system miniaturization and automation, Lab-on-a-Chip (LoC) technology has revolutionized traditional experimental procedures. Microfluidic Biochip (MFB) is an emerging branch of LoC with wide medical applications such as DNA \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mandal:2020:EAO, author = "Sumit K. Mandal and Ganapati Bhat and Janardhan Rao Doppa and Partha Pratim Pande and Umit Y. 
Ogras", title = "An Energy-aware Online Learning Framework for Resource Management in Heterogeneous Platforms", journal = j-TODAES, volume = "25", number = "3", pages = "28:1--28:26", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3386359", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386359", abstract = "Mobile platforms must satisfy the contradictory requirements of fast response time and minimum energy consumption as a function of dynamically changing applications. To address this need, systems-on-chip (SoC) that are at the heart of these devices \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2020:AFD, author = "Mengyun Liu and Lixue Xia and Yu Wang and Krishnendu Chakrabarty", title = "Algorithmic Fault Detection for {RRAM}-based Matrix Operations", journal = j-TODAES, volume = "25", number = "3", pages = "29:1--29:31", month = may, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3386360", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue May 19 10:15:25 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386360", abstract = "An RRAM-based computing system (RCS) provides an energy-efficient hardware implementation of vector--matrix multiplication for machine-learning hardware. However, it is vulnerable to faults due to the immature RRAM fabrication process. We propose an efficient fault tolerance method for RCS; the proposed method, referred to as extended-ABFT (X-ABFT), is inspired by algorithm-based fault tolerance (ABFT). 
We utilize row checksums and test-input vectors to extract signatures for fault detection and error correction. We present a solution to alleviate the overflow problem caused by the limited number of voltage levels for the test-input signals. Simulation results show that for a Hopfield classifier with faults in 5\% of its RRAM cells, X-ABFT allows us to achieve nearly the same classification accuracy as in the fault-free case.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Paik:2020:GRT, author = "Yoonah Paik and Seon Wook Kim and Dongha Jung and Minseong Kim", title = "Generating Representative Test Sequences from Real Workload for Minimizing {DRAM} Verification Overhead", journal = j-TODAES, volume = "25", number = "4", pages = "30:1--30:23", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3391891", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3391891", abstract = "Dynamic Random Access Memory (DRAM) standards have evolved for higher bandwidth, larger capacity, and lower power consumption, so their specifications have become complicated to satisfy the design goals. 
These complex implementations have significantly \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jana:2020:HHC, author = "Rajib Lochan Jana and Soumyajit Dey and Pallab Dasgupta", title = "A Hierarchical {HVAC} Control Scheme for Energy-aware Smart Building Automation", journal = j-TODAES, volume = "25", number = "4", pages = "31:1--31:33", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3393666", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3393666", abstract = "Heating ventilation and air conditioning (HVAC) systems usually account for the highest percentage of overall energy usage in large-sized smart building infrastructures. The performance of HVAC control systems for large buildings strongly depend on the \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chatterjee:2020:MLA, author = "Urbi Chatterjee and Soumi Chatterjee and Debdeep Mukhopadhyay and Rajat Subhra Chakraborty", title = "Machine Learning Assisted {PUF} Calibration for Trustworthy Proof of Sensor Data in {IoT}", journal = j-TODAES, volume = "25", number = "4", pages = "32:1--32:21", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3393628", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3393628", abstract = "Remote integrity verification plays a paramount role in resource-constraint devices owing to emerging applications such as 
Internet-of-Things (IoT), smart homes, e-health, and so on. The concept of Virtual Proof of Reality (VPoR) proposed by R{\"u}hrmair et al.\ldots{}", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Vijayan:2020:RIH, author = "Arunkumar Vijayan and Mehdi B. Tahoori and Krishnendu Chakrabarty", title = "Runtime Identification of Hardware {Trojans} by Feature Analysis on Gate-Level Unstructured Data and Anomaly Detection", journal = j-TODAES, volume = "25", number = "4", pages = "33:1--33:23", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3391890", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3391890", abstract = "As the globalization of chip design and manufacturing process becomes popular, malicious hardware inclusions such as hardware Trojans pose a serious threat to the security of digital systems. 
Advanced Trojans can mask many architectural-level Trojan \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Alasad:2020:SLO, author = "Qutaiba Alasad and Jiann-Shuin Yuan and Pramod Subramanyan", title = "Strong Logic Obfuscation with Low Overhead against {IC} Reverse Engineering Attacks", journal = j-TODAES, volume = "25", number = "4", pages = "34:1--34:31", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398012", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398012", abstract = "Untrusted foundries pose threats of integrated circuit (IC) piracy and counterfeiting, and this has motivated research into logic locking. Strong logic locking approaches potentially prevent piracy and counterfeiting by preventing unauthorized \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Alam:2020:SSB, author = "Md Mahbub Alam and Adib Nahiyan and Mehdi Sadi and Domenic Forte and Mark Tehranipoor", title = "{Soft-HaT}: Software-Based Silicon Reprogramming for Hardware {Trojan} Implementation", journal = j-TODAES, volume = "25", number = "4", pages = "35:1--35:22", month = sep, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3396521", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Sep 3 14:31:04 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3396521", abstract = "A hardware Trojan is a malicious modification to an integrated circuit (IC) made by untrusted third-party vendors, fabrication 
facilities, or rogue designers. Although existing hardware Trojans are designed to be stealthy, they can, in theory, be \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Henkel:2020:ISI, author = "J{\"o}rg Henkel and Hussam Amrouch and Marilyn Wolf", title = "Introduction to the Special Issue on Machine Learning for {CAD}", journal = j-TODAES, volume = "25", number = "5", pages = "36:1--36:2", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3410864", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3410864", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Szentimrey:2020:MLC, author = "Hannah Szentimrey and Abeer Al-Hyari and Jeremy Foxcroft and Timothy Martin and David Noel and Gary Grewal and Shawki Areibi", title = "Machine Learning for Congestion Management and Routability Prediction within {FPGA} Placement", journal = j-TODAES, volume = "25", number = "5", pages = "37:1--37:25", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3373269", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3373269", abstract = "Placement for Field Programmable Gate Arrays (FPGAs) is one of the most important but time-consuming steps for achieving design closure. 
This article proposes the integration of three unique machine learning models into the state-of-the-art analytic \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2020:FGA, author = "Mengyun Liu and Renjian Pan and Fangming Ye and Xin Li and Krishnendu Chakrabarty and Xinli Gu", title = "Fine-grained Adaptive Testing Based on Quality Prediction", journal = j-TODAES, volume = "25", number = "5", pages = "38:1--38:25", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3385261", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3385261", abstract = "The ever-increasing complexity of integrated circuits inevitably leads to high test cost. Adaptive testing provides an effective solution for test-cost reduction; this testing framework selects the important test items for each set of chips. However, \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Last:2020:PMC, author = "Felix Last and Max Haeberlein and Ulf Schlichtmann", title = "Predicting Memory Compiler Performance Outputs Using Feed-forward Neural Networks", journal = j-TODAES, volume = "25", number = "5", pages = "39:1--39:19", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3385262", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3385262", abstract = "Typical semiconductor chips include thousands of mostly small memories. 
As memories contribute an estimated 25\% to 40\% to the overall power, performance, and area (PPA) of a product, memories must be designed carefully to meet the system's requirements. \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Goli:2020:PAP, author = "Mehran Goli and Rolf Drechsler", title = "{PREASC}: Automatic Portion Resilience Evaluation for Approximating {SystemC}-based Designs Using Regression Analysis Techniques", journal = j-TODAES, volume = "25", number = "5", pages = "40:1--40:28", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3388140", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3388140", abstract = "The increasing functionality of electronic systems due to the constant evolution of the market requirements makes the non-functional aspects of such systems (e.g., energy consumption, area overhead, or performance) a major concern in the design process. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nasser:2020:NCM, author = "Yehya Nasser and Carlo Sau and Jean-Christophe Pr{\'e}votet and Tiziana Fanni and Francesca Palumbo and Maryline H{\'e}lard and Luigi Raffo", title = "{NeuPow}: a {CAD} Methodology for High-level Power Estimation Based on Machine Learning", journal = j-TODAES, volume = "25", number = "5", pages = "41:1--41:29", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3388141", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3388141", abstract = "In this article, we present a new, simple, accurate, and fast power estimation technique that can be used to explore the power consumption of digital system designs at an early design stage. 
We exploit the machine learning techniques to aid the \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dey:2020:MLA, author = "Sukanta Dey and Sukumar Nandi and Gaurav Trivedi", title = "Machine Learning Approach for Fast Electromigration Aware Aging Prediction in Incremental Design of Large Scale On-chip Power Grid Network", journal = j-TODAES, volume = "25", number = "5", pages = "42:1--42:29", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3399677", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3399677", abstract = "With the advancement of technology nodes, Electromigration (EM) signoff has become increasingly difficult, which requires a considerable amount of time for an incremental change in the power grid (PG) network design in a chip. The traditional Black's \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2020:TSD, author = "Qicheng Huang and Chenlei Fang and Soumya Mittal and R. D. 
(Shawn) Blanton", title = "Towards Smarter Diagnosis: a Learning-based Diagnostic Outcome Previewer", journal = j-TODAES, volume = "25", number = "5", pages = "43:1--43:20", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398267", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398267", abstract = "Given the inherent perturbations during the fabrication process of integrated circuits that lead to yield loss, diagnosis of failing chips is a mitigating method employed during both yield ramping and high-volume manufacturing for yield learning. \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hu:2020:MLA, author = "Yong Hu and Marcel Mettler and Daniel Mueller-Gritschneder and Thomas Wild and Andreas Herkersdorf and Ulf Schlichtmann", title = "Machine Learning Approaches for Efficient Design Space Exploration of Application-Specific {NoCs}", journal = j-TODAES, volume = "25", number = "5", pages = "44:1--44:27", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3403584", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3403584", abstract = "In many Multi-Processor Systems-on-Chip (MPSoCs), traffic between cores is unbalanced. This motivates the use of an application-specific Network-on-Chip (NoC) that is customized and can provide a high performance at low cost in terms of power and area. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2020:MFS, author = "Yi Wang and Paul D. Franzon and David Smart and Brian Swahn", title = "Multi-Fidelity Surrogate-Based Optimization for Electromagnetic Simulation Acceleration", journal = j-TODAES, volume = "25", number = "5", pages = "45:1--45:21", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398268", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398268", abstract = "As circuits' speed and frequency increase, fast and accurate capture of the details of the parasitics in metal structures, such as inductors and clock trees, becomes more critical. However, conducting high-fidelity 3D electromagnetic (EM) simulations \ldots{}", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Agnesina:2020:IFB, author = "Anthony Agnesina and Sung Kyu Lim and Etienne Lepercq and Jose {Escobedo Del Cid}", title = "Improving {FPGA}-Based Logic Emulation Systems through Machine Learning", journal = j-TODAES, volume = "25", number = "5", pages = "46:1--46:20", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3399595", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3399595", abstract = "We present a machine learning (ML) framework to improve the use of computing resources in the FPGA compilation step of a commercial FPGA-based logic emulation flow. 
Our ML models enable highly accurate predictability of the final place and route design \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xama:2020:MLB, author = "Nektar Xama and Martin Andraud and Jhon Gomez and Baris Esen and Wim Dobbelaere and Ronny Vanhooren and Anthony Coyette and Georges Gielen", title = "Machine Learning-based Defect Coverage Boosting of Analog Circuits under Measurement Variations", journal = j-TODAES, volume = "25", number = "5", pages = "47:1--47:27", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3408063", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3408063", abstract = "Safety-critical and mission-critical systems, such as airplanes or (semi-)autonomous cars, are relying on an ever-increasing number of embedded integrated circuits. Consequently, there is a need for complete defect coverage during the testing of these \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2020:APA, author = "Kang Liu and Haoyu Yang and Yuzhe Ma and Benjamin Tan and Bei Yu and Evangeline F. Y. 
Young and Ramesh Karri and Siddharth Garg", title = "Adversarial Perturbation Attacks on {ML}-based {CAD}: a Case Study on {CNN}-based Lithographic Hotspot Detection", journal = j-TODAES, volume = "25", number = "5", pages = "48:1--48:31", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3408288", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Oct 3 07:47:57 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3408288", abstract = "There is substantial interest in the use of machine learning (ML)-based techniques throughout the electronic computer-aided design (CAD) flow, particularly those based on deep learning. However, while deep learning methods have surpassed state-of-the-art \ldots{}", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hu:2020:EMN, author = "X.
Sharon Hu", title = "Editorial: a Message from the New {Editor-in-Chief}", journal = j-TODAES, volume = "25", number = "6", pages = "49e:1--49e:2", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3419376", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3419376", acknowledgement = ack-nhfb, articleno = "49e", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Torabi:2020:LAA, author = "Mohammad Torabi and Lihong Zhang", title = "{LDE}-aware Analog Layout Migration with {OPC}-inclusive Routing", journal = j-TODAES, volume = "25", number = "6", pages = "49:1--49:22", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398190", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398190", abstract = "Performance degradation in analog circuits due to layout dependent effects (LDEs) has become increasingly challenging in advanced technologies. 
To address this issue, LDEs have to be seriously considered as performance constraints in the physical design \ldots{}", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ma:2020:MEF, author = "Chenlin Ma and Yi Wang and Zhaoyan Shen and Renhai Chen and Zhu Wang and Zili Shao", title = "{MNFTL}: an Efficient Flash Translation Layer for {MLC NAND} Flash Memory", journal = j-TODAES, volume = "25", number = "6", pages = "50:1--50:19", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398037", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398037", abstract = "The write constraints of Multi-Level Cell (MLC) NAND flash memory make most of the existing flash translation layer (FTL) schemes inefficient or inapplicable. 
In this article, we solve several fundamental problems in the design of MLC flash translation \ldots{}", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lezos:2020:LOL, author = "Christakis Lezos and Grigoris Dimitroulakos and Ioannis Latifis and Konstantinos Masselos", title = "A Locality Optimizer for Loop-dominated Applications Based on Reuse Distance Analysis", journal = j-TODAES, volume = "25", number = "6", pages = "51:1--51:26", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3398189", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3398189", abstract = "Source code optimization can heavily improve software code implementation quality while still being complementary to conventional compilers' optimizations. Source code analysis tools are very useful in supporting source code optimization. 
This article \ldots{}", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tan:2020:EEG, author = "Jingweijia Tan and Kaige Yan and Shuaiwen Leon Song and Xin Fu", title = "Energy-Efficient {GPU} {L2} Cache Design Using Instruction-Level Data Locality Similarity", journal = j-TODAES, volume = "25", number = "6", pages = "52:1--52:18", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3408060", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3408060", abstract = "This article presents a novel energy-efficient cache design for massively parallel, throughput-oriented architectures like GPUs. Unlike L1 data cache on modern GPUs, L2 cache shared by all of the streaming multiprocessors is not the primary performance \ldots{}", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Charles:2020:RNC, author = "Subodha Charles and Prabhat Mishra", title = "Reconfigurable Network-on-Chip Security Architecture", journal = j-TODAES, volume = "25", number = "6", pages = "53:1--53:25", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3406661", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3406661", abstract = "Growth of the Internet-of-things has led to complex system-on-chips (SoCs) being used in the edge devices in IoT applications. 
The increased complexity is demanding designers to consider several critical factors, such as dynamic requirement changes, \ldots{}", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pendyala:2020:IAS, author = "Shilpa Pendyala and Sheikh Ariful Islam and Srinivas Katkoori", title = "Interval Arithmetic and Self-Similarity Based {RTL} Input Vector Control for Datapath Leakage Minimization", journal = j-TODAES, volume = "25", number = "6", pages = "54:1--54:26", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3408061", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3408061", abstract = "With technology scaling, subthreshold leakage has dominated the overall power consumption in a design. Input vector control is an effective technique to minimize subthreshold leakage. Low leakage input vector determination is not often possible due to large design space and simulation time. Similarly, applying an appropriate minimum leakage vector (MLV) to each Register Transfer Level (RTL) module instance in a design often results in a low leakage state with significant area overhead. In this work, we propose a top-down and bottom-up approach for propagating the input vector interval to identify low leakage input vector at primary inputs of an RTL datapath. For each module, via Monte Carlo simulation, we identify a set of MLV intervals such that maximum leakage is within (say) 10\% of the lowest leakage points. As the module bit width increases, exhaustive simulation to find the low leakage vector is not feasible. Further, we need to uniformly search the entire input space to obtain as many low leakage intervals as possible. 
Based on empirical observations, we observe self-similarity in the subthreshold leakage distribution of adder\slash multiplier modules with highly regular bit-slice architectures when input space is partitioned into smaller cells. This property enables the uniform search of low leakage vectors in the entire input space where the time taken for characterization increases linearly with the module size. We further process the reduced interval set with simulated annealing to arrive at the best low-leakage vector at the primary inputs. We also propose to reduce area overhead (in some cases to 0\%) by choosing Primary Input (PI) MLVs such that resultant inputs to internal nodes are also MLVs. Compared to existing work, experimental results for DSP filters simulated in 16nm technology demonstrated leakage savings of 93.6\% and 89.2\% for top-down and bottom-up approaches with no area overhead.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chi:2020:WLO, author = "Hao Yu Chi and Chien Nan Jimmy Liu and Hung Ming Chen", title = "Wire Load Oriented Analog Routing with Matching Constraints", journal = j-TODAES, volume = "25", number = "6", pages = "55:1--55:26", month = oct, year = "2020", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3403932", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Oct 13 08:53:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3403932", abstract = "As design complexity is increased exponentially, electronic design automation (EDA) tools are essential to reduce design efforts. However, the analog layout design has still been done manually for decades because it is a sensitive and error-prone task. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Goel:2021:MNN, author = "Abhinav Goel and Sara Aghajanzadeh and Caleb Tung and Shuo-Han Chen and George K. Thiruvathukal and Yung-Hsiang Lu", title = "Modular Neural Networks for Low-Power Image Classification on Embedded Devices", journal = j-TODAES, volume = "26", number = "1", pages = "1:1--1:35", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3408062", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3408062", abstract = "Embedded devices are generally small, battery-powered computers with limited hardware resources. It is difficult to run deep neural networks (DNNs) on these devices, because DNNs perform millions of operations and consume significant amounts of energy. \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Roy:2021:FAA, author = "Indrani Roy and Chester Rebeiro and Aritra Hazra and Swarup Bhunia", title = "{FaultDroid}: an Algorithmic Approach for Fault-Induced Information Leakage Analysis", journal = j-TODAES, volume = "26", number = "1", pages = "2:1--2:27", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3410336", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3410336", abstract = "Fault attacks belong to a potent class of implementation-based attacks that can compromise a crypto-device within a few milliseconds. 
Out of the large numbers of faults that can occur in the device, only a very few are exploitable in terms of leaking \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2021:MNI, author = "Jun Li and Bowen Huang and Zhibing Sha and Zhigang Cai and Jianwei Liao and Balazs Gerofi and Yutaka Ishikawa", title = "Mitigating Negative Impacts of Read Disturb in {SSDs}", journal = j-TODAES, volume = "26", number = "1", pages = "3:1--3:24", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3410332", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3410332", abstract = "Read disturb is a circuit-level noise in solid-state drives (SSDs), which may corrupt existing data in SSD blocks and then cause high read error rate and longer read latency. 
The approach of read refresh is commonly used to avoid read disturb errors by \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mondal:2021:IFS, author = "Ankit Mondal and Ankur Srivastava", title = "{Ising-FPGA}: a Spintronics-based Reconfigurable {Ising} Model Solver", journal = j-TODAES, volume = "26", number = "1", pages = "4:1--4:27", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3411511", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3411511", abstract = "The Ising model has been explored as a framework for modeling NP-hard problems, with several diverse systems proposed to solve it. The Magnetic Tunnel Junction- (MTJ) based Magnetic RAM is capable of replacing CMOS in memory chips. 
In this article, we \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rokni:2021:TMS, author = "Seyed Ali Rokni and Marjan Nourollahi and Parastoo Alinia and Iman Mirzadeh and Mahdi Pedram and Hassan Ghasemzadeh", title = "{TransNet}: Minimally Supervised Deep Transfer Learning for Dynamic Adaptation of Wearable Systems", journal = j-TODAES, volume = "26", number = "1", pages = "5:1--5:31", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3414062", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3414062", abstract = "Wearables are poised to transform health and wellness through automation of cost-effective, objective, and real-time health monitoring. However, machine learning models for these systems are designed based on labeled data collected, and feature \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Islam:2021:HLS, author = "Sheikh Ariful Islam and Love Kumar Sah and Srinivas Katkoori", title = "High-Level Synthesis of Key-Obfuscated {RTL IP} with Design Lockout and Camouflaging", journal = j-TODAES, volume = "26", number = "1", pages = "6:1--6:35", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3410337", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3410337", abstract = "We propose three orthogonal techniques to secure 
Register-Transfer-Level (RTL) Intellectual Property (IP). In the first technique, the key-based RTL obfuscation scheme is proposed at an early design phase during High-Level Synthesis (HLS). Given a \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Poddar:2021:RMT, author = "Sudip Poddar and Tapalina Banerjee and Robert Wille and Bhargab B. Bhattacharya", title = "Robust Multi-Target Sample Preparation on {MEDA} Biochips Obviating Waste Production", journal = j-TODAES, volume = "26", number = "1", pages = "7:1--7:29", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3414061", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3414061", abstract = "Digital microfluidic biochips have fueled a paradigm shift in implementing bench-top laboratory experiments on a single tiny chip, thus replacing costly and bulky equipment. 
However, because of imprecise fluidic functions, several volumetric split \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhang:2021:DPR, author = "Ying Zhang and Xinpeng Hong and Zhongsheng Chen and Zebo Peng and Jianhui Jiang", title = "A Deterministic-Path Routing Algorithm for Tolerating Many Faults on Very-Large-Scale Network-on-Chip", journal = j-TODAES, volume = "26", number = "1", pages = "8:1--8:26", month = jan, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3414060", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:13 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3414060", abstract = "Very-large-scale network-on-chip (VLS-NoC) has become a promising fabric for supercomputers, but this fabric may encounter the many-fault problem. This article proposes a deterministic routing algorithm to tolerate the effects of many faults in VLS-NoC \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Goncalves:2021:SAT, author = "St{\`e}phano M. M. Gon{\c{c}}alves and da Rosa, Jr., Leomar S. and Felipe S.
Marques", title = "{SmartDR}: Algorithms and Techniques for Fast Detailed Routing with Good Design Rule Handling", journal = j-TODAES, volume = "26", number = "2", pages = "9:1--9:38", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3417133", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3417133", abstract = "Detailed routing is one of the most time-consuming steps of physical synthesis of integrated circuits. Also, it is very challenging due to the complexity of the design rules that the router must obey. In this article, we present SmartDR, a detailed \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liao:2021:EPA, author = "Tuotian Liao and Lihong Zhang", title = "Efficient Parasitic-aware {$ g_m $} \slash {$ I_D $}-based Hybrid Sizing Methodology for Analog and {RF} Integrated Circuits", journal = j-TODAES, volume = "26", number = "2", pages = "10:1--10:31", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3416946", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3416946", abstract = "As the primary second-order effect, parasitic issues have to be seriously addressed when synthesizing high-performance analog and RF integrated circuits (ICs).
In this article, a two-phase hybrid sizing methodology for analog and RF ICs is proposed to \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2021:CPO, author = "Nan Wu and Lei Deng and Guoqi Li and Yuan Xie", title = "Core Placement Optimization for Multi-chip Many-core Neural Network Systems with Reinforcement Learning", journal = j-TODAES, volume = "26", number = "2", pages = "11:1--11:27", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3418498", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3418498", abstract = "Multi-chip many-core neural network systems are capable of providing high parallelism benefited from decentralized execution, and they can be scaled to very large systems with reasonable fabrication costs. As multi-chip many-core systems scale up, \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Siddhu:2021:LAD, author = "Lokesh Siddhu and Rajesh Kedia and Preeti Ranjan Panda", title = "Leakage-Aware Dynamic Thermal Management of {$3$D} Memories", journal = j-TODAES, volume = "26", number = "2", pages = "12:1--12:31", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3419468", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3419468", abstract = "3D memory systems offer several advantages in terms of area, bandwidth, and energy efficiency. 
However, thermal issues arising out of higher power densities have limited their widespread use. While prior works have looked at reducing dynamic power \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ghosh:2021:PDP, author = "Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta", title = "Performance-Driven Post-Processing of Control Loop Execution Schedules", journal = j-TODAES, volume = "26", number = "2", pages = "13:1--13:27", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3421505", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3421505", abstract = "The increasing demand for mapping diverse embedded features onto shared electronic control units has brought about novel ways to co-design control tasks and their schedules. These techniques replace traditional implementations of control with new \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Luo:2021:TMF, author = "Yingyi Luo and Joshua C. 
Zhao and Arnav Aggarwal and Seda Ogrenci-Memik and Kazutomo Yoshii", title = "Thermal Management for {FPGA} Nodes in {HPC} Systems", journal = j-TODAES, volume = "26", number = "2", pages = "14:1--14:17", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3423494", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3423494", abstract = "The integration of FPGAs into large-scale computing systems is gaining attention. In these systems, real-time data handling for networking, tasks for scientific computing, and machine learning can be executed with customized datapaths on reconfigurable \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2021:RMB, author = "Jianli Chen and Ziran Zhu and Wenxing Zhu and Yao-Wen Chang", title = "A Robust Modulus-Based Matrix Splitting Iteration Method for Mixed-Cell-Height Circuit Legalization", journal = j-TODAES, volume = "26", number = "2", pages = "15:1--15:28", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3423326", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3423326", abstract = "Modern circuits often contain standard cells of different row heights to meet various design requirements. Taller cells give larger drive strengths and higher speed at the cost of larger areas and power. 
Multi-row height standard cells incur challenging \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Arka:2021:HHM, author = "Aqeeb Iqbal Arka and Biresh Kumar Joardar and Ryan Gary Kim and Dae Hyun Kim and Janardhan Rao Doppa and Partha Pratim Pande", title = "{HeM$3$D}: Heterogeneous Manycore Architecture Based on Monolithic {$3$D} Vertical Integration", journal = j-TODAES, volume = "26", number = "2", pages = "16:1--16:21", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3424239", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3424239", abstract = "Heterogeneous manycore architectures are the key to efficiently execute compute- and data-intensive applications. 
Through-silicon-via (TSV)-based 3D manycore system is a promising solution in this direction as it enables the integration of disparate \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Deb:2021:CRC, author = "Dipika Deb and John Jose and Maurizio Palesi", title = "{COPE}: Reducing Cache Pollution and Network Contention by Inter-tile Coordinated Prefetching in {NoC}-based {MPSoCs}", journal = j-TODAES, volume = "26", number = "3", pages = "17:1--17:31", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3428149", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3428149", abstract = "Prefetching helps in reducing the memory access latency in multi-banked NUCA architecture, where the Last Level Cache (LLC) is shared. 
In such systems, an application running on core generates significant traffic on the shared resources, the underlying \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Letras:2021:MOO, author = "Martin Letras and Joachim Falk and Tobias Schwarzer and J{\"u}rgen Teich", title = "Multi-objective Optimization of Mapping Dataflow Applications to {MPSoCs} Using a Hybrid Evaluation Combining Analytic Models and Measurements", journal = j-TODAES, volume = "26", number = "3", pages = "18:1--18:33", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3431814", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3431814", abstract = "Dataflow modeling is well suited for a large variety of applications for modern multi-core architectures, e.g., from the signal processing and the control domain. Furthermore, Design Space Exploration (DSE) can be used to explore mappings of tasks to \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2021:LDH, author = "Irith Pomeranz and M. Enamul Amyeen", title = "Logic Diagnosis with Hybrid Fail Data", journal = j-TODAES, volume = "26", number = "3", pages = "19:1--19:13", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3433929", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3433929", abstract = "Yield improvement requires information about the defects present in faulty units. 
This information is derived by applying a logic diagnosis procedure to the fail data collected by a tester from faulty units. It is typical in the early stages of yield \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ince:2021:FBB, author = "Mehmet Ince and Ender Yilmaz and Wei Fu and Joonsung Park and Krishnaswamy Nagaraj and Leroy Winemberg and Sule Ozev", title = "Fault-based Built-in Self-test and Evaluation of Phase Locked Loops", journal = j-TODAES, volume = "26", number = "3", pages = "20:1--20:18", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3427911", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3427911", abstract = "With the increasing pressure to obtain near-zero defect rates for the automotive industry, there is a need to explore built-in self-test and other non-traditional test techniques for embedded mixed-signal components, such as PLLs, DC-DC converters, and \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gebregirogis:2021:ALF, author = "Anteneh Gebregiorgis and Mehdi Tahoori", title = "Approximate Learning and Fault-Tolerant Mapping for Energy-Efficient Neuromorphic Systems", journal = j-TODAES, volume = "26", number = "3", pages = "21:1--21:23", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3436491", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3436491", abstract = 
"Brain-inspired deep neural networks such as Convolutional Neural Network (CNN) have shown great potential in solving difficult cognitive problems such as object recognition and classification. However, such architectures have high computational energy \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lyu:2021:MSC, author = "Yangdi Lyu and Prabhat Mishra", title = "{MaxSense}: Side-channel Sensitivity Maximization for {Trojan} Detection Using Statistical Test Patterns", journal = j-TODAES, volume = "26", number = "3", pages = "22:1--22:21", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3436820", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3436820", abstract = "Detection of hardware Trojans is vital to ensure the security and trustworthiness of System-on-Chip (SoC) designs. Side-channel analysis is effective for Trojan detection by analyzing various side-channel signatures such as power, current, and delay. 
In \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2021:CTH, author = "Irith Pomeranz", title = "Covering Test Holes of Functional Broadside Tests", journal = j-TODAES, volume = "26", number = "3", pages = "23:1--23:15", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3441282", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3441282", abstract = "Functional broadside tests were developed to avoid overtesting of delay faults. The tests achieve this goal by creating functional operation conditions during their functional capture cycles. To increase the achievable fault coverage, close-to-functional \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Roy:2021:MLS, author = "Urmimala Roy and Tanmoy Pramanik and Subhendu Roy and Avhishek Chatterjee and Leonard F. Register and Sanjay K. 
Banerjee", title = "Machine Learning for Statistical Modeling: The Case of Perpendicular Spin-Transfer-Torque Random Access Memory", journal = j-TODAES, volume = "26", number = "3", pages = "24:1--24:17", month = feb, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3440014", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 25 10:17:15 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3440014", abstract = "We propose a methodology to perform process variation-aware device and circuit design using fully physics-based simulations within limited computational resources, without developing a compact model. Machine learning (ML), specifically a support vector \ldots{}", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2021:EFU, author = "Irith Pomeranz", title = "Equivalent Faults under Launch-on-Shift {(LOS)} Tests with Equal Primary Input Vectors", journal = j-TODAES, volume = "26", number = "4", pages = "25:1--25:15", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3440013", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3440013", abstract = "A recent work showed that it is possible to transform a single-cycle test for stuck-at faults into a launch-on-shift (LOS) test that is guaranteed to detect the same stuck-at faults without any logic or fault simulation. 
The LOS test also detects \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Witharana:2021:DTG, author = "Hasini Witharana and Yangdi Lyu and Prabhat Mishra", title = "Directed Test Generation for Activation of Security Assertions in {RTL} Models", journal = j-TODAES, volume = "26", number = "4", pages = "26:1--26:28", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3441297", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3441297", abstract = "Assertions are widely used for functional validation as well as coverage analysis for both software and hardware designs. Assertions enable runtime error detection as well as faster localization of errors. While there is a vast literature on both \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mohammadzadeh:2021:EOP, author = "Naser Mohammadzadeh and Robert Wille and Oliver Keszocze", title = "Efficient One-pass Synthesis for Digital Microfluidic Biochips", journal = j-TODAES, volume = "26", number = "4", pages = "27:1--27:21", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3446880", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3446880", abstract = "Digital microfluidics biochips are a promising emerging technology that provides fluidic experimental capabilities on a chip (i.e., following the lab-on-a-chip paradigm). 
However, the design of such biochips still constitutes a challenging task that is \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jain:2021:TTA, author = "Ayush Jain and Ziqi Zhou and Ujjwal Guin", title = "{TAAL}: Tampering Attack on Any Key-based Logic Locked Circuits", journal = j-TODAES, volume = "26", number = "4", pages = "28:1--28:22", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3442379", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3442379", abstract = "Due to the globalization of semiconductor manufacturing and test processes, the system-on-a-chip (SoC) designers no longer design the complete SoC and manufacture chips on their own. This outsourcing of the design and manufacturing of Integrated \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rahman:2021:SAD, author = "M. 
Sazadur Rahman and Adib Nahiyan and Fahim Rahman and Saverio Fazzari and Kenneth Plaks and Farimah Farahmandi and Domenic Forte and Mark Tehranipoor", title = "Security Assessment of Dynamically Obfuscated Scan Chain Against Oracle-guided Attacks", journal = j-TODAES, volume = "26", number = "4", pages = "29:1--29:27", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3444960", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3444960", abstract = "Logic locking has emerged as a promising solution to protect integrated circuits against piracy and tampering. However, the security provided by existing logic locking techniques is often thwarted by Boolean satisfiability (SAT)-based oracle-guided \ldots{}", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sinha:2021:DSO, author = "Mitali Sinha and Gade Sri Harsha and Pramit Bhattacharyya and Sujay Deb", title = "Design Space Optimization of Shared Memory Architecture in Accelerator-rich Systems", journal = j-TODAES, volume = "26", number = "4", pages = "30:1--30:31", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3446001", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3446001", abstract = "Shared memory architectures, as opposed to private-only memories, provide a viable alternative to meet the ever-increasing memory requirements of multi-accelerator systems to achieve high performance under stringent area and energy constraints. 
However, \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Palchaudhuri:2021:DAT, author = "Ayan Palchaudhuri and Sandeep Sharma and Anindya Sundar Dhar", title = "Design Automation for Tree-based Nearest Neighborhood-aware Placement of High-speed Cellular Automata on {FPGA} with Scan Path Insertion", journal = j-TODAES, volume = "26", number = "4", pages = "31:1--31:34", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3446206", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3446206", abstract = "Cellular Automata (CA) is attractive for high-speed VLSI implementation due to modularity, cascadability, and locality of interconnections confined to neighboring logic cells. However, this outcome is not easily transferable to tree-structured CA, since \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yellu:2021:STA, author = "Pruthvy Yellu and Landon Buell and Miguel Mark and Michel A. 
Kinsy and Dongpeng Xu and Qiaoyan Yu", title = "Security Threat Analyses and Attack Models for Approximate Computing Systems: From Hardware and Micro-architecture Perspectives", journal = j-TODAES, volume = "26", number = "4", pages = "32:1--32:31", month = apr, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3442380", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Apr 27 08:06:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3442380", abstract = "Approximate computing (AC) represents a paradigm shift from conventional precise processing to inexact computation but still satisfying the system requirement on accuracy. The rapid progress on the development of diverse AC techniques allows us to apply \ldots{}", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jayasinghe:2021:QQB, author = "Darshana Jayasinghe and Aleksandar Ignjatovic and Roshan Ragel and Jude Angelo Ambrose and Sri Parameswaran", title = "{QuadSeal}: Quadruple Balancing to Mitigate Power Analysis Attacks with Variability Effects and Electromagnetic Fault Injection Attacks", journal = j-TODAES, volume = "26", number = "5", pages = "33:1--33:36", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3443706", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3443706", abstract = "Side channel analysis attacks employ the emanated side channel information to deduce the secret keys from cryptographic implementations by analyzing the power traces during execution or scrutinizing faulty outputs. 
To be effective, a countermeasure must \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2021:DHC, author = "Chin-Hsien Wu and Hao-Wei Zhang and Chia-Wei Liu and Ta-Ching Yu and Chi-Yen Yang", title = "A Dynamic {Huffman} Coding Method for Reliable {TLC NAND} Flash Memory", journal = j-TODAES, volume = "26", number = "5", pages = "34:1--34:25", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3446771", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3446771", abstract = "With the progress of the manufacturing process, NAND flash memory has evolved from the single-level cell and multi-level cell into the triple-level cell (TLC). NAND flash memory has physical problems such as the characteristic of erase-before-write and \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jeong:2021:DMB, author = "Eunjin Jeong and Dowhan Jeong and Soonhoi Ha", title = "Dataflow Model-based Software Synthesis Framework for Parallel and Distributed Embedded Systems", journal = j-TODAES, volume = "26", number = "5", pages = "35:1--35:38", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3447680", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3447680", abstract = "Existing software development methodologies mostly assume that an application runs on a single device without concern about the non-functional requirements of an 
embedded system such as latency and resource consumption. Besides, embedded software is \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xie:2021:DFM, author = "Guoqi Xie and Hao Peng and Xiongren Xiao and Yao Liu and Renfa Li", title = "Design Flow and Methodology for Dynamic and Static Energy-constrained Scheduling Framework in Heterogeneous Multicore Embedded Devices", journal = j-TODAES, volume = "26", number = "5", pages = "36:1--36:18", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3450448", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3450448", abstract = "With Internet of things technologies, billions of embedded devices, including smart gateways, smart phones, and mobile robots, are connected and deeply integrated. Almost all these embedded devices are battery-constrained and energy-limited systems. 
In \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Park:2021:PPD, author = "Heechun Park and Bon Woong Ku and Kyungwook Chang and Da Eun Shim and Sung Kyu Lim", title = "Pseudo-{$3$D} Physical Design Flow for Monolithic {$3$D} {ICs}: Comparisons and Enhancements", journal = j-TODAES, volume = "26", number = "5", pages = "37:1--37:25", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3453480", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3453480", abstract = "Studies have shown that monolithic 3D (M3D) ICs outperform the existing through-silicon-via (TSV) -based 3D ICs in terms of power, performance, and area (PPA) metrics, primarily due to the orders of magnitude denser vertical interconnections offered by \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hassanpourghadi:2021:MLG, author = "Mohsen Hassanpourghadi and Rezwan A. 
Rasul and Mike Shuo-Wei Chen", title = "A Module-Linking Graph Assisted Hybrid Optimization Framework for Custom Analog and Mixed-Signal Circuit Parameter Synthesis", journal = j-TODAES, volume = "26", number = "5", pages = "38:1--38:22", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3456722", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3456722", abstract = "Analog and mixed-signal (AMS) computer-aided design tools are of increasing interest owing to demand for the wide range of AMS circuit specifications in the modern system on a chip and faster time to market requirement. Traditionally, to accelerate the \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Feng:2021:FRT, author = "Lang Feng and Jeff Huang and Jiang Hu and Abhijith Reddy", title = "{FastCFI}: Real-time Control-Flow Integrity Using {FPGA} without Code Instrumentation", journal = j-TODAES, volume = "26", number = "5", pages = "39:1--39:39", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3458471", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3458471", abstract = "Control-Flow Integrity (CFI) is an effective defense technique against a variety of memory-based cyber attacks. CFI is usually enforced through software methods, which entail considerable performance overhead. 
Hardware-based CFI techniques can largely \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2021:MLE, author = "Guyue Huang and Jingbo Hu and Yifan He and Jialong Liu and Mingyuan Ma and Zhaoyang Shen and Juejian Wu and Yuanfan Xu and Hengrui Zhang and Kai Zhong and Xuefei Ning and Yuzhe Ma and Haoyu Yang and Bei Yu and Huazhong Yang and Yu Wang", title = "Machine Learning for Electronic Design Automation: a Survey", journal = j-TODAES, volume = "26", number = "5", pages = "40:1--40:46", month = jun, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3451179", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Jun 22 08:18:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3451179", abstract = "With the down-scaling of CMOS technology, the design complexity of very large-scale integrated is increasing. 
Although the application of machine learning (ML) techniques in electronic design automation (EDA) can trace its history back to the 1990s, the \ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chattopadhyay:2021:CCP, author = "Saranyu Chattopadhyay and Pranesh Santikellur and Rajat Subhra Chakraborty and Jimson Mathew and Marco Ottavi", title = "A Conditionally Chaotic Physically Unclonable Function Design Framework with High Reliability", journal = j-TODAES, volume = "26", number = "6", pages = "41:1--41:24", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460004", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460004", abstract = "Physically Unclonable Function (PUF) circuits are promising low-overhead hardware security primitives, but are often gravely susceptible to machine learning-based modeling attacks. 
Recently, chaotic PUF circuits have been proposed that show greater \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jiang:2021:PDM, author = "Chen Jiang and Bo Yuan and Tsung-Yi Ho and Xin Yao", title = "Placement of Digital Microfluidic Biochips via a New Evolutionary Algorithm", journal = j-TODAES, volume = "26", number = "6", pages = "42:1--42:22", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460230", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460230", abstract = "Digital microfluidic biochips (DMFBs) have been a revolutionary platform for automating and miniaturizing laboratory procedures with the advantages of flexibility and reconfigurability. The placement problem is one of the most challenging issues in the \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gnad:2021:VBC, author = "Dennis R. E. Gnad and Cong Dang Khoa Nguyen and Syed Hashim Gillani and Mehdi B. Tahoori", title = "Voltage-Based Covert Channels Using {FPGAs}", journal = j-TODAES, volume = "26", number = "6", pages = "43:1--43:25", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460229", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460229", abstract = "Field Programmable Gate Arrays (FPGAs) are increasingly used in cloud applications and being integrated into Systems-on-Chip. 
For these systems, various side-channel attacks on cryptographic implementations have been reported, motivating one to apply \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ning:2021:FND, author = "Xuefei Ning and Guangjun Ge and Wenshuo Li and Zhenhua Zhu and Yin Zheng and Xiaoming Chen and Zhen Gao and Yu Wang and Huazhong Yang", title = "{FTT-NAS}: Discovering Fault-tolerant Convolutional Neural Architecture", journal = j-TODAES, volume = "26", number = "6", pages = "44:1--44:24", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460288", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460288", abstract = "With the fast evolvement of embedded deep-learning computing systems, applications powered by deep learning are moving from the cloud to the edge. 
When deploying neural networks (NNs) onto the devices under complex environments, there are various types of \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lu:2021:RRD, author = "Anni Lu and Xiaochen Peng and Yandong Luo and Shanshi Huang and Shimeng Yu", title = "A Runtime Reconfigurable Design of Compute-in-Memory-Based Hardware Accelerator for Deep Learning Inference", journal = j-TODAES, volume = "26", number = "6", pages = "45:1--45:18", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460436", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460436", abstract = "Compute-in-memory (CIM) is an attractive solution to address the ``memory wall'' challenges for the extensive computation in deep learning hardware accelerators. 
For custom ASIC design, a specific chip instance is restricted to a specific network during \ldots{}", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Roy:2021:FVS, author = "Pushpita Roy and Ansuman Banerjee", title = "A Framework for Validation of Synthesized {MicroElectrode} Dot Array Actuations for Digital Microfluidic Biochips", journal = j-TODAES, volume = "26", number = "6", pages = "46:1--46:36", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460437", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460437", abstract = "Digital Microfluidics is an emerging technology for automating laboratory procedures in biochemistry. With more and more complex biochemical protocols getting mapped to biochip devices and microfluidics receiving a wide adoption, it is becoming \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2021:VAH, author = "Xi Li and Soheil Nazar Shahsavani and Xuan Zhou and Massoud Pedram and Peter A. 
Beerel", title = "A Variation-aware Hold Time Fixing Methodology for Single Flux Quantum Logic Circuits", journal = j-TODAES, volume = "26", number = "6", pages = "47:1--47:17", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460289", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460289", abstract = "Single flux quantum (SFQ) logic is a promising technology to replace complementary metal-oxide-semiconductor logic for future exa-scale supercomputing but requires the development of reliable EDA tools that are tailored to the unique characteristics of \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Park:2021:HTN, author = "Naebeom Park and Sungju Ryu and Jaeha Kung and Jae-Joon Kim", title = "High-throughput Near-Memory Processing on {CNNs} with {$3$D} {HBM}-like Memory", journal = j-TODAES, volume = "26", number = "6", pages = "48:1--48:20", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460971", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460971", abstract = "This article discusses the high-performance near-memory neural network (NN) accelerator architecture utilizing the logic die in three-dimensional (3D) High Bandwidth Memory- (HBM) like memory. 
As most of the previously reported 3D memory-based near-memory \ldots{}", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Maleki:2021:EEI, author = "Mohammad-Ali Maleki and Alireza Nabipour-Meybodi and Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram", title = "An Energy-Efficient Inference Method in Convolutional Neural Networks Based on Dynamic Adjustment of the Pruning Level", journal = j-TODAES, volume = "26", number = "6", pages = "49:1--49:20", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3460972", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3460972", abstract = "In this article, we present a low-energy inference method for convolutional neural networks in image classification applications. The lower energy consumption is achieved by using a highly pruned (lower-energy) network if the resulting network can provide \ldots{}", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2021:DAS, author = "Dave Y.-W. Lin and Charles H.-P. 
Wen", title = "A Delay-Adjustable, Self-Testable Flip-Flop for Soft-Error Tolerability and Delay-Fault Testability", journal = j-TODAES, volume = "26", number = "6", pages = "50:1--50:12", month = nov, year = "2021", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3462171", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Aug 19 08:44:49 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3462171", abstract = "As the demand of safety-critical applications (e.g., automobile electronics) increases, various radiation-hardened flip-flops are proposed for enhancing design reliability. Among all flip-flops, Delay-Adjustable D-Flip-Flop (DAD-FF) is specialized in \ldots{}", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Polychronou:2022:CSA, author = "Nikolaos-Foivos Polychronou and Pierre-Henri Thevenon and Maxime Puys and Vincent Beroulle", title = "A Comprehensive Survey of Attacks without Physical Access Targeting Hardware Vulnerabilities in {IoT\slash IIoT} Devices, and Their Detection Mechanisms", journal = j-TODAES, volume = "27", number = "1", pages = "1:1--1:35", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3471936", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3471936", abstract = "With the advances in the field of the Internet of Things (IoT) and Industrial IoT (IIoT), these devices are increasingly used in daily life or industry. 
To reduce costs related to the time required to develop these devices, security features are usually \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gade:2022:NHC, author = "Sri Harsha Gade and Sujay Deb", title = "A Novel Hybrid Cache Coherence with Global Snooping for Many-core Architectures", journal = j-TODAES, volume = "27", number = "1", pages = "2:1--2:31", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3462775", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3462775", abstract = "Cache coherence ensures correctness of cached data in multi-core processors. Traditional implementations of existing protocols make them unscalable for many core architectures. 
While snoopy coherence requires unscalable ordered networks, directory \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Han:2022:EEF, author = "Ding Han and Guohui Li and Quan Zhou and Jianjun Li and Yong Yang and Xiaofei Hu", title = "An Efficient Execution Framework of Two-Part Execution Scenario Analysis", journal = j-TODAES, volume = "27", number = "1", pages = "3:1--3:24", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3465474", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3465474", abstract = "Response Time Analysis (RTA) is an important and promising technique for analyzing the schedulability of real-time tasks under both Global Fixed-Priority (G-FP) scheduling and Global Earliest Deadline First (G-EDF) scheduling. 
Most existing RTA methods \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{He:2022:DME, author = "Jingyu He and Yao Xiao and Corina Bogdan and Shahin Nazarian and Paul Bogdan", title = "A Design Methodology for Energy-Aware Processing in Unmanned Aerial Vehicles", journal = j-TODAES, volume = "27", number = "1", pages = "4:1--4:20", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3470451", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3470451", abstract = "Unmanned Aerial Vehicles (UAVs) have rapidly become popular for monitoring, delivery, and actuation in many application domains such as environmental management, disaster mitigation, homeland security, energy, transportation, and manufacturing. 
However, \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cui:2022:ILD, author = "Lanlan Cui and Fei Wu and Xiaojian Liu and Meng Zhang and Renzhi Xiao and Changsheng Xie", title = "Improving {LDPC} Decoding Performance for {$3$D TLC NAND} Flash by {LLR} Optimization Scheme for Hard and Soft Decision", journal = j-TODAES, volume = "27", number = "1", pages = "5:1--5:20", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3473305", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3473305", abstract = "Low-density parity-check (LDPC) codes have been widely adopted in NAND flash in recent years to enhance data reliability. There are two types of decoding, hard-decision and soft-decision decoding. 
However, for the two types, their error correction \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2022:NSI, author = "Bo Li and Guoyong Shi", title = "A Native {SPICE} Implementation of Memristor Models for Simulation of Neuromorphic Analog Signal Processing Circuits", journal = j-TODAES, volume = "27", number = "1", pages = "6:1--6:24", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3474364", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3474364", abstract = "Since the memristor emerged as a programmable analog storage device, it has stimulated research on the design of analog/mixed-signal circuits with the memristor as the enabler of in-memory computation. Due to the difficulty in evaluating the circuit-level \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Poddar:2022:DDM, author = "Sudip Poddar and Sukanta Bhattacharjee and Shao-Yun Fang and Tsung-Yi Ho and B. B. 
Bhattacharya", title = "Demand-Driven Multi-Target Sample Preparation on Resource-Constrained Digital Microfluidic Biochips", journal = j-TODAES, volume = "27", number = "1", pages = "7:1--7:21", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3474392", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3474392", abstract = "Microfluidic lab-on-chips offer promising technology for the automation of various biochemical laboratory protocols on a minuscule chip. Sample preparation (SP) is an essential part of any biochemical experiments, which aims to produce dilution of a \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2022:FIA, author = "Qiang Liu and Honghui Tang and Peiran Zhang", title = "Fault Injection Attack Emulation Framework for Early Evaluation of {IC} Designs", journal = j-TODAES, volume = "27", number = "1", pages = "8:1--8:25", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3480962", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3480962", abstract = "Fault injection attack (FIA) has become a serious threat to the confidentiality and fault tolerance of integrated circuits (ICs). 
Circuit designers need an effective method to evaluate the countermeasures of the IC designs against the FIAs at the design \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ge:2022:SBN, author = "Mengke Ge and Xiaobing Ni and Xu Qi and Song Chen and Jinglei Huang and Yi Kang and Feng Wu", title = "Synthesizing Brain-network-inspired Interconnections for Large-scale Network-on-chips", journal = j-TODAES, volume = "27", number = "1", pages = "9:1--9:30", month = jan, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3480961", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jan 7 08:25:44 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3480961", abstract = "Brain network is a large-scale complex network with scale-free, small-world, and modularity properties, which largely supports this high-efficiency massive system. In this article, we propose to synthesize brain-network-inspired interconnections for large-scale
\ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Alaghi:2022:ISI, author = "Armin Alaghi and Eva Darulova and Andreas Gerstlauer and Phillip Stanley-Marbell", title = "Introduction to the Special Issue on Approximate Systems", journal = j-TODAES, volume = "27", number = "2", pages = "10:1--10:2", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3488726", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3488726", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bu:2022:TFG, author = "Tiancong Bu and Kaige Yan and Jingweijia Tan", title = "Towards Fine-Grained Online Adaptive Approximation Control for Dense {SLAM} on Embedded {GPUs}", journal = j-TODAES, volume = "27", number = "2", pages = "11:1--11:19", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3486612", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3486612", abstract = "Dense SLAM is an important application on an embedded environment. However, embedded platforms usually fail to provide enough computation resources for high-accuracy real-time dense SLAM, even with high-parallelism architecture such as GPUs. 
To tackle \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Singh:2022:PFE, author = "Somesh Singh and Tejas Shah and Rupesh Nasre", title = "{ParTBC}: Faster Estimation of Top-{$k$} Betweenness Centrality Vertices on {GPU}", journal = j-TODAES, volume = "27", number = "2", pages = "12:1--12:25", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3486613", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3486613", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2022:AAF, author = "Liu Liu and Sibren Isaacman and Ulrich Kremer", title = "An Adaptive Application Framework with Customizable Quality Metrics", journal = j-TODAES, volume = "27", number = "2", pages = "13:1--13:33", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3477428", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3477428", abstract = "Many embedded environments require applications to produce outcomes under different, potentially changing, resource constraints. Relaxing application semantics through approximations enables trading off resource usage for outcome quality.
Although quality \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chowdhury:2022:LAH, author = "Prattay Chowdhury and Benjamin Carrion Schafer", title = "Leveraging Automatic High-Level Synthesis Resource Sharing to Maximize Dynamical Voltage Overscaling with Error Control", journal = j-TODAES, volume = "27", number = "2", pages = "14:1--14:18", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3473909", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3473909", abstract = "Approximate Computing has emerged as an alternative way to further reduce the power consumption of integrated circuits (ICs) by trading off errors at the output with simpler, more efficient logic. 
So far the main approaches in approximate computing have \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Han:2022:DSL, author = "Ming Han and Ye Wang and Jian Dong and Gang Qu", title = "Double-Shift: a Low-Power {DNN} Weights Storage and Access Framework based on Approximate Decomposition and Quantization", journal = j-TODAES, volume = "27", number = "2", pages = "15:1--15:16", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3477047", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3477047", abstract = "One major challenge in deploying Deep Neural Network (DNN) in resource-constrained applications, such as edge nodes, mobile embedded systems, and IoT devices, is its high energy cost. 
The emerging approximate computing methodology can effectively reduce \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ebrahimi:2022:PCL, author = "Zahra Ebrahimi and Dennis Klar and Mohammad Aasim Ekhtiyar and Akash Kumar", title = "Plasticine: a Cross-layer Approximation Methodology for Multi-kernel Applications through Minimally Biased, High-throughput, and Energy-efficient {SIMD} Soft Multiplier-divider", journal = j-TODAES, volume = "27", number = "2", pages = "16:1--16:33", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3486616", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3486616", abstract = "The rapid evolution of error-resilient programs intertwined with their quest for high throughput has motivated the use of Single Instruction, Multiple Data (SIMD) components in Field-Programmable Gate Arrays (FPGAs). Particularly, to exploit the error-
\ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lee:2022:DAA, author = "Jaechul Lee and C{\'e}dric Killian and Sebastien {Le Beux} and Daniel Chillet", title = "Distance-aware Approximate Nanophotonic Interconnect", journal = j-TODAES, volume = "27", number = "2", pages = "17:1--17:30", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3484309", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3484309", abstract = "The energy consumption of manycore architectures is dominated by data movement, which calls for energy-efficient and high-bandwidth interconnects. To overcome the bandwidth limitation of electrical interconnects, integrated optics appear as a promising \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Angizi:2022:MRN, author = "Shaahin Angizi and Navid Khoshavi and Andrew Marshall and Peter Dowben and Deliang Fan", title = "{MeF-RAM}: a New Non-Volatile Cache Memory Based on Magneto-Electric {FET}", journal = j-TODAES, volume = "27", number = "2", pages = "18:1--18:18", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3484222", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3484222", abstract = "Magneto-Electric FET (MEFET) is a recently developed post-CMOS FET, which offers intriguing characteristics for high-speed and low-power design in both logic and memory applications. 
In this article, we present MeF-RAM, a non-volatile cache memory design \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shi:2022:CHD, author = "Xiao Shi and Hao Yan and Qiancun Huang and Chengzhen Xuan and Lei He and Longxing Shi", title = "A Compact High-Dimensional Yield Analysis Method using Low-Rank Tensor Approximation", journal = j-TODAES, volume = "27", number = "2", pages = "19:1--19:23", month = mar, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3483941", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 17 07:56:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3483941", abstract = "``Curse of dimensionality'' has become the major challenge for existing high-sigma yield analysis methods. In this article, we develop a meta-model using Low-Rank Tensor Approximation (LRTA) to substitute expensive SPICE simulation. 
The polynomial degree of \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cai:2022:EDL, author = "Han Cai and Ji Lin and Yujun Lin and Zhijian Liu and Haotian Tang and Hanrui Wang and Ligeng Zhu and Song Han", title = "Enable Deep Learning on Mobile Devices: Methods, Systems, and Applications", journal = j-TODAES, volume = "27", number = "3", pages = "20:1--20:50", month = may, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3486618", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Mar 24 16:05:33 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3486618", abstract = "Deep neural networks (DNNs) have achieved unprecedented success in the field of artificial intelligence (AI), including computer vision, natural language processing, and speech recognition. However, their superior performance comes at the considerable \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{S:2022:EEE, author = "Skandha Deepsita S. and Dhayala Kumar M. and Noor Mahammad SK", title = "Energy Efficient Error Resilient Multiplier Using Low-power Compressors", journal = j-TODAES, volume = "27", number = "3", pages = "21:1--21:26", month = may, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3488837", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Mar 24 16:05:33 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3488837", abstract = "The approximate hardware design can save huge energy at the cost of errors incurred in the design. 
This article proposes the approximate algorithm for low-power compressors, utilized to build approximate multiplier with low energy and acceptable error \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Oldja:2022:HSS, author = "Mari-Liis Oldja and Jangryul Kim and Dowhan Jeong and Soonhoi Ha", title = "Hierarchical Scheduling of an {SDF/L} Graph onto Multiple Processors", journal = j-TODAES, volume = "27", number = "3", pages = "22:1--22:23", month = may, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3489469", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Mar 24 16:05:33 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3489469", abstract = "Although dataflow models are known to thrive at exploiting task-level parallelism of an application, it is difficult to exploit the parallelism of data, represented well with loop structures, since these structures are not explicitly specified in existing \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2022:UTB, author = "Si Chen and Guoqi Xie and Renfa Li and Keqin Li", title = "Uncertainty Theory Based Partitioning for Cyber-Physical Systems with Uncertain Reliability Analysis", journal = j-TODAES, volume = "27", number = "3", pages = "23:1--23:19", month = may, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3490177", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Mar 24 16:05:33 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3490177", abstract = "Reasonable partitioning is a critical issue for 
cyber-physical system (CPS) design. Traditional CPS partitioning methods run in a determined context and depend on the parameter pre-estimations, but they ignore the uncertainty of parameters and hardly \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Luo:2022:FDF, author = "Yukui Luo and Shijin Duan and Xiaolin Xu", title = "{FPGAPRO}: a Defense Framework Against Crosstalk-Induced Secret Leakage in {FPGA}", journal = j-TODAES, volume = "27", number = "3", pages = "24:1--24:31", month = may, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3491214", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Mar 24 16:05:33 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3491214", abstract = "With the emerging cloud-computing development, FPGAs are being integrated with cloud servers for higher performance. 
Recently, it has been explored to enable multiple users to share the hardware resources of a remote FPGA, i.e., to execute their own \ldots{}", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", }

@Article{Feng:2022:TTO,
  author =       "Lang Feng and Jiayi Huang and Jeff Huang and Jiang Hu",
  title =        "Toward Taming the Overhead Monster for Data-flow
                 Integrity",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "25:1--25:24",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3490176",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490176",
  abstract =     "Data-Flow Integrity (DFI) is a well-known approach to
                 effectively detecting a wide range of software attacks.
                 However, its real-world application has been quite
                 limited so far because of the prohibitive performance
                 overhead it incurs. Moreover, the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Mahalat:2022:ICA,
  author =       "Mahabub Hasan Mahalat and Suraj Mandal and Anindan
                 Mondal and Bibhash Sen and Rajat Subhra Chakraborty",
  title =        "Implementation, Characterization and Application of
                 Path Changing Switch based Arbiter {PUF} on {FPGA} as a
                 lightweight Security Primitive for {IoT}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "26:1--26:26",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491212",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491212",
  abstract =     "Secure authentication of any Internet-of-Things (IoT)
                 device becomes the utmost necessity due to the lack of
                 specifically designed IoT standards and intrinsic
                 vulnerabilities with limited resources and
                 heterogeneous technologies. Despite the suitability
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Baker:2022:CMA,
  author =       "Timothy J. Baker and John P. Hayes",
  title =        "{CeMux}: Maximizing the Accuracy of Stochastic Mux
                 Adders and an Application to Filter Design",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "27:1--27:26",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491213",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491213",
  abstract =     "Stochastic computing (SC) is a low-cost computational
                 paradigm that has promising applications in digital
                 filter design, image processing, and neural networks.
                 Fundamental to these applications is the weighted
                 addition operation, which is most often \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Elangovan:2022:ABA,
  author =       "Reena Elangovan and Shubham Jain and Anand
                 Raghunathan",
  title =        "{Ax-BxP}: Approximate Blocked Computation for
                 Precision-reconfigurable Deep Neural Network
                 Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "28:1--28:20",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3492733",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492733",
  abstract =     "Precision scaling has emerged as a popular technique
                 to optimize the compute and storage requirements of
                 Deep Neural Networks (DNNs). Efforts toward creating
                 ultra-low-precision (sub-8-bit) DNNs for efficient
                 inference suggest that the minimum precision \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% Volume 27, number 4, July 2022

@Article{Pilato:2022:ISS,
  author =       "Christian Pilato and Zhenman Fang and Yuko Hara-Azumi
                 and Jim Hwang",
  title =        "Introduction to the Special Section on High-level
                 Synthesis for {FPGA}: Next-generation Technologies and
                 Applications",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "29:1--29:2",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3519279",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3519279",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ramanathan:2022:CPF,
  author =       "Nadesh Ramanathan and George A. Constantinides and
                 John Wickerson",
  title =        "A Case for Precise, Fine-Grained Pointer Synthesis in
                 High-Level Synthesis",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "30:1--30:26",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491430",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491430",
  abstract =     "This article combines two practical approaches to
                 improve pointer synthesis within HLS tools. Both
                 approaches focus on inefficiencies in how HLS tools
                 treat the points-to graph --- a mapping that connects
                 each instruction to the memory locations that it might
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sun:2022:CMO,
  author =       "Qi Sun and Tinghuan Chen and Siting Liu and Jianli
                 Chen and Hao Yu and Bei Yu",
  title =        "Correlated Multi-objective Multi-fidelity Optimization
                 for {HLS} Directives Design",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "31:1--31:27",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503540",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503540",
  abstract =     "High-level synthesis (HLS) tools have gained great
                 attention in recent years because it emancipates
                 engineers from the complicated and heavy hardware
                 description language writing and facilitates the
                 implementations of modern applications (e.g., deep
                 \ldots{})",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sohrabizadeh:2022:AES,
  author =       "Atefeh Sohrabizadeh and Cody Hao Yu and Min Gao and
                 Jason Cong",
  title =        "{AutoDSE}: Enabling Software Programmers to Design
                 Efficient {FPGA} Accelerators",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "32:1--32:27",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3494534",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494534",
  abstract =     "Adopting FPGA as an accelerator in datacenters is
                 becoming mainstream for customized computing, but the
                 fact that FPGAs are hard to program creates a steep
                 learning curve for software programmers. Even with the
                 help of high-level synthesis (HLS), \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gautier:2022:SMO,
  author =       "Quentin Gautier and Alric Althoff and Christopher L.
                 Crutchfield and Ryan Kastner",
  title =        "{Sherlock}: a Multi-Objective Design Space Exploration
                 Framework",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "33:1--33:20",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3511472",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3511472",
  abstract =     "Design space exploration (DSE) provides intelligent
                 methods to tune the large number of optimization
                 parameters present in modern FPGA high-level synthesis
                 tools. High-level synthesis parameter tuning is a
                 time-consuming process due to lengthy hardware
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wang:2022:LPE,
  author =       "Zi Wang and Benjamin Carrion Schafer",
  title =        "Learning from the Past: Efficient High-level Synthesis
                 Design Space Exploration for {FPGAs}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "34:1--34:23",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495531",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495531",
  abstract =     "The quest to democratize the use of Field-Programmable
                 Gate Arrays (FPGAs) has given High-Level Synthesis
                 (HLS) the final push to be widely accepted with FPGA
                 vendors strongly supporting this VLSI design
                 methodology to expand the FPGA user base. HLS takes
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sjovall:2022:HLS,
  author =       "Panu Sj{\"o}vall and Ari Lemmetti and Jarno Vanne and
                 Sakari Lahti and Timo D. H{\"a}m{\"a}l{\"a}inen",
  title =        "High-Level Synthesis Implementation of an Embedded
                 Real-Time {HEVC} Intra Encoder on {FPGA} for Media
                 Applications",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "35:1--35:34",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491215",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491215",
  abstract =     "High Efficiency Video Coding (HEVC) is the key
                 enabling technology for numerous modern media
                 applications. Overcoming its computational complexity
                 and customizing its rich features for real-time HEVC
                 encoder implementations, calls for automated design
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Liu:2022:LOH,
  author =       "Yanjiang Liu and Tongzhou Qu and Zibin Dai",
  title =        "A Low-Overhead and High-Security Cryptographic Circuit
                 Design Utilizing the {TIGFET}-Based Three-Phase
                 Single-Rail Pulse Register against Side-Channel
                 Attacks",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "36:1--36:13",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3498339",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3498339",
  abstract =     "Side-channel attack (SCA) reveals confidential
                 information by statistically analyzing physical
                 manifestations, which is the serious threat to
                 cryptographic circuits. Various SCA circuit-level
                 countermeasures have been proposed as fundamental
                 solutions to \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Huang:2022:AHS,
  author =       "Shanshi Huang and Xiaoyu Sun and Xiaochen Peng and
                 Hongwu Jiang and Shimeng Yu",
  title =        "Achieving High In Situ Training Accuracy and Energy
                 Efficiency with Analog Non-Volatile Synaptic Devices",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "37:1--37:19",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3500929",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3500929",
  abstract =     "On-device embedded artificial intelligence prefers the
                 adaptive learning capability when deployed in the
                 field, and thus in situ training is required. The
                 compute-in-memory approach, which exploits the analog
                 computation within the memory array, is a \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Uysal:2022:SCN,
  author =       "Necati Uysal and Rickard Ewetz",
  title =        "Synthesis of Clock Networks with a Mode-Reconfigurable
                 Topology",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "38:1--38:22",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503538",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503538",
  abstract =     "Modern digital circuits are often required to operate
                 in multiple modes to cater to variable frequency and
                 power requirements. Consequently, the clock networks
                 for such circuits must be synthesized, meeting
                 different timing constraints in different \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Handique:2022:FLS,
  author =       "Mousum Handique and Jantindra Kumar Deka and Santosh
                 Biswas",
  title =        "Fault Localization Scheme for Missing Gate Faults in
                 Reversible Circuits",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "39:1--39:29",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503539",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503539",
  abstract =     "This article introduces a fault localization method to
                 extract the exact location of single and multiple
                 missing gate faults in reversible \( k \)-CNOT-based
                 circuits. The primary target of the proposed method is
                 to obtain the complete test set for \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Guo:2022:SSD,
  author =       "Wenzhong Guo and Sihuang Lian and Chen Dong and Zhenyi
                 Chen and Xing Huang",
  title =        "A Survey on Security of Digital Microfluidic Biochips:
                 Technology, Attack, and Defense",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "40:1--40:33",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3494697",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494697",
  abstract =     "As an emerging lab-on-a-chip technology platform,
                 digital microfluidic biochips (DMFBs) have been widely
                 used for executing various laboratory procedures in
                 biochemistry and biomedicine such as gene sequencing
                 and near-patient diagnosis, with the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% Volume 27, number 5, September 2022

@Article{Chandra:2022:ISS,
  author =       "Vikas Chandra and Yiran Chen and Sungjoo Yoo",
  title =        "Introduction to the Special Section on
                 Energy-Efficient {AI} Chips",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "41:1--41:2",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3538502",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3538502",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lee:2022:MEC,
  author =       "Sunjung Lee and Jaewan Choi and Wonkyung Jung and
                 Byeongho Kim and Jaehyun Park and Hweesoo Kim and Jung
                 Ho Ahn",
  title =        "{MVP}: an Efficient {CNN} Accelerator with Matrix,
                 Vector, and Processing-Near-Memory Units",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "42:1--42:25",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3497745",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3497745",
  abstract =     "Mobile and edge devices become common platforms for
                 inferring convolutional neural networks (CNNs) due to
                 superior privacy and service quality. To reduce the
                 computational costs of convolution (CONV), recent CNN
                 models adopt depth-wise CONV (DW-CONV) and \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Cicek:2022:EEB,
  author =       "Nihat Mert Cicek and Xipeng Shen and Ozcan Ozturk",
  title =        "Energy Efficient Boosting of {GEMM} Accelerators for
                 {DNN} via Reuse",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "43:1--43:26",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503469",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503469",
  abstract =     "Reuse-centric convolutional neural networks (CNN)
                 acceleration speeds up CNN inference by reusing
                 computations for similar neuron vectors in CNN's input
                 layer or activation maps. This new paradigm of
                 optimizations is, however, largely limited by the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chen:2022:EEL,
  author =       "Zhe Chen and Hugh T. Blair and Jason Cong",
  title =        "Energy-Efficient {LSTM} Inference Accelerator for
                 Real-Time Causal Prediction",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "44:1--44:19",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495006",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495006",
  abstract =     "Ever-growing edge applications often require short
                 processing latency and high energy efficiency to meet
                 strict timing and power budget. In this work, we
                 propose that the compact long short-term memory (LSTM)
                 model can approximate conventional acausal \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Shiri:2022:EEE,
  author =       "Aidin Shiri and Uttej Kallakuri and Hasib-Al Rashid
                 and Bharat Prakash and Nicholas R. Waytowich and Tim
                 Oates and Tinoosh Mohsenin",
  title =        "{E2HRL}: an Energy-efficient Hardware Accelerator for
                 Hierarchical Deep Reinforcement Learning",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "45:1--45:19",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3498327",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3498327",
  abstract =     "Recently, Reinforcement Learning (RL) has shown great
                 performance in solving sequential decision-making and
                 control in dynamic environment problems. Despite its
                 achievements, deploying Deep Neural Network (DNN)-based
                 RL is expensive in terms of time and \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Laubeuf:2022:DQR,
  author =       "Nathan Laubeuf and Jonas Doevenspeck and Ioannis A.
                 Papistas and Michele Caselli and Stefan Cosemans and
                 Peter Vrancx and Debjyoti Bhattacharjee and Arindam
                 Mallik and Peter Debacker and Diederik Verkest and
                 Francky Catthoor and Rudy Lauwereins",
  title =        "Dynamic Quantization Range Control for
                 Analog-in-Memory Neural Networks Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "46:1--46:21",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3498328",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3498328",
  abstract =     "Analog in Memory Computing (AiMC) based neural network
                 acceleration is a promising solution to increase the
                 energy efficiency of deep neural networks deployment.
                 However, the quantization requirements of these analog
                 systems are not compatible with state- \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gong:2022:AMB,
  author =       "Yifan Gong and Geng Yuan and Zheng Zhan and Wei Niu
                 and Zhengang Li and Pu Zhao and Yuxuan Cai and Sijia
                 Liu and Bin Ren and Xue Lin and Xulong Tang and Yanzhi
                 Wang",
  title =        "Automatic Mapping of the Best-Suited {DNN} Pruning
                 Schemes for Real-Time Mobile Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "47:1--47:26",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495532",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495532",
  abstract =     "Weight pruning is an effective model compression
                 technique to tackle the challenges of achieving
                 real-time deep neural network (DNN) inference on mobile
                 devices. However, prior pruning schemes have limited
                 application scenarios due to accuracy degradation,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lee:2022:ION,
  author =       "Jooyeon Lee and Junsang Park and Seunghyun Lee and
                 Jaeha Kung",
  title =        "Implication of Optimizing {NPU} Dataflows on Neural
                 Architecture Search for Mobile Devices",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "48:1--48:24",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3513085",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3513085",
  abstract =     "Recent advances in deep learning have made it possible
                 to implement artificial intelligence in mobile devices.
                 Many studies have put a lot of effort into developing
                 lightweight deep learning models optimized for mobile
                 devices. To overcome the performance \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Tang:2022:ETE,
  author =       "Yue Tang and Xinyi Zhang and Peipei Zhou and Jingtong
                 Hu",
  title =        "{EF-Train}: Enable Efficient On-device {CNN} Training
                 on {FPGA} through Data Reshaping for Online Adaptation
                 or Personalization",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "49:1--49:36",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3505633",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3505633",
  abstract =     "Conventionally, DNN models are trained once in the
                 cloud and deployed in edge devices such as cars,
                 robots, or unmanned aerial vehicles (UAVs) for
                 real-time inference. However, there are many cases that
                 require the models to adapt to new environments,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2022:DDN,
  author =       "Chaojian Li and Wuyang Chen and Yuchen Gu and Tianlong
                 Chen and Yonggan Fu and Zhangyang Wang and Yingyan
                 Lin",
  title =        "{DANCE}: {DAta-Network Co-optimization for Efficient}
                 Segmentation Model Training and Inference",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "50:1--50:20",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3510835",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510835",
  abstract =     "Semantic segmentation for scene understanding is
                 nowadays widely demanded, raising significant
                 challenges for the algorithm efficiency, especially its
                 applications on resource-limited platforms. Current
                 segmentation models are trained and evaluated on
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Kee:2022:LPP,
  author =       "Minkwan Kee and Gi-Ho Park",
  title =        "A Low-power Programmable Machine Learning Hardware
                 Accelerator Design for Intelligent Edge Devices",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "51:1--51:13",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3531479",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3531479",
  abstract =     "With the advent of the machine learning and IoT, many
                 low-power edge devices, such as wearable devices with
                 various sensors, are used for machine learning-based
                 intelligent applications, such as healthcare or motion
                 recognition. While these applications \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wen:2022:MCT,
  author =       "Chenyi Wen and Xiao Dong and Baixin Chen and
                 Umamaheswara Rao Tida and Yiyu Shi and Cheng Zhuo",
  title =        "Magnetic Core {TSV}-Inductor Design and Optimization
                 for On-chip {DC-DC} Converter",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "52:1--52:23",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3507700",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3507700",
  abstract =     "The conventional on-chip spiral inductor consumes a
                 significant top-metal routing area, thereby preventing
                 its popularity in many on-chip applications. Recently
                 through-silicon-via- (TSV) based inductor (also known
                 as a TSV-inductor) with a magnetic core \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Dewan:2022:DAA,
  author =       "Monzurul Islam Dewan and Dae Hyun Kim",
  title =        "Design Automation Algorithms for the {NP}-Separate
                 {VLSI} Design Methodology",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "53:1--53:20",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3508375",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3508375",
  abstract =     "The NP-Separate design methodology for
                 very-large-scale integration (VLSI) design
                 fine-controls the sizes of transistors, thereby
                 achieving significant power, performance, and area
                 improvement compared to the conventional
                 standard-cell-based design \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% Volume 27, number 6, November 2022

@Article{Pomeranz:2022:IFC,
  author =       "Irith Pomeranz",
  title =        "Increasing the Fault Coverage of a Truncated Test
                 Set",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "54:1--54:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3508459",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3508459",
  abstract =     "Defect-aware, cell-aware, and gate-exhaustive faults
                 are described by input patterns of subcircuits or cells
                 that are expected to activate defects. Even with
                 single-cycle faults, an $n$-input subcircuit can have
                 up to $ 2^n $ faults with unique \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Jagadheesh:2022:NAM,
  author =       "Samala Jagadheesh and P. Veda Bhanu and Soumya J.",
  title =        "{NoC} Application Mapping Optimization Using
                 Reinforcement Learning",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "55:1--55:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3510381",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510381",
  abstract =     "Application mapping is one of the early stage design
                 processes aimed to improve the performance of
                 Network-on-Chip. Mapping is an NP-hard problem. A
                 massive amount of high-quality supervised data is
                 required to solve the application mapping problem using
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Kolhe:2022:BDS,
  author =       "Gaurav Kolhe and Tyler David Sheaves and Sai Manoj P.
                 D. and Hamid Mahmoodi and Setareh Rafatirad and Avesta
                 Sasan and Houman Homayoun",
  title =        "Breaking the Design and Security Trade-off of
                 Look-up-table-based Obfuscation",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "56:1--56:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3510421",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510421",
  abstract =     "Logic locking and Integrated Circuit (IC) camouflaging
                 are the most prevalent protection schemes that can
                 thwart most hardware security threats. However, the
                 state-of-the-art attacks, including Boolean
                 Satisfiability (SAT) and approximation-based attacks,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2022:NAD,
  author =       "Taozhong Li and Naifeng Jing and Jianfei Jiang and Qin
                 Wang and Zhigang Mao and Yiran Chen",
  title =        "A Novel Architecture Design for Output Significance
                 Aligned Flow with Adaptive Control in {ReRAM}-based
                 Neural Network Accelerator",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "57:1--57:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3510819",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510819",
  abstract =     "Resistive-RAM-based (ReRAM-based) computing shows
                 great potential on accelerating DNN inference by its
                 highly parallel structure. Regrettably, computing
                 accuracy in practical is much lower than expected due
                 to the non-ideal ReRAM device. Conventional \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Brunner:2022:THR,
  author =       "Michaela Brunner and Alexander Hepp and Johanna Baehr
                 and Georg Sigl",
  title =        "Toward a Human-Readable State Machine Extraction",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "58:1--58:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3513086",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3513086",
  abstract =     "The target of sequential reverse engineering is to
                 extract the state machine of a design. Sequential
                 reverse engineering of a gate-level netlist consists of
                 the identification of so-called state flip-flops
                 (sFFs), as well as the extraction of the state
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2022:QCT,
  author =       "Xiangzhen Zhou and Yuan Feng and Sanjiang Li",
  title =        "Quantum Circuit Transformation: a {Monte Carlo} Tree
                 Search Framework",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "59:1--59:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3514239",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3514239",
  abstract =     "In the noisy intermediate-scale quantum era, quantum
                 processing units suffer from, among others, highly
                 limited connectivity between physical qubits. To make a
                 quantum circuit effectively executable, a circuit
                 transformation process is necessary to \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hong:2022:TNB,
  author =       "Xin Hong and Xiangzhen Zhou and Sanjiang Li and Yuan
                 Feng and Mingsheng Ying",
  title =        "A Tensor Network based Decision Diagram for
                 Representation of Quantum Circuits",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "60:1--60:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3514355",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3514355",
  abstract =     "Tensor networks have been successfully applied in
                 simulation of quantum physical systems for decades.
                 Recently, they have also been employed in classical
                 simulation of quantum computing, in particular, random
                 quantum circuits. This article proposes a \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Choudhury:2022:SHC,
  author =       "Dwaipayan Choudhury and Reet Barik and Aravind
                 Sukumaran Rajam and Ananth Kalyanaraman and Partha
                 Pratim Pande",
  title =        "Software\slash Hardware Co-design of {$3$D}
                 {NoC}-based {GPU} Architectures for Accelerated Graph
                 Computations",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "61:1--61:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3514354",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3514354",
  abstract =     "Manycore GPU architectures have become the mainstay
                 for accelerating graph computations. One of the primary
                 bottlenecks to performance of graph computations on
                 manycore architectures is the data movement. Since most
                 of the accesses in graph processing are \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Jiang:2022:ELH, author = "Yiyang Jiang and Fan Yang and Bei Yu and Dian Zhou and Xuan Zeng", title = "Efficient Layout Hotspot Detection via Neural Architecture Search", journal = j-TODAES, volume = "27", number = "6", pages = "62:1--62:??", month = nov, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3517130", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 25 09:11:49 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3517130", abstract = "Layout hotspot detection is of great importance in the physical verification flow.
Deep neural network models have been applied to hotspot detection and achieved great success. Despite their success, high-performance neural networks are still quite \ldots{}", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Abel:2022:FSS, author = "Inga Abel and Helmut Graeb", title = "{FUBOCO}: Structure Synthesis of Basic Op-Amps by {FUnctional BlOck COmposition}", journal = j-TODAES, volume = "27", number = "6", pages = "63:1--63:??", month = nov, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3522738", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 25 09:11:49 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3522738", abstract = "This article presents a method to automatically synthesize the structure and initial sizing of an operational amplifier. 
It is positioned between approaches with fixed design plans and a small search space of structures and approaches with generic \ldots{}", acknowledgement = ack-nhfb, articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sha:2022:DMB, author = "Zhibing Sha and Jun Li and Zhigang Cai and Min Huang and Jianwei Liao and Francois Trahay", title = "Degraded Mode-benefited {I/O} Scheduling to Ensure {I/O} Responsiveness in {RAID}-enabled {SSDs}", journal = j-TODAES, volume = "27", number = "6", pages = "64:1--64:??", month = nov, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3522755", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 25 09:11:49 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3522755", abstract = "RAID-enabled SSDs commonly have unbalanced I/O workloads on their components (e.g., SSD channels), as the data/parity chunks in the same stripe may have varied access frequency, which greatly impacts I/O responsiveness. 
This article proposes a I/O \ldots{}", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bai:2022:RER, author = "Yunkai Bai and Andrew Stern and Jungmin Park and Mark Tehranipoor and Domenic Forte", title = "{RASCv2}: Enabling Remote Access to Side-Channels for Mission Critical and {IoT} Systems", journal = j-TODAES, volume = "27", number = "6", pages = "65:1--65:??", month = nov, year = "2022", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3524123", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 25 09:11:49 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3524123", abstract = "The Internet of Things (IoT) and smart devices are currently being deployed in systems such as autonomous vehicles and medical monitoring devices. The introduction of IoT devices into these systems enables network connectivity for data transfer, cloud \ldots{}", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hung:2023:DDR, author = "Jos{\'e} Romero Hung and Chao Li and Taolei Wang and Jinyang Guo and Pengyu Wang and Chuanming Shao and Jing Wang and Guoyong Shi and Xiangwen Liu and Hanqing Wu", title = "{DRAGON}: Dynamic Recurrent Accelerator for Graph Online Convolution", journal = j-TODAES, volume = "28", number = "1", pages = "1:1--1:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3524124", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3524124", abstract = "Despite the extraordinary applicative potentiality that dynamic 
graph inference may entail, its practical-physical implementation has been a topic seldom explored in literature. Although graph inference through neural networks has received plenty of \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Minakova:2023:MTT, author = "Svetlana Minakova and Todor Stefanov", title = "Memory-Throughput Trade-off for {CNN}-Based Applications at the Edge", journal = j-TODAES, volume = "28", number = "1", pages = "2:1--2:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3527457", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3527457", abstract = "Many modern applications require execution of Convolutional Neural Networks (CNNs) on edge devices, such as mobile phones or embedded platforms. This can be challenging, as the state-of-the art CNNs are memory costly, whereas the memory budget of edge \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chhabria:2023:EDN, author = "Vidya A. Chhabria and Vipul Ahuja and Ashwath Prabhu and Nikhil Patil and Palkesh Jain and Sachin S. 
Sapatnekar", title = "Encoder-Decoder Networks for Analyzing Thermal and Power Delivery Networks", journal = j-TODAES, volume = "28", number = "1", pages = "3:1--3:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3526115", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3526115", abstract = "Power delivery network (PDN) analysis and thermal analysis are computationally expensive tasks that are essential for successful integrated circuit (IC) design. Algorithmically, both these analyses have similar computational structure and complexity as \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Spieck:2023:LBM, author = "Jan Spieck and Stefan Wildermann and J{\"u}rgen Teich", title = "A Learning-based Methodology for Scenario-aware Mapping of Soft Real-time Applications onto Heterogeneous {MPSoCs}", journal = j-TODAES, volume = "28", number = "1", pages = "4:1--4:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3529230", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3529230", abstract = "Soft real-time streaming applications often process input data that evoke varying workloads for their tasks. 
This may lead to high energy consumption or deadline misses in case their mapping onto a heterogeneous MPSoC target architecture is not adapted, \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2023:EES, author = "Chunqiao Li and Chengtao An and Fan Yang and Xuan Zeng", title = "{ESPSim}: an Efficient Scalable Power Grid Simulator Based on Parallel Algebraic Multigrid", journal = j-TODAES, volume = "28", number = "1", pages = "5:1--5:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3529533", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3529533", abstract = "Fast verification for the extremely large-scale power grid is demanding as CMOS technology advances consistently. 
In this work, we propose ESPSim, an efficient scalable power grid simulator based on a parallel smoothed aggregation-based algebraic \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2023:RRB, author = "Chenglong Huang and Nuo Xu and Junwei Zeng and Wenqing Wang and Yihong Hu and Liang Fang and Desheng Ma and Yanting Chen", title = "Rescuing {ReRAM}-based Neural Computing Systems from Device Variation", journal = j-TODAES, volume = "28", number = "1", pages = "6:1--6:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3533706", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3533706", abstract = "Resistive random-access memory (ReRAM)-based crossbar array (RCA) is a promising platform to accelerate vector-matrix multiplication in deep neural networks (DNNs). 
There are, however, some practical issues, especially device variation, that hinder the \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ding:2023:MAP, author = "Bo Ding and Jinglei Huang and Qi Xu and Junpeng Wang and Song Chen and Yi Kang", title = "Memory-aware Partitioning, Scheduling, and Floorplanning for Partially Dynamically Reconfigurable Systems", journal = j-TODAES, volume = "28", number = "1", pages = "7:1--7:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3534968", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3534968", abstract = "Partially dynamic reconfiguration (PDR) technology can accelerate the reconfiguration process and overcome hardware resource constraints when facing the challenge of high performance with respect to applications and resources constraints on field-
\ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zeng:2023:AMM, author = "Junwei Zeng and Nuo Xu and Yabo Chen and Chenglong Huang and Zhiwei Li and Liang Fang", title = "{AIMCU-MESO}: an In-Memory Computing Unit Constructed by {MESO} Device", journal = j-TODAES, volume = "28", number = "1", pages = "8:1--8:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3539575", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3539575", abstract = "Traditional CMOS-based von-Neumann computer architecture faces the issue of memory wall that the limitation of bus-bandwidth and the speed mismatch between processor and memory restrict the efficiency of data processing along with an irreducible energy \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Das:2023:CCV, author = "Sourav Das and Sayandeep Sanyal and Aritra Hazra and Pallab Dasgupta", title = "{CoVerPlan}: a Comprehensive Verification Planning Framework Leveraging {PSS} Specifications", journal = j-TODAES, volume = "28", number = "1", pages = "9:1--9:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3543175", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3543175", abstract = "With increasing design complexity, the portability of tests across different designs and platforms becomes a key criterion for accelerating verification closure. 
The Portable Test and Stimulus Standard (PSS) is an emerging industry standard prepared by \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Song:2023:VEE, author = "Zhuoran Song and Naifeng Jing and Xiaoyao Liang", title = "{E$^2$-VOR}: an End-to-End En\slash Decoder Architecture for Efficient Video Object Recognition", journal = j-TODAES, volume = "28", number = "1", pages = "10:1--10:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3543852", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3543852", abstract = "High-resolution video object recognition (VOR) evolves so fast but is very compute-intensive. This is because VOR leverages compute-intensive deep neural network (DNN) for better accuracy. Although many works have been proposed for speedup, they mostly \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2023:MSF, author = "Zhiqiang Zhao and Zhuo Feng", title = "A Multilevel Spectral Framework for Scalable Vectorless Power\slash Thermal Integrity Verification", journal = j-TODAES, volume = "28", number = "1", pages = "11:1--11:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3529534", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3529534", abstract = "Vectorless integrity verification is becoming increasingly critical to the robust design of nanoscale integrated circuits. 
This article introduces a general vectorless integrity verification framework that allows computing the worst-case voltage drops or \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2023:SDP, author = "Kai Huang and Bowen Li and Dongliang Xiong and Haitian Jiang and Xiaowen Jiang and Xiaolang Yan and Luc Claesen and Dehong Liu and Junjian Chen and Zhili Liu", title = "Structured Dynamic Precision for Deep Neural Networks Quantization", journal = j-TODAES, volume = "28", number = "1", pages = "12:1--12:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3549535", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3549535", abstract = "Deep Neural Networks (DNNs) have achieved remarkable success in various Artificial Intelligence applications. Quantization is a critical step in DNNs compression and acceleration for deployment. 
To further boost DNN execution efficiency, many works \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ebrahimi-Azandaryani:2023:ACA, author = "Farhad Ebrahimi-Azandaryani and Omid Akbari and Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram", title = "Accuracy Configurable Adders with Negligible Delay Overhead in Exact Operating Mode", journal = j-TODAES, volume = "28", number = "1", pages = "13:1--13:??", month = jan, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3549936", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:22 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3549936", abstract = "In this paper, two accuracy configurable adders capable of operating in approximate and exact modes are proposed. In the adders, which include a block-based carry propagate and a parallel prefix structure, the carry chains are cut off in the approximate \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2023:ISI, author = "Yibo Lin and Avi Ziv and Haoxing Ren", title = "Introduction to the Special Issue on Machine Learning for {CAD\slash EDA}", journal = j-TODAES, volume = "28", number = "2", pages = "14:1--14:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3586208", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3586208", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = 
"https://dl.acm.org/loi/todaes", } @Article{Sanchez:2023:CSE, author = "Daniela S{\'a}nchez and Lorenzo Servadei and Gamze Naz Kiprit and Robert Wille and Wolfgang Ecker", title = "A Comprehensive Survey on Electronic Design Automation and Graph Neural Networks: Theory and Applications", journal = j-TODAES, volume = "28", number = "2", pages = "15:1--15:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3543853", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3543853", abstract = "Driven by Moore's law, the chip design complexity is steadily increasing. Electronic Design Automation (EDA) has been able to cope with the challenging very large-scale integration process, assuring scalability, reliability, and proper time-to-market. \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Koblah:2023:SPA, author = "David Koblah and Rabin Acharya and Daniel Capecci and Olivia Dizon-Paradis and Shahin Tajik and Fatemeh Ganji and Damon Woodard and Domenic Forte", title = "A Survey and Perspective on Artificial Intelligence for Security-Aware Electronic Design Automation", journal = j-TODAES, volume = "28", number = "2", pages = "16:1--16:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3563391", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3563391", abstract = "Artificial intelligence (AI) and machine learning (ML) techniques have been increasingly used in several fields to improve performance and the level of automation. 
In recent years, this use has exponentially increased due to the advancement of high- \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Fan:2023:PCC, author = "Shaoze Fan and Shun Zhang and Jianbo Liu and Ningyuan Cao and Xiaoxiao Guo and Jing Li and Xin Zhang", title = "Power Converter Circuit Design Automation Using Parallel {Monte Carlo} Tree Search", journal = j-TODAES, volume = "28", number = "2", pages = "17:1--17:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3549538", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3549538", abstract = "The tidal waves of modern electronic/electrical devices have led to increasing demands for ubiquitous application-specific power converters. 
A conventional manual design procedure of such power converters is computation- and labor-intensive, which \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Song:2023:MLA, author = "Ling-Yen Song and Chih-Yun Chou and Tung-Chieh Kuo and Chien-Nan Liu and Juinn-Dar Huang", title = "Machine Learning Assisted Circuit Sizing Approach for Low-Voltage Analog Circuits with Efficient Variation-Aware Optimization", journal = j-TODAES, volume = "28", number = "2", pages = "18:1--18:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3567422", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3567422", abstract = "Low-power analog design is a hot topic for various power efficient applications. Sizing low-power analog circuits is not easy because the increasing uncertainties from low-voltage techniques magnify process variation effects on the design yield. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2023:PDW, author = "Yaguang Li and Yishuang Lin and Meghna Madhusudan and Arvind Sharma and Sachin Sapatnekar and Ramesh Harjani and Jiang Hu", title = "Performance-driven Wire Sizing for Analog Integrated Circuits", journal = j-TODAES, volume = "28", number = "2", pages = "19:1--19:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3559542", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3559542", abstract = "Analog IC performance has a strong dependence on interconnect RC parasitics, which are significantly affected by wire sizes in recent technologies, where minimum-width wires have high resistance. However, performance-driven wire sizing for analog ICs has \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cheng:2023:MLD, author = "Jiawen Cheng and Yong Xiao and Yun Shao and Guanghai Dong and Songlin Lyu and Wenjian Yu", title = "Machine-learning-driven Architectural Selection of Adders and Multipliers in Logic Synthesis", journal = j-TODAES, volume = "28", number = "2", pages = "20:1--20:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3560712", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3560712", abstract = "Designing high-performance adders and multiplier components for diverse specifications and constraints is of practical concern. 
However, selecting the best architecture for adder or multiplier, which largely affects the performance of synthesized circuits, \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2023:GFG, author = "Yiting Liu and Ziyi Ju and Zhengming Li and Mingzhi Dong and Hai Zhou and Jia Wang and Fan Yang and Xuan Zeng and Li Shang", title = "{GraphPlanner}: Floorplanning with Graph Neural Network", journal = j-TODAES, volume = "28", number = "2", pages = "21:1--21:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3555804", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3555804", abstract = "Chip floorplanning has long been a critical task with high computation complexity in the physical implementation of VLSI chips. Its key objective is to determine the initial locations of large chip modules with minimized wirelength while adhering to the \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Fang:2023:ETC, author = "Chenlei Fang and Qicheng Huang and Zeye Liu and Ruizhou Ding and Ronald D. 
Blanton", title = "Efficient Test Chip Design via Smart Computation", journal = j-TODAES, volume = "28", number = "2", pages = "22:1--22:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3558393", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3558393", abstract = "Submitted to the Special Issue on Machine Learning for CAD (ML-CAD). Competitive strength in semiconductor field depends on yield. The challenges associated with designing and manufacturing of leading-edge integrated circuits (ICs) have increased that \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lozano:2023:LBP, author = "Erika Susana Alcorta Lozano and Andreas Gerstlauer", title = "Learning-based Phase-aware Multi-core {CPU} Workload Forecasting", journal = j-TODAES, volume = "28", number = "2", pages = "23:1--23:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564929", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564929", abstract = "Predicting workload behavior during workload execution is essential for dynamic resource optimization in multi-processor systems. Recent studies have proposed advanced machine learning techniques for dynamic workload prediction. 
Workload prediction can be \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2023:MLB, author = "Benzheng Li and Xi Zhang and Hailong You and Zhongdong Qi and Yuming Zhang", title = "Machine Learning Based Framework for Fast Resource Estimation of {RTL} Designs Targeting {FPGAs}", journal = j-TODAES, volume = "28", number = "2", pages = "24:1--24:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3555047", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3555047", abstract = "Field-programmable gate arrays (FPGAs) have grown to be an important platform for integrated circuit design and hardware emulation. However, with the dramatic increase in design scale, it has become a key challenge to partition very large scale \ldots{}", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ferretti:2023:GNN, author = "Lorenzo Ferretti and Andrea Cini and Georgios Zacharopoulos and Cesare Alippi and Laura Pozzi", title = "Graph Neural Networks for High-Level Synthesis Design Space Exploration", journal = j-TODAES, volume = "28", number = "2", pages = "25:1--25:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3570925", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3570925", abstract = "High-level Synthesis (HLS) Design-Space Exploration (DSE) aims at identifying Pareto-optimal synthesis configurations whose exhaustive 
search is unfeasible due to the design-space dimensionality and the prohibitive computational cost of the synthesis \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Last:2023:TPM, author = "Felix Last and Ulf Schlichtmann", title = "Training {PPA} Models for Embedded Memories on a Low-data Diet", journal = j-TODAES, volume = "28", number = "2", pages = "26:1--26:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3556539", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3556539", abstract = "Supervised machine learning requires large amounts of labeled data for training. In power, performance, and area (PPA) estimation of embedded memories, every new memory compiler version is considered independently of previous compiler versions. Since the \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xing:2023:BPB, author = "Wei W. 
Xing and Xiang Jin and Tian Feng and Dan Niu and Weisheng Zhao and Zhou Jin", title = "{BoA-PTA}: a {Bayesian Optimization Accelerated PTA} Solver for {SPICE} Simulation", journal = j-TODAES, volume = "28", number = "2", pages = "27:1--27:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3555805", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3555805", abstract = "One of the greatest challenges in integrated circuit design is the repeated executions of computationally expensive SPICE simulations, particularly when highly complex chip testing/verification is involved. Recently, pseudo-transient analysis (PTA) has \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dai:2023:SAD, author = "Ruochen Dai and Tuba Yavuz", title = "A Symbolic Approach to Detecting Hardware {Trojans} Triggered by Don't Care Transitions", journal = j-TODAES, volume = "28", number = "2", pages = "28:1--28:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3558392", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3558392", abstract = "Due to the globalization of Integrated Circuit supply chain, hardware Trojans and the attacks that can trigger them have become an important security issue. One type of hardware Trojans leverages the ``don't care transitions'' in Finite-state Machines (FSMs). 
\ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2023:AMC, author = "Zhisheng Chen and Wenzhong Guo and Genggeng Liu and Xing Huang", title = "Application Mapping and Control-system Design for Microfluidic Biochips with Distributed Channel Storage", journal = j-TODAES, volume = "28", number = "2", pages = "29:1--29:??", month = mar, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564288", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed Apr 5 10:07:23 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564288", abstract = "Continuous-flow microfluidic biochips have emerged as a potential low-cost and fast-responsive lab-on-chip platform. They have attracted much attention due to their capability of performing various biochemical applications concurrently and automatically \ldots{}", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Choudhury:2023:AGC, author = "Dwaipayan Choudhury and Lizhi Xiang and Aravind Rajam and Anantharaman Kalyanaraman and Partha Pratim Pande", title = "Accelerating Graph Computations on {$3$D} {NoC}-Enabled {PIM} Architectures", journal = j-TODAES, volume = "28", number = "3", pages = "30:1--30:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564290", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564290", abstract = "Graph application workloads are dominated by random memory accesses with the poor locality. 
To tackle the irregular and sparse nature of computation, ReRAM-based Processing-in-Memory (PIM) architectures have been proposed recently. Most of these ReRAM \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lee:2023:VEL, author = "Jayoung Lee and Pengcheng Wang and Ran Xu and Sarthak Jain and Venkat Dasari and Noah Weston and Yin Li and Saurabh Bagchi and Somali Chaterji", title = "Virtuoso: Energy- and Latency-aware Streamlining of Streaming Videos on Systems-on-Chips", journal = j-TODAES, volume = "28", number = "3", pages = "31:1--31:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564289", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564289", abstract = "Efficient and adaptive computer vision systems have been proposed to make computer vision tasks, such as image classification and object detection, optimized for embedded or mobile devices. These solutions, quite recent in their origin, focus on \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bommana:2023:DST, author = "Ashish Reddy Bommana and Susheel Ujwal Siddamshetty and Dhilleswararao Pudi and Arvind Thumatti K. R. and Srinivas Boppu and M. 
Sabarimalai Manikandan and Linga Reddy Cenkeramaddi", title = "Design of Synthesis-time Vectorized Arithmetic Hardware for Tapered Floating-point Addition and Subtraction", journal = j-TODAES, volume = "28", number = "3", pages = "32:1--32:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3567423", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3567423", abstract = "Energy efficiency has become the new performance criterion in this era of pervasive embedded computing; thus, accelerator-rich multi-processor system-on-chips are commonly used in embedded computing hardware. Once computationally intensive machine learning applications gained much traction, they are now deployed in many application domains due to abundant and cheaply available computational capacity. In addition, there is a growing trend toward developing hardware accelerators for machine learning applications for embedded edge devices where performance and energy efficiency are critical. Although these hardware accelerators frequently use floating-point operations for accuracy, reduced-width floating-point formats are also used to reduce hardware complexity; thus, power consumption while maintaining accuracy. Vectorization concepts can also be used to improve performance, energy efficiency, and memory bandwidth. We propose the design of a vectorized floating-point adder/subtractor that supports arbitrary length floating-point formats with varying exponent and mantissa widths in this article. 
In comparison to existing designs in the literature, the proposed design is 2.57$ \times $ area- and 1.56$ \times $ power-efficient, and it supports true vectorization with no restrictions on exponent and mantissa widths.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2023:ATF, author = "Chun-Chieh Yang and Yi-Ru Chen and Hui-Hsin Liao and Yuan-Ming Chang and Jenq-Kuen Lee", title = "Auto-tuning Fixed-point Precision with {TVM} on {RISC-V} Packed {SIMD} Extension", journal = j-TODAES, volume = "28", number = "3", pages = "33:1--33:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3569939", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3569939", abstract = "Today, as deep learning (DL) is applied more often in daily life, dedicated processors such as CPUs and GPUs have become very important for accelerating model executions. With the growth of technology, people are becoming accustomed to using edge devices, such as mobile phones, smart watches, and VR devices in their daily lives. A variety of technologies using DL are gradually being applied to these edge devices. However, there is a large number of computations in DL. It faces a challenging problem how to provide solutions in the edge devices. In this article, the proposed method enables a flow with the RISC-V Packed extension (P extension) in TVM. TVM, an open deep learning compiler for neural network models, is growing as a key infrastructure for DL computing. RISC-V is an open instruction set architecture (ISA) with customized and flexible features. 
The Packed-SIMD extension is a RISC-V extension that enables subword single-instruction multiple-data (SIMD) computations in RISC-V architectures to support fallback engines in AI computing. In the proposed flow, a fixed-point type that is supported by an integer of 16-bit type and saturation instructions is added to replace the original 32-bit float type. In addition, an auto-tuning method is proposed to use a uniform selector mechanism (USM) to find the binary point position for fixed-point type use. The tensorization feature of TVM can be used to optimize specific hardware such as subword SIMD instructions with RISC-V P extension. With our experiment on the Spike simulator, the proposed method with the USM can improve performance by approximately 2.54 to 6.15$ \times $ in terms of instruction counts with little accuracy loss.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2023:HAQ, author = "Shanshi Huang and Hongwu Jiang and Shimeng Yu", title = "Hardware-aware Quantization\slash Mapping Strategies for Compute-in-Memory Accelerators", journal = j-TODAES, volume = "28", number = "3", pages = "34:1--34:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3569940", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3569940", abstract = "The emerging non-volatile memory (eNVM) based mixed-signal Compute-in-Memory (CIM) accelerators are of great interest in today's AI accelerators design due to their high energy efficiency. 
Various CIM architectures and circuit-level designs have been \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Feng:2023:GGA, author = "Lang Feng and Wenjian Liu and Chuliang Guo and Ke Tang and Cheng Zhuo and Zhongfeng Wang", title = "{GANDSE}: Generative Adversarial Network-based Design Space Exploration for Neural Network Accelerator Design", journal = j-TODAES, volume = "28", number = "3", pages = "35:1--35:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3570926", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3570926", abstract = "With the popularity of deep learning, the hardware implementation platform of deep learning has received increasing interest. 
Unlike the general purpose devices, e.g., CPU or GPU, where the deep learning algorithms are executed at the software level, \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2023:DDD, author = "Junpeng Wang and Haitao Du and Bo Ding and Qi Xu and Song Chen and Yi Kang", title = "{DDAM}: Data Distribution-Aware Mapping of {CNNs} on Processing-In-Memory Systems", journal = j-TODAES, volume = "28", number = "3", pages = "36:1--36:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3576196", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3576196", abstract = "Convolution neural networks (CNNs) are widely used algorithms in image processing, natural language processing and many other fields. 
The large amount of memory access of CNNs is one of the major concerns in CNN accelerator designs that influences the \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rawat:2023:SNB, author = "Bhawna Rawat and Poornima Mittal", title = "A Switching {NMOS} Based Single Ended Sense Amplifier for High Density {SRAM} Applications", journal = j-TODAES, volume = "28", number = "3", pages = "37:1--37:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3576198", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3576198", abstract = "The demand for single ended static random access memory is growing, driven by the decreasing technology node and increasing processing load. This mandates the need for a single ended sense amplifier to be used along with the memory. 
Consequently, a single \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pereira:2023:IED, author = "Danny Pereira and Anirban Ghose and Sumana Ghosh and Soumyajit Dey", title = "Inferencing on Edge Devices: a Time- and Space-aware Co-scheduling Approach", journal = j-TODAES, volume = "28", number = "3", pages = "38:1--38:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3576197", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3576197", abstract = "Neural Network (NN)-based real-time inferencing tasks are often co-scheduled on GPGPU-style edge platforms. Existing works advocate using different NN parameters for the same detection task in different environments. However, realizing such approaches \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2023:CFD, author = "Yanze Huang and Kui Wen and Limei Lin and Li Xu and Sun-Yuan Hsieh", title = "Component Fault Diagnosability of Hierarchical Cubic Networks", journal = j-TODAES, volume = "28", number = "3", pages = "39:1--39:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3577018", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3577018", abstract = "The fault diagnosability of a network indicates the self-diagnosis ability of the network, thus it is an important measure of robustness of the network. 
As a neoteric feature for measuring fault diagnosability, the $r$-component diagnosability \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nie:2023:CMD, author = "Qi Nie and Sharad Malik", title = "{CNNFlow}: Memory-driven Data Flow Optimization for Convolutional Neural Networks", journal = j-TODAES, volume = "28", number = "3", pages = "40:1--40:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3577017", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3577017", abstract = "Convolution Neural Networks (CNNs) are widely deployed in computer vision applications. The datasets are large, and the data reuse across different parts is heavily interleaved. Given that memory access (SRAM and especially DRAM) is more expensive in both \ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{deOliveira:2023:MOO, author = "Ricardo Gonzalez de Oliveira and Nicolas Navet and Achim Henkel", title = "Multi-Objective Optimization for Safety-Related Available {E\slash E} Architectures Scoping Highly Automated Driving Vehicles", journal = j-TODAES, volume = "28", number = "3", pages = "41:1--41:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3582004", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3582004", abstract = "Megatrends such as Highly Automated Driving (HAD) (SAE $ \geq $ Level 3), electrification, and
connectivity are reshaping the automotive industry. Together with the new technologies, the business models will also evolve, opening up new possibilities and new \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mahmoud:2023:LEP, author = "Mervat M. A. Mahmoud and Nahla E. Elashkar and Heba H. Draz", title = "Low-energy Pipelined Hardware Design for Approximate Medium Filter", journal = j-TODAES, volume = "28", number = "3", pages = "42:1--42:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3582005", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3582005", abstract = "Image and video processing algorithms are currently crucial for many applications. Hardware implementation of these algorithms provides higher speed for large computation applications. Removing noise is often a typical pre-processing step to enhance the \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cardona:2023:AMC, author = "Jordi Cardona and Carles Hern{\'a}ndez and Jaume Abella and Enrico Mezzetti and Francisco J. 
Cazorla", title = "Accurately Measuring Contention in Mesh {NoCs} in Time-Sensitive Embedded Systems", journal = j-TODAES, volume = "28", number = "3", pages = "43:1--43:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3582006", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3582006", abstract = "The computing capacity demanded by embedded systems is on the rise as software implements more functionalities, ranging from best-effort entertainment functions to performance-guaranteed safety-related functions. Heterogeneous manycore processors, using \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Du:2023:TLR, author = "Yajuan Du and Siyi Huang and Yao Zhou and Qiao Li", title = "Towards {LDPC} Read Performance of {$3$D} Flash Memories with Layer-induced Error Characteristics", journal = j-TODAES, volume = "28", number = "3", pages = "44:1--44:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3585075", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3585075", abstract = "3D flash memories have been widely developed to further increase the storage capacity of SSDs by vertically stacking multiple layers. However, this special physical structure brings new error characteristics. 
Existing studies have discovered that there \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhou:2023:FAO, author = "Yuhao Zhou and Zhenxue He and Jianhui Jiang and Jia Liu and Juncai He and Tao Wang and Limin Xiao and Xiang Wang", title = "Fast Area Optimization Approach for {XNOR\slash OR}-based Fixed Polarity {Reed--Muller} Logic Circuits based on Multi-strategy Wolf Pack Algorithm", journal = j-TODAES, volume = "28", number = "3", pages = "45:1--45:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3587818", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3587818", abstract = "Area optimization is one of the most important contents of circuits logic synthesis. The smaller area has stronger testability and lower cost. 
However, searching for a circuit with the smallest area in a large-scale space of polarity is a combinatorial \ldots{}", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2023:TPI, author = "Senling Wang and Xihong Zhou and Yoshinobu Higami and Hiroshi Takahashi and Hiroyuki Iwata and Yoichi Maeda and Jun Matsushima", title = "Test Point Insertion for Multi-Cycle Power-On Self-Test", journal = j-TODAES, volume = "28", number = "3", pages = "46:1--46:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3563552", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3563552", abstract = "Under the functional safety standard ISO26262, automotive systems require testing in the field, such as the power-on self-test (POST). 
Unlike the production test, the POST requires reducing the test application time to meet the indispensable test quality \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Le:2023:PBM, author = "Trung Le and Zhao Zhang and Zhichun Zhu", title = "Polling-Based Memory Interface", journal = j-TODAES, volume = "28", number = "3", pages = "47:1--47:??", month = may, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3572919", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Wed May 17 08:06:20 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3572919", abstract = "Non-volatile memory has been extensively researched as the alternative for a DRAM-based system; however, the traditional memory controller cannot efficiently track and schedule operations for all the memory devices in heterogeneous systems due to \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Markov:2023:GEI, author = "Igor Markov and Fan Yang and Li Shang and Hai Zhou", title = "{Guest Editor}'s Introduction: Machine Learning for {VLSI} Physical Design", journal = j-TODAES, volume = "28", number = "4", pages = "48:1--48:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3592606", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3592606", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kashyap:2023:IIM, author = "Suhas Krishna Kashyap and Sule Ozev", title = "{IMPRoVED}: Integrated Method to Predict {PostRouting} setup Violations in Early Design Stages", journal = j-TODAES, volume = "28", number = "4", pages = "49:1--49:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3572546", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3572546", abstract = "The detail routing process is by far the most time consuming during the physical design flow. Routing starts with an estimation of timing slacks and aims to meet the timing specifications at signoff. In this paper, we propose an improved method to predict \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hyun:2023:ROE, author = "Daijoon Hyun and Sunwha Koh and Younggwang Jung and Taeyoung Kim and Youngsoo Shin", title = "Routability Optimization of Extreme Aspect Ratio Design through Non-uniform Placement Utilization and Selective Flip-flop Stacking", journal = j-TODAES, volume = "28", number = "4", pages = "50:1--50:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3573387", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3573387", abstract = "Circuits that are placed with very low (or high) aspect ratio are susceptible to routing overflows. 
Such designs are difficult to close and usually end up with larger area with low area utilization. In this article, we propose two routability optimization \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Utyamishev:2023:MPP, author = "Dmitry Utyamishev and Inna Partin-Vaisband", title = "Multiterminal Pathfinding in Practical {VLSI} Systems with Deep Neural Networks", journal = j-TODAES, volume = "28", number = "4", pages = "51:1--51:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564930", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564930", abstract = "A multiterminal obstacle-avoiding pathfinding approach is proposed. The approach is inspired by deep image learning. The key idea is based on training a conditional generative adversarial network (cGAN) to interpret a pathfinding task as a graphical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cheng:2023:DDG, author = "Chung-Kuan Cheng and Chester Holtz and Andrew B. 
Kahng and Bill Lin and Uday Mallappa", title = "{DAGSizer}: a Directed Graph Convolutional Network Approach to Discrete Gate Sizing of {VLSI} Graphs", journal = j-TODAES, volume = "28", number = "4", pages = "52:1--52:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3577019", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3577019", abstract = "The objective of a leakage recovery step is to make use of positive slack and reduce power by performing appropriate standard-cell swaps such as threshold-voltage ($V_{th}$) or channel-length reassignments. The resulting engineering change order netlist needs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2023:RDP, author = "Ping-Wei Huang and Yao-Wen Chang", title = "Routability-driven Power\slash Ground Network Optimization Based on Machine Learning", journal = j-TODAES, volume = "28", number = "4", pages = "53:1--53:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3587817", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3587817", abstract = "The dynamic IR drop of a power/ground (PG) network is a critical problem in modern circuit designs. Excessive IR drop slows down circuit performance and causes potential functional failures. Most industrial practices tend to over-design the PG network for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dong:2023:WCP, author = "Xiao Dong and Yufei Chen and Jun Chen and Yucheng Wang and Ji Li and Tianming Ni and Zhiguo Shi and Xunzhao Yin and Cheng Zhuo", title = "Worst-case Power Integrity Prediction Using Convolutional Neural Network", journal = j-TODAES, volume = "28", number = "4", pages = "54:1--54:??", month = jul, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3564932", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:08 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3564932", abstract = "Power integrity analysis is an essential step in power distribution network (PDN) sign-off to ensure the performance and reliability of chips. However, with the growing PDN size and increasing scenarios to be validated, it becomes very time- and resource-consuming \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "54",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Lu:2023:EGS,
  author = "Yi-Chen Lu and Siddhartha Nath and Sai Pentapati and Sung Kyu Lim",
  title = "{ECO-GNN}: Signoff Power Prediction Using Graph Neural Networks with Subgraph Approximation",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "55:1--55:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3569942",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3569942",
  abstract = "Modern electronic design automation flows depend on both implementation and signoff tools to perform timing-constrained power optimization through Engineering Change Orders (ECOs), which involve gate sizing and threshold-voltage ($V_{th}$)-assignment of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "55",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Yang:2023:CCE,
  author = "Dingcheng Yang and Haoyuan Li and Wenjian Yu and Yuanbo Guo and Wenjie Liang",
  title = "{CNN-Cap}: Effective Convolutional Neural Network-based Capacitance Models for Interconnect Capacitance Extraction",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "56:1--56:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3564931",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3564931",
  abstract = "Accurate capacitance extraction is becoming more important for designing integrated circuits under advanced process technology. The pattern matching-based full-chip extraction methodology delivers fast computational speed but suffers from large error and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "56",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Hou:2023:DLF,
  author = "Tianshu Hou and Peining Zhen and Zhigang Ji and Hai-Bao Chen",
  title = "A Deep Learning Framework for Solving Stress-based Partial Differential Equations in Electromigration Analysis",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "57:1--57:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3567424",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3567424",
  abstract = "The electromigration-induced reliability issues (EM) in very large scale integration (VLSI) circuits have attracted continuous attention due to technology scaling. Traditional EM methods lead to inaccurate results incompatible with the advanced technology \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "57",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Zhang:2023:CCM,
  author = "Qing Zhang and Huajie Huang and Jizuo Li and Yuhang Zhang and Yongfu Li",
  title = "{CmpCNN}: {CMP} Modeling with Transfer Learning {CNN} Architecture",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "58:1--58:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3569941",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3569941",
  abstract = "Performing chemical mechanical polishing (CMP) modeling for physical verification on an integrated circuit (IC) chip is vital to minimize its manufacturing yield loss. Traditional CMP models calculate post-CMP topography height of the IC's layout based on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "58",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Aseeri:2023:PTA,
  author = "Ahmad O. Aseeri",
  title = "A Problem-tailored Adversarial Deep Neural Network-Based Attack Model for Feed-Forward Physical Unclonable Functions",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "59:1--59:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3557742",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3557742",
  abstract = "With the exceeding advancement in technology, the sophistication of attacks is considerably increasing.
Standard security methods fall short of achieving the security essentials of IoT against physical attacks due to the nature of IoTs being resource- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "59",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Bhattacharjee:2023:SGG,
  author = "Abhiroop Bhattacharjee and Priyadarshini Panda",
  title = "{SwitchX}: Gmin-Gmax Switching for Energy-efficient and Robust Implementation of Binarized Neural Networks on {ReRAM} Xbars",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "60:1--60:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3576195",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3576195",
  abstract = "Memristive crossbars can efficiently implement Binarized Neural Networks (BNNs) wherein the weights are stored in high-resistance states (HRS) and low-resistance states (LRS) of the synapses. We propose SwitchX mapping of BNN weights onto ReRAM crossbars \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "60",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Huang:2023:STB,
  author = "Po-Hsuan Huang and Chia-Heng Tu and Shen-Ming Chung and Pei-Yuan Wu and Tung-Lin Tsai and Yi-An Lin and Chun-Yi Dai and Tzu-Yi Liao",
  title = "{SecureTVM}: a {TVM}-based Compiler Framework for Selective Privacy-preserving Neural Inference",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "61:1--61:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3579049",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3579049",
  abstract = "Privacy-preserving neural inference helps protect both the user input data and the model weights from being leaked to others during the inference of a deep learning model. To achieve data protection, the inference is often performed within a secure domain, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "61",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Ibrahim:2023:OPR,
  author = "Abrar A. Ibrahim and Ahmed M. Y.
Ibrahim and Mohamed Watheq El-Kharashi and Mona Safar",
  title = "Optimal Pattern Retargeting in {IEEE 1687} Networks: a {SAT}-based Upper-Bound Computation",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "62:1--62:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3585074",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3585074",
  abstract = "A growing number of embedded instruments is being integrated into System-on-Chips for testing, monitoring, and several other purposes. To standardize their access protocols, the IEEE 1687 (IJTAG) standard has defined a flexible network infrastructure. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "62",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Ferres:2023:CFF,
  author = "Bruno Ferres and Olivier Muller and Fr{\'e}d{\'e}ric Rousseau",
  title = "A Chisel Framework for Flexible Design Space Exploration through a Functional Approach",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "63:1--63:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3590769",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3590769",
  abstract = "As the need for efficient digital circuits is ever growing in the industry, the design of such systems remains daunting, requiring both expertise and time. In an attempt to close the gap between software development and hardware design, powerful features \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "63",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Khan:2023:HEC,
  author = "Muhammad Imran Khan",
  title = "Harmonic Estimation and Comparative Analysis of Ultra-High Speed Flip-Flop and Latch Topologies for Low Power and High Performance Future Generation Micro-\slash Nano Electronic Systems",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "64:1--64:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3590770",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3590770",
  abstract = "This paper presents estimation and analysis of the higher order harmonics, power features, and real performance of flip-flop and master-slave latch topologies. This research article outlines the impact of transistor model quality and input signal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "64",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{He:2023:SEM,
  author = "Xu He and Yao Wang and Chang Liu and Qiang Wu and Juan Luo and Yang Guo",
  title = "A Soft-Error Mitigation Approach Using Pulse Quenching Enhancement at Detailed Placement for Combinational Circuits",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "65:1--65:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3595637",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3595637",
  abstract = "As technology continuously shrinks, radiation-induced soft errors have become a great threat to the circuit reliability. Among all the causes, the Single-Event Transient (SET) effect is the dominating one for the radiation-induced soft errors. SET-induced \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "65",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Kazerooni-Zand:2023:MBM,
  author = "Reza Kazerooni-Zand and Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram",
  title = "Memristive-based Mixed-signal {CGRA} for Accelerating Deep Neural Network Inference",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "66:1--66:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3595638",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3595638",
  abstract = "In this paper, a mixed-signal coarse-grained reconfigurable architecture (CGRA) for accelerating inference in deep neural networks (DNNs) is presented. It is based on performing dot-product computations using analog computing to achieve a considerable \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "66",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Chu:2023:ADC,
  author = "Cheng Chu and Cheng Liu and Dawen Xu and Ying Wang and Tao Luo and Huawei Li and Xiaowei Li",
  title = "Accelerating Deformable Convolution Networks with Dynamic and Irregular Memory Accesses",
  journal = j-TODAES,
  volume = "28",
  number = "4",
  pages = "67:1--67:??",
  month = jul,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3597431",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:08 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3597431",
  abstract = "Deformable convolution networks (DCNs) proposed to address image recognition with geometric or photometric variations typically involve deformable convolution that convolves on arbitrary locations of input features. The locations change with different \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "67",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Jiang:2023:ISS,
  author = "Iris Hui-Ru Jiang and David Chinnery and Gracieli Posser and Jens Lienig",
  title = "Introduction to the Special Section on Advances in Physical Design Automation",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "68:1--68:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3604593",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3604593",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "68",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Gopalakrishnan:2023:GMW,
  author = "Ramprasath Srinivasa Gopalakrishnan and Meghna Madhusudan and Arvind K. Sharma and Jitesh Poojary and Soner Yaldiz and Ramesh Harjani and Steven M. Burns and Sachin S. Sapatnekar",
  title = "A Generalized Methodology for Well Island Generation and Well-tap Insertion in Analog\slash Mixed-signal Layouts",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "69:1--69:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3580477",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3580477",
  abstract = "Well island generation and well tap placement is an important problem in analog/mixed-signal (AMS) circuits. Well taps can only prevent latchups within a certain radius of influence within a well island, and hence must be appropriately inserted to cover \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "69",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Wei:2023:APP,
  author = "Min Wei and Xingyu Tong and Yuan Wen and Jianli Chen and Jun Yu and Wenxing Zhu and Yao-Wen Chang",
  title = "Analytical Placement with {$3$D} {Poisson}'s Equation and {ADMM}-based Optimization for Large-scale {2.5D} Heterogeneous {FPGAs}",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "70:1--70:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3582554",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3582554",
  abstract = "As design complexity keeps increasing, the 2.5D field-programmable gate array (FPGA) with large logic capacity has become popular in modern circuit applications. A 2.5D FPGA consists of multiple dies connected through super long lines (SLLs) on an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "70",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Hougardy:2023:FOD,
  author = "Stefan Hougardy and Meike Neuwohner and Ulrike Schorr",
  title = "A Fast Optimal Double-row Legalization Algorithm",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "71:1--71:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3579844",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3579844",
  abstract = "In Placement Legalization, it is often assumed that (almost) all standard cells possess the same height and can therefore be aligned in cell rows, which can then be treated independently. However, this is no longer true for recent technologies, where a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "71",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Daboul:2023:GIO,
  author = "Siad Daboul and Stephan Held and Bento Natura and Daniel Rotter",
  title = "Global Interconnect Optimization",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "72:1--72:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3587044",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3587044",
  abstract = "We propose a new comprehensive solution to global interconnect optimization. Traditional buffering algorithms mostly insert repeaters on a net-by-net basis based on slacks and possibly guided by global wires.
We show how to integrate routing congestion, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "72",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2023:MMR,
  author = "Zhonghua Zhou and Yuxuan Pan and Guy G. F. Lemieux and Andr{\'e} Ivanov",
  title = "{MEDUSA}: a Multi-Resolution Machine Learning Congestion Estimation Method for {$2$D} and {$3$D} Global Routing",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "73:1--73:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3590768",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3590768",
  abstract = "Routing congestion is one of the many factors that need to be minimized during the physical design phase of large integrated circuits. In this article, we propose a novel congestion estimation method, called MEDUSA, that consists of three parts: (1) a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "73",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Zheng:2023:BVD,
  author = "Su Zheng and Hao Geng and Chen Bai and Bei Yu and Martin D. F.
Wong",
  title = "Boosting {VLSI} Design Flow Parameter Tuning with Random Embedding and Multi-objective Trust-region {Bayesian} Optimization",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "74:1--74:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3597931",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3597931",
  abstract = "Modern very large-scale integration (VLSI) design requires the implementation of integrated circuits using electronic design automation (EDA) tools. Due to the complexity of EDA algorithms, there are numerous tool parameters that have imperative impacts \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "74",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Murali:2023:PSR,
  author = "Gauthaman Murali and Anthony Agnesina and Sung Kyu Lim",
  title = "A {PPA} Study of Reinforced Placement Parameter Autotuning: Pseudo-{$3$D} vs. True-{$3$D} Placers",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "75:1--75:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3582007",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3582007",
  abstract = "3D Place and Route (P\&R) flows either involve true-3D placement algorithms or use commercial 2D tools to transform a 2D design into a 3D design. Irrespective of the nature of the placers, several placement parameters in these tools affect the quality of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "75",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Vanna-Iampikul:2023:GBM,
  author = "Pruek Vanna-Iampikul and Yi-Chen Lu and Da Eun Shim and Sung Kyu Lim",
  title = "{GNN}-based Multi-bit Flip-flop Clustering and Post-clustering Design Optimization for Energy-efficient {$3$D} {ICs}",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "76:1--76:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3588570",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3588570",
  abstract = "In high-performance three-dimensional Integrated Circuits (3D ICs), clock networks consume a large portion of the full-chip power. However, no previous 3D IC work has ever optimized 3D clock networks for both power and performance simultaneously, which \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "76",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Wu:2023:IBS,
  author = "Jun-Sheng Wu and Chi-An Pan and Yi-Yu Liu",
  title = "{ILP}-based Substrate Routing with Mismatched Via Dimension Consideration for Wire-bonding {FBGA} Package Design",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "77:1--77:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3579843",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3579843",
  abstract = "With the rapidly growing demand for system-level integration, package substrates have become one of the most important carriers in semiconductor industry. Fine pitch ball grid array (FBGA) packaging is a widely used technology thanks to its relative cost- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "77",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Liu:2023:CPN,
  author = "Yanjiang Liu and Junwei Li and Tongzhou Qu and Zibin Dai",
  title = "{CBDC-PUF}: a Novel Physical Unclonable Function Design Framework Utilizing Configurable Butterfly Delay Chain Against Modeling Attack",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "78:1--78:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3588435",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3588435",
  abstract = "Physical unclonable function (PUF) is a promising security-based primitive, which provides an extremely large number of responses for key generation and authentication applications. Various PUFs have been developed as central building blocks in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "78",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Aghaeekiasaraee:2023:CFR,
  author = "Erfan Aghaeekiasaraee and Aysa Fakheri Tabrizi and Tiago Augusto Fontana and Renan Netto and Sheiny Fabre Almeida and Upma Gandhi and Jos{\'e} Lu{\'\i}s G{\"u}ntzel and David Westwick and Laleh Behjat",
  title = "{CRP2.0}: a Fast and Robust Cooperation between Routing and Placement in Advanced Technology Nodes",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "79:1--79:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3590962",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3590962",
  abstract = "Traditionally, the placement and routing stages of a physical design are performed separately. Because of the additional complexities arising in advanced technology nodes, they have become more interdependent. Therefore, creating efficient cooperation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "79",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Zhu:2023:DSE,
  author = "Binwu Zhu and Xinyun Zhang and Yibo Lin and Bei Yu and Martin Wong",
  title = "{DRC-SG 2.0}: Efficient Design Rule Checking Script Generation via Key Information Extraction",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "80:1--80:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3594666",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3594666",
  abstract = "Design Rule Checking (DRC) is a critical step in integrated circuit design. DRC requires formatted scripts as the input to design rule checkers. However, these scripts are manually generated in the foundry, which is tedious and error prone for generation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "80",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Kritikakou:2023:MMS,
  author = "Angeliki Kritikakou and Stefanos Skalistis",
  title = "Mitigating Mode-switch through Run-time Computation of Response Time",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "81:1--81:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3597432",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3597432",
  abstract = "Mixed-critical systems consist of applications with different criticality. In these systems, different confidence levels of Worst-Case Execution Time (WCET) estimations are used.
Dual criticality systems use a less pessimistic, but with lower level of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
  articleno = "81",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Wang:2023:BIH,
  author = "Zilu Wang and Xinming Shi and Xin Yao",
  title = "A Brain-Inspired Hardware Architecture for Evolutionary Algorithms Based on Memristive Arrays",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "82:1--82:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3598421",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3598421",
  abstract = "Brain-inspired computing takes inspiration from the brain to create energy-efficient hardware systems for information processing, capable of performing highly sophisticated tasks. Systems built with emerging electronics, such as memristive devices, can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno = "82",
  fjournal = "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL = "https://dl.acm.org/loi/todaes",
}

@Article{Monjur:2023:HSR,
  author = "Mohammad Monjur and Joshua Calzadillas and Qiaoyan Yu",
  title = "Hardware Security Risks and Threat Analyses in Advanced Manufacturing Industry",
  journal = j-TODAES,
  volume = "28",
  number = "5",
  pages = "83:1--83:??",
  month = sep,
  year = "2023",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/3603502",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Sep 18 09:07:10 MDT 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL = "https://dl.acm.org/doi/10.1145/3603502",
  abstract = "The advanced manufacturing industry (AMI) faces many unique challenges from the cyber-physical domain. Security threats are originated from two integral parts: software and hardware. Over the past decade, software security has been addressed extensively, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "83", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Narang:2023:DPM, author = "Gaurav Narang and Aryan Deshwal and Raid Ayoub and Michael Kishinevsky and Janardhan Rao Doppa and Partha Pratim Pande", title = "Dynamic Power Management in Large Manycore Systems: a Learning-to-Search Framework", journal = j-TODAES, volume = "28", number = "5", pages = "84:1--84:??", month = sep, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3603501", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:10 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3603501", abstract = "The complexity of manycore System-on-chips (SoCs) is growing faster than our ability to manage them to reduce the overall energy consumption. Further, as SoC design moves toward three-dimensional (3D) architectures, the core's power density increases \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "84", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tan:2023:IPC, author = "Jingweijia Tan and Weiren Wang and Maodi Ma and Xiaohui Wei and Kaige Yan", title = "Improving the Performance of {CNN} Accelerator Architecture under the Impact of Process Variations", journal = j-TODAES, volume = "28", number = "5", pages = "85:1--85:??", month = sep, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3604236", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:10 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3604236", abstract = "Convolutional neural network (CNN) accelerators are popular specialized platforms for efficient CNN processing. As semiconductor manufacturing technology scales down to nano scale, process variation dramatically affects the chip's quality. Process \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "85", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2023:CAT, author = "Meng-Jing Li and Yu-Chuan Yen and Yi-Ting Li and Yung-Chih Chen and Chun-Yao Wang", title = "A Constructive Approach for Threshold Function Identification", journal = j-TODAES, volume = "28", number = "5", pages = "86:1--86:??", month = sep, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3606371", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:10 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3606371", abstract = "Threshold Function (TF) is a subset of Boolean function that can be represented with a single linear threshold gate (LTG). 
In the research about threshold logic, the identification of TF is an important task that determines whether a given function is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "86", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yamin:2023:UAE, author = "Nuzhat Yamin and Ganapati Bhat", title = "Uncertainty-aware Energy Harvest Prediction and Management for {IoT} Devices", journal = j-TODAES, volume = "28", number = "5", pages = "87:1--87:??", month = sep, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3606372", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 18 09:07:10 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3606372", abstract = "Internet of things (IoT) devices are popular in several high-impact applications such as mobile healthcare and digital agriculture. However, IoT devices have limited operating lifetime due to their small form factor. Harvesting energy from ambient sources \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "87", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhang:2023:SKR, author = "Ruisi Zhang and Shehzeen Hussain and Huili Chen and Mojan Javaheripi and Farinaz Koushanfar", title = "Systemization of Knowledge: Robust Deep Learning using Hardware--Software Co-design in Centralized and Federated Settings", journal = j-TODAES, volume = "28", number = "6", pages = "88:1--88:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616868", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616868", abstract = "Deep learning (DL) models are enabling a significant paradigm shift in a diverse range of fields, including natural language processing and computer vision, as well as the design and automation of complex integrated circuits. While the deep models --- and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lu:2023:SPI, author = "Huaixi Lu and Yue Xing and Aarti Gupta and Sharad Malik", title = "{SoC} Protocol Implementation Verification Using Instruction-Level Abstraction Specifications", journal = j-TODAES, volume = "28", number = "6", pages = "89:1--89:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3610292", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3610292", abstract = "In modern systems-on-chips, several hardware protocols are used for communication and interaction among different modules. These protocols are complex and need to be implemented correctly for correct operation of the system-on-chip. Therefore, protocol \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{He:2023:GLP, author = "Xu He and Yao Wang and Zhiyong Fu and Yipei Wang and Yang Guo", title = "A General Layout Pattern Clustering Using Geometric Matching-based Clip Relocation and Lower-bound Aided Optimization", journal = j-TODAES, volume = "28", number = "6", pages = "90:1--90:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3610293", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3610293", abstract = "With the continuous shrinking of feature size, detection of lithography hotspots has been raised as one of the major concerns in Design-for-Manufacturability (DFM) of semiconductor processing. Hotspot detection, along with other DFM measures, trades off \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "90", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chang:2023:HPM, author = "Yajing Chang and Yingjian Yan and Chunsheng Zhu and Yanjiang Liu", title = "A High-performance Masking Design Approach for {Saber} against High-order Side-channel Attack", journal = j-TODAES, volume = "28", number = "6", pages = "91:1--91:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3611670", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3611670", abstract = "Post-quantum cryptography (PQC) has become the most promising cryptographic scheme against the threat of quantum computing to conventional public-key cryptographic schemes. Saber, as the finalist in the third round of the PQC standardization procedure, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "91", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Venieris:2023:MMW, author = "Stylianos I. Venieris and Javier Fernandez-Marques and Nicholas D. 
Lane", title = "Mitigating Memory Wall Effects in {CNN} Engines with On-the-Fly Weights Generation", journal = j-TODAES, volume = "28", number = "6", pages = "92:1--92:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3611673", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3611673", abstract = "The unprecedented accuracy of convolutional neural networks (CNNs) across a broad range of AI tasks has led to their widespread deployment in mobile and embedded settings. In a pursuit for high-performance and energy-efficient inference, significant \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "92", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Choudhury:2023:EPF, author = "Muhtadi Choudhury and Minyan Gao and Avinash Varna and Elad Peer and Domenic Forte", title = "Enhanced {PATRON}: Fault Injection and Power-aware {FSM} Encoding Through Linear Programming", journal = j-TODAES, volume = "28", number = "6", pages = "93:1--93:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3611669", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3611669", abstract = "Since finite state machines (FSMs) regulate the control flow in circuits, a computing system's security might be breached by attacking the FSM. Physical attacks are especially worrisome because they can bypass software countermeasures. For example, an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "93", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dahiya:2023:MDS, author = "Ayush Dahiya and Poornima Mittal and Rajesh Rohilla", title = "Modified Decoupled Sense Amplifier with Improved Sensing Speed for Low-Voltage Differential {SRAM}", journal = j-TODAES, volume = "28", number = "6", pages = "94:1--94:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3611672", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3611672", abstract = "A modified decoupled sense amplifier (MDSA) and modified decoupled sense amplifier with NMOS foot-switch is proposed for improved sensing in differential SRAM for low-voltage operation at the 22-nm technology node. The MDSA and MDSANF both offer notable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "94", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Naseer:2023:QGA, author = "Mahum Naseer and Osman Hasan and Muhammad Shafique", title = "{QuanDA}: {GPU} Accelerated Quantitative Deep Neural Network Analysis", journal = j-TODAES, volume = "28", number = "6", pages = "95:1--95:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3611671", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3611671", abstract = "Over the past years, numerous studies demonstrated the vulnerability of deep neural networks (DNNs) to make correct classifications in the presence of small noise. 
This motivated the formal analysis of DNNs to ensure that they delineate acceptable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "95", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rawat:2023:RSB, author = "Bhawna Rawat and Poornima Mittal", title = "A Reconfigurable {7T} {SRAM} Bit Cell for High Speed, Power Saving and Low Voltage Application", journal = j-TODAES, volume = "28", number = "6", pages = "96:1--96:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616872", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616872", abstract = "The decreasing operational voltage and scaled technology node for memory designing has widened the gap between two crucial parameters for an SRAM --- delay and power. As the demand for internet of things is increasing, the need for round the clock \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "96", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sivakumar:2023:SAL, author = "S. 
Sivakumar and John Jose", title = "Self Adaptive Logical Split Cache Techniques for Delayed Aging of {NVM LLC}", journal = j-TODAES, volume = "28", number = "6", pages = "97:1--97:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616871", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616871", abstract = "Due to the technological advancements in the last few decades, several applications have emerged that demand more computing power and on-chip and off-chip memories. However, the scaling of memory technologies is not at par with computing throughput of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "97", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Esper:2023:ASF, author = "Khalil Esper and Stefan Wildermann and J{\"u}rgen Teich", title = "Automatic Synthesis of {FSMs} for Enforcing Non-functional Requirements on {MPSoCs} Using Multi-objective Evolutionary Algorithms", journal = j-TODAES, volume = "28", number = "6", pages = "98:1--98:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3617832", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3617832", abstract = "Embedded system applications often require guarantees regarding non-functional properties when executed on a given MPSoC platform. Examples of such requirements include real-time, energy, or safety properties on corresponding programs. One option to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "98", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Senapati:2023:TTA, author = "Debabrata Senapati and Kousik Rajesh and Chandan Karfa and Arnab Sarkar", title = "{TMDS}: Temperature-aware Makespan Minimizing {DAG} Scheduler for Heterogeneous Distributed Systems", journal = j-TODAES, volume = "28", number = "6", pages = "99:1--99:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616869", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616869", abstract = "To meet application-specific performance demands, recent embedded platforms often involve the use of intricate micro-architectural designs and very small feature sizes leading to complex chips with multi-million gates. Such ultra-high gate densities often \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "99", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hong:2023:PMC, author = "Qinghui Hong and Richeng Huang and Pingdan Xiao and Jun Li and Jingru Sun and Jiliang Zhang", title = "Programmable In-memory Computing Circuit of {Fast Hartley Transform}", journal = j-TODAES, volume = "28", number = "6", pages = "100:1--100:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3618112", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3618112", abstract = "Discrete Hartley transform is a core component of digital signal processing because of its advantages of fast computing speed and less power consumption. Traditional FPGA-based implementation methods have the disadvantage of high latency, which cannot \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "100", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kundu:2023:MTF, author = "Debraj Kundu and Sudip Roy", title = "Multi-target Fluid Mixing in {MEDA} Biochips: Theory and an Attempt toward Waste Minimization", journal = j-TODAES, volume = "28", number = "6", pages = "101:1--101:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3622785", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3622785", abstract = "Sample preparation is an inherent procedure of many biochemical applications, and digital microfluidic biochips (DMBs) have proved to be very effective in performing such a procedure. In a single mixing step, conventional DMBs can mix two droplets in a 1:1 \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "101", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhou:2023:SLR, author = "Shanglin Zhou and Mikhail A. Bragin and Deniz Gurevin and Lynn Pepin and Fei Miao and Caiwen Ding", title = "Surrogate {Lagrangian} Relaxation: a Path to Retrain-Free Deep Neural Network Pruning", journal = j-TODAES, volume = "28", number = "6", pages = "102:1--102:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3624476", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3624476", abstract = "Network pruning is a widely used technique to reduce computation cost and model size for deep neural networks. 
However, the typical three-stage pipeline (i.e., training, pruning, and retraining (fine-tuning)) significantly increases the overall training \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "102", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ding:2023:TMP, author = "Bo Ding and Jinglei Huang and Junpeng Wang and Qi Xu and Song Chen and Yi Kang", title = "Task Modules Partitioning, Scheduling and Floorplanning for Partially Dynamically Reconfigurable Systems with Heterogeneous Resources", journal = j-TODAES, volume = "28", number = "6", pages = "103:1--103:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3625295", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3625295", abstract = "Some field programmable gate arrays (FPGAs) can be partially dynamically reconfigurable with heterogeneous resources distributed on the chip. FPGA-based partially dynamically reconfigurable system (FPGA-PDRS) can be used to accelerate computing and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "103", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2023:SRB, author = "Wenxiong Lin and Haojie Wu and Peng Gao and Wenjun Luo and Shuting Cai and Xiaoming Xiong", title = "Sequential Routing-based Time-division Multiplexing Optimization for Multi-{FPGA} Systems", journal = j-TODAES, volume = "28", number = "6", pages = "104:1--104:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626322", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626322", abstract = "Multi-field programming gate array (FPGA) systems are widely used in various circuit design-related areas, such as hardware emulation, virtual prototypes, and chiplet design methodologies. However, a physical resource clash between inter-FPGA signals and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "104", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Praveen:2023:DER, author = "Pushkar Praveen and R. K. 
Singh", title = "Design of Enhanced Reversible {9T} {SRAM} Design for the Reduction in Sub-threshold Leakage Current with 14nm {FinFET} Technology", journal = j-TODAES, volume = "28", number = "6", pages = "105:1--105:??", month = nov, year = "2023", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616538", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 10 09:53:53 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616538", abstract = "Power dissipation is considered one of the important issues in low power Very-large-scale integration (VLSI) circuit design and is related to the threshold voltage. Generally, the sub-threshold leakage current and the leakage power dissipation are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "105", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ni:2024:ISI, author = "Tianming Ni and Xiaoqing Wen and Hussam Amrouch and Cheng Zhuo and Peilin Song", title = "Introduction to the Special Issue on Design for Testability and Reliability of Security-aware Hardware", journal = j-TODAES, volume = "29", number = "1", pages = "1:1--1:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3631476", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3631476", abstract = "The research on design for testability and reliability of security-aware hardware has been important in both academia and industry. With ever-growing globalization, commercial hardware design, manufacturing, transportation, and supply now involve many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. 
Electron. Syst.", articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cui:2024:ERO, author = "Yijun Cui and Jiang Li and Yunpeng Chen and Chenghua Wang and Chongyan Gu and M{\'a}ire O'Neill and Weiqiang Liu", title = "An Efficient Ring Oscillator {PUF} Using Programmable Delay Units on {FPGA}", journal = j-TODAES, volume = "29", number = "1", pages = "2:1--2:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3593807", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3593807", abstract = "The ring oscillator (RO) PUF can be implemented on different FPGA platforms with high uniqueness and reliability. To decrease the hardware cost of conventional RO PUFs, a new design using the programmable delay units is proposed, namely, PRO PUF. The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2024:PLC, author = "Taixin Li and Boran Sun and Hongtao Zhong and Yixin Xu and Vijaykrishnan Narayanan and Liang Shi and Tianyi Wang and Yao Yu and Thomas K{\"a}mpfe and Kai Ni and Huazhong Yang and Xueqing Li", title = "{ProtFe}: Low-Cost Secure Power Side-Channel Protection for General and Custom {FeFET}-Based Memories", journal = j-TODAES, volume = "29", number = "1", pages = "3:1--3:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3604589", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3604589", abstract = "Ferroelectric Field Effect Transistors (FeFETs) have spurred increasing interest in both memories and computing applications, thanks to their CMOS compatibility, low-power operation, and high scalability. However, new security threats to the FeFET-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pan:2024:CEP, author = "Zijin Pan and Xunyu Li and Weiquan Hao and Runyu Miao and Albert Wang", title = "On-chip {ESD} Protection Design Methodologies by {CAD} Simulation", journal = j-TODAES, volume = "29", number = "1", pages = "4:1--4:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3593808", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3593808", abstract = "Electrostatic discharge (ESD) can cause malfunction or failure of integrated circuits (ICs). On-chip ESD protection design is a major IC design-for-reliability (DfR) challenge, particularly for complex chips made in advanced technology nodes. Traditional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bian:2024:RAS, author = "Jingchang Bian and Zhengfeng Huang and Peng Ye and Zhao Yang and Huaguo Liang", title = "A Reliability-Aware Splitting Duty-Cycle Physical Unclonable Function Based on Trade-off Process, Voltage, and Temperature Variations", journal = j-TODAES, volume = "29", number = "1", pages = "5:1--5:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3594667", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3594667", abstract = "The physical unclonable function (PUF) is a hardware security primitive that can be used to prevent malicious attacks aimed at obtaining device information at the hardware level. The ring oscillator (RO) PUF has attracted considerable research attention. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhang:2024:HTS, author = "Yuan Zhang and Jiliang Zhang", title = "A High Throughput {STR}-based {TRNG} by Jitter Precise Quantization Superposing", journal = j-TODAES, volume = "29", number = "1", pages = "6:1--6:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3606373", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3606373", abstract = "With the rapid development of integrated circuits and the continuous progress of computing capability, higher demands have been placed on the security and speed of data encryption in security systems. As a basic hardware security primitive, the true \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xiang:2024:TCL, author = "Dong Xiang", title = "Test Compression for Launch-on-Capture Transition Fault Testing", journal = j-TODAES, volume = "29", number = "1", pages = "7:1--7:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3597433", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3597433", abstract = "A new low-power test compression scheme, called Dcompress, is proposed for launch-on-capture transition fault testing by using a new seed encoding scheme, a new design for testability architecture, and a new low-power test application procedure. The new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bi:2024:AVA, author = "Yongtian Bi and Qi Xu and Hao Geng and Song Chen and Yi Kang", title = "{AD$^2$VNCS}: Adversarial Defense and Device Variation-tolerance in Memristive Crossbar-based Neuromorphic Computing Systems", journal = j-TODAES, volume = "29", number = "1", pages = "8:1--8:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3600231", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3600231", abstract = "In recent years, memristive crossbar-based neuromorphic computing systems (NCS) have obtained extremely high performance in neural network acceleration. 
However, adversarial attacks and conductance variations of memristors bring reliability challenges to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Calzada:2024:HIS, author = "Paul E. Calzada and Md. Sami {Ul Islam Sami} and Kimia Zamiri Azar and Fahim Rahman and Farimah Farahmandi and Mark Tehranipoor", title = "Heterogeneous Integration Supply Chain Integrity Through Blockchain and {CHSM}", journal = j-TODAES, volume = "29", number = "1", pages = "9:1--9:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3625823", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3625823", abstract = "Over the past few decades, electronics have become commonplace in government, commercial, and social domains. These devices have developed rapidly, as seen in the prevalent use of system-on-chips rather than separate integrated circuits on a single \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cui:2024:RAA, author = "Xiaole Cui and Mingqi Yin and Hanqing Liu and Xiaoxin Cui", title = "The Resistance Analysis Attack and Security Enhancement of the {IMC LUT} Based on the Complementary Resistive Switch Cells", journal = j-TODAES, volume = "29", number = "1", pages = "10:1--10:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3616870", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3616870", abstract = "The resistive random access memory (RRAM) based in-memory computing (IMC) is an emerging architecture to address the challenge of the ``memory wall'' problem. The complementary resistive switch (CRS) cell connects two bipolar RRAM elements anti-serially to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xiao:2024:IRI, author = "Jie Xiao and Yingying Ge and Ru Wang and Jungang Lou", title = "{ICP-RL}: Identifying Critical Paths for Fault Diagnosis Using Reinforcement Learning", journal = j-TODAES, volume = "29", number = "1", pages = "11:1--11:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3610294", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3610294", abstract = "Identifying the critical paths is crucial to reducing the complexity of performance analysis and reliability calculation for logic circuits. 
In this article, we propose a method for identifying the critical path in a combination circuit using a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Guo:2024:YOA, author = "Nanlin Guo and Fulin Peng and Jiahe Shi and Fan Yang and Jun Tao and Xuan Zeng", title = "Yield Optimization for Analog Circuits over Multiple Corners via {Bayesian} Neural Networks: Enhancing Circuit Reliability under Environmental Variation", journal = j-TODAES, volume = "29", number = "1", pages = "12:1--12:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626321", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626321", abstract = "The reliability of circuits is significantly affected by process variations in manufacturing and environmental variation during operation. Current yield optimization algorithms take process variations into consideration to improve circuit reliability. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Peng:2024:CTD, author = "Qingsong Peng and Jingchang Bian and Zhengfeng Huang and Senling Wang and Aibin Yan", title = "A Compact {TRNG} Design for {FPGA} Based on the Metastability of {RO}-driven Shift Registers", journal = j-TODAES, volume = "29", number = "1", pages = "13:1--13:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3610295", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3610295", abstract = "True random number generators (TRNGs), as an important component of security systems, have received a lot of attention for their related research. The previous researches have provided a large number of TRNG solutions, however, they still failed to reach \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sun:2024:LLD, author = "Rihui Sun and Pengfei Qiu and Yongqiang Lyu and Jian Dong and Haixia Wang and Dongsheng Wang and Gang Qu", title = "{Lightning}: Leveraging {DVFS-induced} Transient Fault Injection to Attack Deep Learning Accelerator of {GPUs}", journal = j-TODAES, volume = "29", number = "1", pages = "14:1--14:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3617893", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3617893", abstract = "Graphics Processing Units (GPU) are widely used as deep learning accelerators because of its high performance and low power consumption. Additionally, it remains secure against hardware-induced transient fault injection attacks, a classic type of attacks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Saglican:2024:MDV, author = "Enes Sa{\u{g}}lican and Engin Afacan", title = "{MOEA\slash D} vs. 
{NSGA-II}: a Comprehensive Comparison for Multi\slash Many Objective Analog\slash {RF} Circuit Optimization through a Generic Benchmark", journal = j-TODAES, volume = "29", number = "1", pages = "15:1--15:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626096", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626096", abstract = "Thanks to the enhanced computational capacity of modern computers, even sophisticated analog/radio frequency (RF) circuit sizing problems can be solved via electronic design automation (EDA) tools. Recently, several analog/RF circuit optimization \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rapp:2024:NAI, author = "Martin Rapp and Heba Khdr and Nikita Krohmer and J{\"o}rg Henkel", title = "{NPU}-Accelerated Imitation Learning for Thermal Optimization of {QoS}-Constrained Heterogeneous Multi-Cores", journal = j-TODAES, volume = "29", number = "1", pages = "16:1--16:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626320", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626320", abstract = "Thermal optimization of a heterogeneous clustered multi-core processor under user-defined QoS targets requires application migration and DVFS. However, selecting the core to execute each application and the VF levels of each cluster is a complex problem \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dewan:2024:CAM, author = "Monzurul Islam Dewan and Sheng-En David Lin and Dae Hyun Kim", title = "Construction of All Multilayer Monolithic {RSMTs} and Its Application to Monolithic {$3$D} {IC} Routing", journal = j-TODAES, volume = "29", number = "1", pages = "17:1--17:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626958", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626958", abstract = "Monolithic three-dimensional (M3D) integration allows ultra-thin silicon tier stacking in a single package. The high-density stacking is acquiring interest and is becoming more popular for smaller footprint areas, shorter wirelength, higher performance, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chhabria:2024:MLA, author = "Vidya A. Chhabria and Wenjing Jiang and Andrew B. Kahng and Sachin S. 
Sapatnekar", title = "A Machine Learning Approach to Improving Timing Consistency between Global Route and Detailed Route", journal = j-TODAES, volume = "29", number = "1", pages = "18:1--18:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3626959", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3626959", abstract = "Due to the unavailability of routing information in design stages prior to detailed routing (DR), the tasks of timing prediction and optimization pose major challenges. Inaccurate timing prediction wastes design effort, hurts circuit performance, and may \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pandey:2024:NDT, author = "Shailja Pandey and Lokesh Siddhu and Preeti Ranjan Panda", title = "{NeuroCool}: Dynamic Thermal Management of {$3$D} {DRAM} for Deep Neural Networks through Customized Prefetching", journal = j-TODAES, volume = "29", number = "1", pages = "19:1--19:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3630012", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3630012", abstract = "Deep neural network (DNN) implementations are typically characterized by huge datasets and concurrent computation, resulting in a demand for high memory bandwidth due to intensive data movement between processors and off-chip memory. Performing DNN \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bai:2024:BER, author = "Chen Bai and Qi Sun and Jianwang Zhai and Yuzhe Ma and Bei Yu and Martin D. F. Wong", title = "{BOOM-Explorer}: {RISC-V} {BOOM} Microarchitecture Design Space Exploration", journal = j-TODAES, volume = "29", number = "1", pages = "20:1--20:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3630013", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3630013", abstract = "Microarchitecture parameters tuning is critical in the microprocessor design cycle. It is a non-trivial design space exploration (DSE) problem due to the large solution space, cycle-accurate simulators' modeling inaccuracy, and high simulation runtime for performance evaluations. Previous methods require massive expert efforts to construct interpretable equations or high computing resource demands to train black-box prediction models. This article follows the black-box methods due to better solution qualities than analytical methods in general. We summarize two learned lessons and propose BOOM-Explorer accordingly. First, embedding microarchitecture domain knowledge in the DSE improves the solution quality. Second, BOOM-Explorer makes the microarchitecture DSE for register-transfer-level designs within the limited time budget feasible. We enhance BOOM-Explorer with the diversity-guidance, further improving the algorithm performance. 
Experimental results with RISC-V Berkeley-Out-of-Order Machine under 7-nm technology show that our proposed methodology achieves an average of 18.75\% higher Pareto hypervolume, 35.47\% less average distance to reference set, and 65.38\% less overall running time compared to previous approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2024:MFO, author = "Wanqian Li and Yinhe Han and Xiaoming Chen", title = "Mathematical Framework for Optimizing Crossbar Allocation for {ReRAM}-based {CNN} Accelerators", journal = j-TODAES, volume = "29", number = "1", pages = "21:1--21:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3631523", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3631523", abstract = "The resistive random-access memory (ReRAM) has widely been used to accelerate convolutional neural networks (CNNs) thanks to its analog in-memory computing capability. ReRAM crossbars not only store layers' weights, but also perform in-situ matrix-vector \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2024:FDC, author = "Dan Wu and Peng Chen and Thilini Kaushalya Bandara and Zhaoying Li and Tulika Mitra", title = "{Flip}: Data-centric Edge {CGRA} Accelerator", journal = j-TODAES, volume = "29", number = "1", pages = "22:1--22:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3631118", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3631118", abstract = "Coarse-Grained Reconfigurable Arrays (CGRA) are promising edge accelerators due to the outstanding balance in flexibility, performance, and energy efficiency. Classic CGRAs statically map compute operations onto the processing elements (PE) and route the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2024:SAM, author = "Ying Wu and Chuangtao Chen and Weihua Xiao and Xuan Wang and Chenyi Wen and Jie Han and Xunzhao Yin and Weikang Qian and Cheng Zhuo", title = "A Survey on Approximate Multiplier Designs for Energy Efficiency: From Algorithms to Circuits", journal = j-TODAES, volume = "29", number = "1", pages = "23:1--23:??", month = jan, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3610291", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Jan 15 11:14:18 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3610291", abstract = "Given the stringent requirements of energy efficiency for Internet-of-Things edge devices, approximate multipliers, as a basic component of many processors and accelerators, have been constantly proposed and studied for decades, especially in error-resilient applications. The computation error and energy efficiency largely depend on how and where the approximation is introduced into a design. Thus, this article aims to provide a comprehensive review of the approximation techniques in multiplier designs ranging from algorithms and architectures to circuits. We have implemented representative approximate multiplier designs in each category to understand the impact of the design techniques on accuracy and efficiency. The designs can then be effectively deployed in high-level applications, such as machine learning, to gain energy efficiency at the cost of slight accuracy loss.", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liang:2024:DAU, author = "Tung-Che Liang and Yi-Chen Chang and Zhanwei Zhong and Yaas Bigdeli and Tsung-Yi Ho and Krishnendu Chakrabarty and Richard Fair", title = "Dynamic Adaptation Using Deep Reinforcement Learning for Digital Microfluidic Biochips", journal = j-TODAES, volume = "29", number = "2", pages = "24:1--24:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3633458", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3633458", abstract = "We describe an exciting new application domain for deep reinforcement learning (RL): droplet routing on digital microfluidic biochips (DMFBs). A DMFB consists of a two-dimensional electrode array, and it manipulates droplets of liquid to automatically \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Qian:2024:ERL, author = "Yu Qian and Xuegong Zhou and Hao Zhou and Lingli Wang", title = "An Efficient Reinforcement Learning Based Framework for Exploring Logic Synthesis", journal = j-TODAES, volume = "29", number = "2", pages = "25:1--25:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3632174", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3632174", abstract = "Logic synthesis is a crucial step in electronic design automation tools. 
The rapid developments of reinforcement learning (RL) have enabled the automated exploration of logic synthesis. Existing RL based methods may lead to data inefficiency, and the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2024:SSG, author = "Bo Wang and Sheng Ma and Shengbai Luo and Lizhou Wu and Jianmin Zhang and Chunyuan Zhang and Tiejun Li", title = "{SparGD}: a Sparse {GEMM} Accelerator with Dynamic Dataflow", journal = j-TODAES, volume = "29", number = "2", pages = "26:1--26:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3634703", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3634703", abstract = "Deep learning has become a highly popular research field, and previously deep learning algorithms ran primarily on CPUs and GPUs. However, with the rapid development of deep learning, it was discovered that existing processors could not meet the specific \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kaur:2024:RRS, author = "Jaspinder Kaur and Shirshendu Das", title = "{RSPP}: Restricted Static Pseudo-Partitioning for Mitigation of Cross-Core Covert Channel Attacks", journal = j-TODAES, volume = "29", number = "2", pages = "27:1--27:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3637222", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3637222", abstract = "Cache timing channel attacks exploit the inherent properties of cache memories: hit and miss time along with the shared nature of the cache to leak secret information. The side channel and covert channel are the two well-known cache timing channel \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kim:2024:OMP, author = "Seok Young Kim and Jaewook Lee and Yoonah Paik and Chang Hyun Kim and Won Jun Lee and Seon Wook Kim", title = "Optimal Model Partitioning with Low-Overhead Profiling on the {PIM}-based Platform for Deep Learning Inference", journal = j-TODAES, volume = "29", number = "2", pages = "28:1--28:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3628599", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3628599", abstract = "Recently Processing-in-Memory (PIM) has become a promising solution to achieve energy-efficient computation in data-intensive applications by placing computation near or inside the memory. In most Deep Learning (DL) frameworks, a user manually partitions \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Niu:2024:ECS, author = "Linwei Niu and Danda B. 
Rawat and Jonathan Musselwhite and Zonghua Gu and Qingxu Deng", title = "Energy-Constrained Scheduling for Weakly Hard Real-Time Systems Using Standby-Sparing", journal = j-TODAES, volume = "29", number = "2", pages = "29:1--29:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3631587", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3631587", abstract = "For real-time embedded systems, QoS (Quality of Service), fault tolerance, and energy budget constraint are among the primary design concerns. In this research, we investigate the problem of energy constrained standby-sparing for both periodic and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ardalani:2024:DCS, author = "Newsha Ardalani and Saptadeep Pal and Puneet Gupta", title = "{DeepFlow}: a Cross-Stack Pathfinding Framework for Distributed {AI} Systems", journal = j-TODAES, volume = "29", number = "2", pages = "30:1--30:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3635867", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3635867", abstract = "Over the past decade, machine learning model complexity has grown at an extraordinary rate, as has the scale of the systems training such large models. However, there is an alarmingly low hardware utilization (5--20\%) in large scale AI systems. The low \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{S:2024:SAS, author = "Deepanjali S. and Noor Mahammad SK", title = "Scalable and Accelerated Self-healing Control Circuit Using Evolvable Hardware", journal = j-TODAES, volume = "29", number = "2", pages = "31:1--31:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3634682", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3634682", abstract = "Controllers are mission-critical components of any electronic design. By sending control signals, they decide which and when other data path elements must operate. Faults, especially Single Event Upset (SEU) occurrence in these components, can lead to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lu:2024:GPA, author = "Yi-Chen Lu and Haoxing Ren and Hao-Hsiang Hsiao and Sung Kyu Lim", title = "{GAN-Place}: Advancing Open Source Placers to Commercial-quality Using Generative Adversarial Networks and Transfer Learning", journal = j-TODAES, volume = "29", number = "2", pages = "32:1--32:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3636461", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3636461", abstract = "Recently, GPU-accelerated placers such as DREAMPlace and Xplace have demonstrated their superiority over traditional CPU-reliant placers by achieving orders of magnitude speed up in placement runtime. However, due to their limited focus in placement \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Deng:2024:ERT, author = "Libing Deng and Gang Zeng and Ryo Kurachi and Hiroaki Takada and Xiongren Xiao and Renfa Li and Guoqi Xie", title = "Enhanced Real-time Scheduling of {AVB} Flows in Time-Sensitive Networking", journal = j-TODAES, volume = "29", number = "2", pages = "33:1--33:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3637878", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3637878", abstract = "Time-Sensitive Networking (TSN) realizes high bandwidth and time determinism for data transmission and thus becomes the crucial communication technology in time-critical systems. The Gate Control List (GCL) is used to control the transmission of different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sankar:2024:TTA, author = "Syam Sankar and Ruchika Gupta and John Jose and Sukumar Nandi", title = "{TROP}: {TRust-aware OPportunistic} Routing in {NoC} with Hardware {Trojans}", journal = j-TODAES, volume = "29", number = "2", pages = "34:1--34:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3639821", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3639821", abstract = "Multiple software and hardware intellectual property (IP) components are combined on a single chip to form Multi-Processor Systems-on-Chips (MPSoCs). Due to the rigid time-to-market constraints, some of the IPs are from outsourced third parties. Due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2024:ALV, author = "Bo-Yuan Huang and Steven Lyubomirsky and Yi Li and Mike He and Gus Henry Smith and Thierry Tambe and Akash Gaonkar and Vishal Canumalla and Andrew Cheung and Gu-Yeon Wei and Aarti Gupta and Zachary Tatlock and Sharad Malik", title = "Application-level Validation of Accelerator Designs Using a Formal Software\slash Hardware Interface", journal = j-TODAES, volume = "29", number = "2", pages = "35:1--35:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3639051", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3639051", abstract = "Ideally, accelerator development should be as easy as software development. Several recent design languages/tools are working toward this goal, but actually testing early designs on real applications end-to-end remains prohibitively difficult due to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tang:2024:MIP, author = "Ke Tang and Lang Feng and Zhongfeng Wang", title = "Mixed Integer Programming based Placement Refinement by {RSMT} Model with Movable Pins", journal = j-TODAES, volume = "29", number = "2", pages = "36:1--36:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3639365", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3639365", abstract = "Placement is a critical step in the physical design for digital application specific integrated circuits (ASICs), as it can directly affect the design qualities such as wirelength and timing. For many domain specific designs, the demands for high \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{NS:2024:POA, author = "Karthik Somayaji NS and Peng Li", title = "{Pareto} Optimization of Analog Circuits Using Reinforcement Learning", journal = j-TODAES, volume = "29", number = "2", pages = "37:1--37:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3640463", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3640463", abstract = "Analog circuit optimization and design presents a unique set of challenges in the IC design process. Many applications require the designer to optimize for multiple competing objectives, which poses a crucial challenge. 
Motivated by these practical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jiang:2024:RHF, author = "Danping Jiang and Zibin Dai and Yanjiang Liu and Zongren Zhang", title = "{RGMU}: a High-flexibility and Low-cost Reconfigurable {Galois} Field Multiplication Unit Design Approach for {CGRCA}", journal = j-TODAES, volume = "29", number = "2", pages = "38:1--38:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3639820", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3639820", abstract = "Finite field multiplication is a non-linear transformation operator that appears in the majority of symmetric cryptographic algorithms. Numerous specified finite field multiplication units have been proposed as a fundamental module in the coarse-grained \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2024:MLC, author = "Jianfeng Wang and Zhonghao Chen and Jiahao Zhang and Yixin Xu and Tongguang Yu and Ziheng Zheng and Enze Ye and Sumitha George and Huazhong Yang and Yongpan Liu and Kai Ni and Vijaykrishnan Narayanan and Xueqing Li", title = "A Module-Level Configuration Methodology for Programmable Camouflaged Logic", journal = j-TODAES, volume = "29", number = "2", pages = "39:1--39:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3640462", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3640462", abstract = "Logic camouflage is a widely adopted technique that mitigates the threat of intellectual property (IP) piracy and overproduction in the integrated circuit (IC) supply chain. Camouflaged logic achieves functional obfuscation through physical-level \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Weerasena:2024:SEO, author = "Hansika Weerasena and Prabhat Mishra", title = "Security of Electrical, Optical, and Wireless On-chip Interconnects: a Survey", journal = j-TODAES, volume = "29", number = "2", pages = "40:1--40:??", month = mar, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3631117", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 19 08:17:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3631117", abstract = "The advancement of manufacturing technologies has enabled the integration of more intellectual property (IP) cores on the same system-on-chip (SoC). Scalable and high throughput on-chip communication architecture has become a vital component in today's \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dong:2024:DAE, author = "Jinxin Dong and Pingqiang Zhou", title = "Detecting Adversarial Examples Utilizing Pixel Value Diversity", journal = j-TODAES, volume = "29", number = "3", pages = "41:1--41:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3636460", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3636460", abstract = "In this article, we introduce two novel methods to detect adversarial examples utilizing pixel value diversity. 
First, we propose the concept of pixel value diversity (which reflects the spread of pixel values in an image) and two independent metrics \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hassani:2024:EFA, author = "Fatemeh Serajeh Hassani and Mohammad Sadrosadati and Nezam Rohbani and Sebastian Pointner and Robert Wille and Hamid Sarbazi-Azad", title = "An Efficient {FPGA} Architecture with Turn-Restricted Switch Boxes", journal = j-TODAES, volume = "29", number = "3", pages = "42:1--42:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643809", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643809", abstract = "Field-Programmable Gate Arrays (FPGAs) employ a large number of SRAM cells to provide a flexible routing architecture which have a significant impact on the FPGA's area and power consumption. This flexible routing allows for a rather easy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2024:EEE, author = "Yunping Zhao and Sheng Ma and Hengzhu Liu and Libo Huang", title = "{EPHA}: an Energy-efficient Parallel Hybrid Architecture for {ANNs} and {SNNs}", journal = j-TODAES, volume = "29", number = "3", pages = "43:1--43:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643134", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643134", abstract = "Artificial neural networks (ANNs) and spiking neural networks (SNNs) are two general approaches to achieve artificial intelligence (AI). The former have been widely used in academia and industry fields; the latter, SNNs, are more similar to biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2024:DPD, author = "Aidong Zhao and Tianchen Gu and Zhaori Bi and Fan Yang and Changhao Yan and Xuan Zeng and Zixiao Lin and Wenchuang Hu and Dian Zhou", title = "{D$^3$PBO}: Dynamic Domain Decomposition-based Parallel {Bayesian} Optimization for Large-scale Analog Circuit Sizing", journal = j-TODAES, volume = "29", number = "3", pages = "44:1--44:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643811", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643811", abstract = "Bayesian optimization (BO) is an efficient global optimization method for expensive black-box functions, but the expansion for high-dimensional problems and large sample budgets still remains a severe challenge. In order to extend BO for large-scale \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2024:RCS, author = "Irith Pomeranz", title = "Reduced On-chip Storage of Seeds for Built-in Test Generation", journal = j-TODAES, volume = "29", number = "3", pages = "45:1--45:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643810", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643810", abstract = "Logic built-in self-test (LBIST) approaches use an on-chip logic block for test generation and thus enable in-field testing.
Recent reports of silent data corruption underline the importance of in-field testing. In a class of storage-based LBIST \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Thakur:2024:VLL, author = "Shailja Thakur and Baleegh Ahmad and Hammond Pearce and Benjamin Tan and Brendan Dolan-Gavitt and Ramesh Karri and Siddharth Garg", title = "{VeriGen}: a Large Language Model for {Verilog} Code Generation", journal = j-TODAES, volume = "29", number = "3", pages = "46:1--46:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643681", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643681", abstract = "In this study, we explore the capability of Large Language Models (LLMs) to automate hardware design by automatically completing partial Verilog code, a common language for designing and modeling digital systems. We fine-tune pre-existing LLMs on Verilog \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Luo:2024:HTH, author = "Yandong Luo and Shimeng Yu", title = "{H3D}-Transformer: a Heterogeneous {3D} ({H3D}) Computing Platform for Transformer Model Acceleration on Edge Devices", journal = j-TODAES, volume = "29", number = "3", pages = "47:1--47:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3649219", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3649219", abstract = "Prior hardware accelerator designs primarily focused on single-chip solutions for 10 MB-class computer vision models. The GB-class transformer models for natural language processing (NLP) impose challenges on existing accelerator design due to the massive \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2024:TDS, author = "Irith Pomeranz", title = "Two-dimensional Search Space for Extracting Broadside Tests from Functional Test Sequences", journal = j-TODAES, volume = "29", number = "3", pages = "48:1--48:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3650207", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3650207", abstract = "Testing for delay faults after chip manufacturing is critical to correct chip operation. Tests for delay faults are applied using scan chains that provide access to internal memory elements. 
As a result, a circuit may operate under non-functional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Brzozowski:2024:CAD, author = "Ireneusz Brzozowski", title = "Comparative Analysis of Dynamic Power Consumption of Parallel Prefix Adder", journal = j-TODAES, volume = "29", number = "3", pages = "49:1--49:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3651984", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3651984", abstract = "The Newcomb-Benford law, also known as Benford's law, is the law of anomalous numbers stating that in many real-life numerical datasets, including physical and statistical ones, numbers have a small initial digit. Numbers irregularity observed in nature \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rahman:2024:SES, author = "Md Moshiur Rahman and Jim Geist and Daniel Xing and Yuntao Liu and Ankur Srivastava and Travis Meade and Yier Jin and Swarup Bhunia", title = "Security Evaluation of State Space Obfuscation of Hardware {IP} through a Red Team--Blue Team Practice", journal = j-TODAES, volume = "29", number = "3", pages = "50:1--50:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3640461", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3640461", abstract = "Due to the inclination towards a fab-less model of integrated circuit (IC) manufacturing, several untrusted entities get white-box access to the proprietary intellectual property (IP) blocks from diverse vendors. To this end, the untrusted entities pose \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pan:2024:RCA, author = "Renjian Pan and Xin Li and Krishnendu Chakrabarty", title = "Root-Cause Analysis with Semi-Supervised Co-Training for Integrated Systems", journal = j-TODAES, volume = "29", number = "3", pages = "51:1--51:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3649313", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3649313", abstract = "Root-cause analysis for integrated systems has become increasingly challenging due to their growing complexity. To tackle these challenges, machine learning (ML) has been applied to enhance root-cause analysis. Nonetheless, ML-based root-cause analysis \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Prasad:2024:SSE, author = "Govind Prasad and Bipin Mandi and Maifuz Ali", title = "{SEDONUT}: a Single Event Double Node Upset Tolerant {SRAM} for Terrestrial Applications", journal = j-TODAES, volume = "29", number = "3", pages = "52:1--52:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3651985", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3651985", abstract = "Radiation and its effect on neighboring nodes are critical not only for space applications but also for terrestrial applications at modern lower-technology nodes. 
This may cause static random-access memory (SRAM) failures due to single- and multi-node \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2024:HPA, author = "Hongduo Liu and Yijian Qian and Youqiang Liang and Bin Zhang and Zhaohan Liu and Tao He and Wenqian Zhao and Jiangbo Lu and Bei Yu", title = "A High-Performance Accelerator for Real-Time Super-Resolution on Edge {FPGAs}", journal = j-TODAES, volume = "29", number = "3", pages = "53:1--53:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3652855", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3652855", abstract = "In the digital era, the prevalence of low-quality images contrasts with the widespread use of high-definition displays, primarily due to low-resolution cameras and compression technologies. Image super-resolution (SR) techniques, particularly those \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2024:DRL, author = "Chunlin Li and Kun Jiang and Yong Zhang and Lincheng Jiang and Youlong Luo and Shaohua Wan", title = "Deep Reinforcement Learning-based Mining Task Offloading Scheme for Intelligent Connected Vehicles in {UAV}-aided {MEC}", journal = j-TODAES, volume = "29", number = "3", pages = "54:1--54:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3653451", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3653451", abstract = "The convergence of unmanned aerial vehicle (UAV)-aided mobile edge computing (MEC) networks and blockchain transforms the existing mobile networking paradigm. However, in the temporary hotspot scenario for intelligent connected vehicles (ICVs) in UAV-aided \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Witharana:2024:ICT, author = "Hasini Witharana and Aruna Jayasena and Prabhat Mishra", title = "Incremental Concolic Testing of Register-Transfer Level Designs", journal = j-TODAES, volume = "29", number = "3", pages = "55:1--55:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3655621", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3655621", abstract = "Concolic testing is a scalable solution for automated generation of directed tests for validation of hardware designs.
Unfortunately, concolic testing fails to cover complex corner cases such as hard-to-activate branches. In this article, we propose an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2024:FEA, author = "Bo Yang and Qi Xu and Hao Geng and Song Chen and Bei Yu and Yi Kang", title = "Floorplanning with Edge-aware Graph Attention Network and Hindsight Experience Replay", journal = j-TODAES, volume = "29", number = "3", pages = "56:1--56:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3653453", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3653453", abstract = "In this article, we focus on chip floorplanning, which aims to determine the location and orientation of circuit macros simultaneously, so the chip area and wirelength are minimized. As the highest level of abstraction in hierarchical physical design, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xian:2024:WJP, author = "Juming Xian and Yan Xing and Shuting Cai and Weijun Li and Xiaoming Xiong and Zhengfa Hu", title = "{WCPNet}: Jointly Predicting Wirelength, Congestion and Power for {FPGA} Using Multi-Task Learning", journal = j-TODAES, volume = "29", number = "3", pages = "57:1--57:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3656170", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3656170", abstract = "To speed up the design closure and improve the QoR of FPGA, supervised single-task machine learning techniques have been used to predict individual design metric based on placement results. However, the design objective is to achieve optimal performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sivakumar:2024:ELP, author = "S. Sivakumar and John Jose and Vijaykrishnan Narayanan", title = "Enhancing Lifetime and Performance of {MLC NVM} Caches Using Embedded Trace Buffers", journal = j-TODAES, volume = "29", number = "3", pages = "58:1--58:??", month = may, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659102", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 10 08:08:40 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659102", abstract = "Large volumes of on-chip and off-chip memory are required by contemporary applications. 
Emerging non-volatile memory technologies including STT-RAM, PCM, and ReRAM are becoming popular for on-chip and off-chip memories as a result of their desirable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2024:SML, author = "Nan Wu and Yingjie Li and Hang Yang and Hanqiu Chen and Steve Dai and Cong Hao and Cunxi Yu and Yuan Xie", title = "Survey of Machine Learning for Software-assisted Hardware Design Verification: Past, Present, and Prospect", journal = j-TODAES, volume = "29", number = "4", pages = "59:1--59:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3661308", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3661308", abstract = "With the ever-increasing hardware design complexity comes the realization that efforts required for hardware verification increase at an even faster rate. Driven by the push from the desired verification productivity boost and the pull from leap-ahead \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Han:2024:CMC, author = "Ruobing Han and Jun Chen and Bhanu Garg and Xule Zhou and John Lu and Jeffrey Young and Jaewoong Sim and Hyesoon Kim", title = "{CuPBoP}: Making {CUDA} a Portable Language", journal = j-TODAES, volume = "29", number = "4", pages = "60:1--60:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659949", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659949", abstract = "CUDA is designed specifically for NVIDIA GPUs and is not compatible with non-NVIDIA devices. Enabling CUDA execution on alternative backends could greatly benefit the hardware community by fostering a more diverse software ecosystem.\par To address the need for portability, our objective is to develop a framework that meets key requirements, such as extensive coverage, comprehensive end-to-end support, superior performance, and hardware scalability. Existing solutions that translate CUDA source code into other high-level languages, however, fall short of these goals.\par In contrast to these source-to-source approaches, we present a novel framework, CuPBoP, which treats CUDA as a portable language in its own right. Compared to two commercial source-to-source solutions, CuPBoP offers a broader coverage and superior performance for the CUDA-to-CPU migration. Additionally, we evaluate the performance of CuPBoP against manually optimized CPU programs, highlighting the differences between CPU programs derived from CUDA and those that are manually optimized.\par Furthermore, we demonstrate the hardware scalability of CuPBoP by showcasing its successful migration of CUDA to AMD GPUs. 
To promote further research in this field, we have released CuPBoP as an open-source resource.", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2024:LBP, author = "Xiang Zhao and Song Chen and Yi Kang", title = "Load Balanced {PIM-Based} Graph Processing", journal = j-TODAES, volume = "29", number = "4", pages = "61:1--61:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659951", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659951", abstract = "Graph processing is widely used for many modern applications, such as social networks, recommendation systems, and knowledge graphs. However, processing large-scale graphs on traditional Von Neumann architectures is challenging due to the irregular graph \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tian:2024:MRE, author = "Huan Tian and Jiewen Tang and Jun Li and Zhibing Sha and Fan Yang and Zhigang Cai and Jianwei Liao", title = "Modeling Retention Errors of {$3$D} {NAND} Flash for Optimizing Data Placement", journal = j-TODAES, volume = "29", number = "4", pages = "62:1--62:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659101", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659101", abstract = "Considering 3D NAND flash has a new property of process variation (PV), which causes different raw bit error rates (RBER) among different layers of the flash block. This article builds a mathematical model for estimating the retention errors of flash \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2024:CAW, author = "Zhisheng Chen and Xu Hu and Wenzhong Guo and Genggeng Liu and Jiaxuan Wang and Tsungyi Ho and Xing Huang", title = "Capacity-Aware Wash Optimization with Dynamic Fluid Scheduling and Channel Storage for Continuous-Flow Microfluidic Biochips", journal = j-TODAES, volume = "29", number = "4", pages = "63:1--63:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659952", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659952", abstract = "Continuous-flow microfluidic biochips are gaining increasing attention with promising applications for automatically executing various laboratory procedures in biology and biochemistry. Biochips with distributed channel-storage architectures enable each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2024:EWP, author = "Jian-De Li and Sying-Jyan Wang and Katherine Shu-Min Li and Tsung-Yi Ho", title = "Enhanced Watermarking for Paper-Based Digital Microfluidic Biochips", journal = j-TODAES, volume = "29", number = "4", pages = "64:1--64:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3661309", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3661309", abstract = "Paper-based digital microfluidic biochip (PB-DMFB) technology provides a promising solution to many biochemical applications. However, the PB-DMFB manufacturing process may suffer from potential security threats. For example, a Trojan insertion attack may \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Spieck:2024:SBD, author = "Jan Spieck and Stefan Wildermann and J{\"u}rgen Teich", title = "A Scenario-Based {DVFS}-Aware Hybrid Application Mapping Methodology for {MPSoCs}", journal = j-TODAES, volume = "29", number = "4", pages = "65:1--65:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3660633", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3660633", abstract = "Sound techniques for mapping soft real-time applications to resources are indispensable for meeting the application deadlines and minimizing objectives such as energy consumption, particularly on heterogeneous MPSoC architectures. For applications with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Joshi:2024:SPS, author = "Priyanka Joshi and Bodhisatwa Mazumdar", title = "Semi-Permanent Stuck-At Fault injection attacks on {Elephant} and {GIFT} lightweight ciphers", journal = j-TODAES, volume = "29", number = "4", pages = "66:1--66:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3662734", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3662734", abstract = "Fault attacks pose a potent threat to modern cryptographic implementations, particularly those used in physically approachable embedded devices in IoT environments. 
Information security in such resource-constrained devices is ensured using lightweight \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Soni:2024:SBH, author = "Lokesh Soni and Neeta Pandey", title = "A Single Bitline Highly Stable, Low Power With High Speed Half-Select Disturb Free {11T SRAM} Cell", journal = j-TODAES, volume = "29", number = "4", pages = "67:1--67:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3653675", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3653675", abstract = "A half-select disturb-free 11T (HF11T) static random access memory (SRAM) cell with low power, better stability and high speed is presented in this paper. The proposed SRAM cell works well with bit-interleaving design, which enhances soft-error immunity. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Esmaeilzadeh:2024:OSM, author = "Hadi Esmaeilzadeh and Soroush Ghodrati and Andrew Kahng and Joon Kyung Kim and Sean Kinzer and Sayak Kundu and Rohan Mahapatra and Susmita Dey Manasi and Sachin Sapatnekar and Zhiang Wang and Ziqing Zeng", title = "An Open-Source {ML}-Based Full-Stack Optimization Framework for Machine Learning Accelerators", journal = j-TODAES, volume = "29", number = "4", pages = "68:1--68:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3664652", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3664652", abstract = "Parameterizable machine learning (ML) accelerators are the product of recent breakthroughs in ML. To fully enable their design space exploration (DSE), we propose a physical-design-driven, learning-based prediction framework for hardware-accelerated deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gandhi:2024:ARL, author = "Upma Gandhi and Erfan Aghaeekiasaraee and Sahir and Payam Mousavi and Ismail S. K. Bustany and Mathew E. 
Taylor and Laleh Behjat", title = "Applying reinforcement learning to learn best net to rip and re-route in global routing", journal = j-TODAES, volume = "29", number = "4", pages = "69:1--69:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3664286", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3664286", abstract = "Physical designers typically employ heuristics to solve challenging problems in global routing. However, these heuristic solutions are not adaptable to the ever-changing fabrication demands, and the experience and creativity of designers can limit their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2024:CDC, author = "Cheng-Hsien Lin and Kuan-Ting Chen and Yi-Yu Liu and Allen C.-H. Wu and Tingting Hwang", title = "A Cost-Driven Chip Partitioning Method for Heterogeneous {$3$D} Integration", journal = j-TODAES, volume = "29", number = "4", pages = "70:1--70:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3672558", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3672558", abstract = "Three-dimensional integration circuit (3D IC) offers significant benefits in terms of performance and cost. Existing research in through-silicon via (TSV)-based 3D IC partitioning has focused on minimizing the number of TSVs to reduce costs. Partitioning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "70", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{McDaniel:2024:RSH, author = "Isaac McDaniel and Michael Zuzak and Ankur Srivastava", title = "Removal of {SAT-Hard} Instances in Logic Obfuscation Through Inference of Functionality", journal = j-TODAES, volume = "29", number = "4", pages = "71:1--71:??", month = jul, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3674903", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3674903", abstract = "Logic obfuscation is a prominent approach to protect intellectual property within integrated circuits during fabrication. Many attacks on logic locking have been proposed, particularly in the Boolean satisfiability (SAT) attack family, leading to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "71", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ogbogu:2024:DPE, author = "Chukwufumnanya Ogbogu and Biresh Joardar and Krishnendu Chakrabarty and Jana Doppa and Partha Pratim Pande", title = "Data Pruning-enabled High Performance and Reliable Graph Neural Network Training on {ReRAM}-based Processing-in-Memory Accelerators", journal = j-TODAES, volume = "29", number = "5", pages = "72:1--72:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3656171", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3656171", abstract = "Graph Neural Networks (GNNs) have achieved remarkable accuracy in cognitive tasks such as predictive analytics on graph-structured data. Hence, they have become very popular in diverse real-world applications. However, GNN training with large real-world \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "72", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2024:WWT, author = "Tinghuan Chen and Hao Geng and Qi Sun and Sanping Wan and Yongsheng Sun and Huatao Yu and Bei Yu", title = "{Wages}: The Worst Transistor Aging Analysis for Large-scale Analog Integrated Circuits via Domain Generalization", journal = j-TODAES, volume = "29", number = "5", pages = "73:1--73:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3659950", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3659950", abstract = "Transistor aging leads to the deterioration of analog circuit performance over time. The worst aging degradation is used to evaluate the circuit reliability. It is extremely expensive to obtain it since several circuit stimuli need to be simulated. The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2024:TTR, author = "Hongfei Wang and Jingyao Li and Jiayi Wang and Zijun Ping and Hongcan Xiong and Wei Liu and Dongmian Zou", title = "Translating Test Responses to Images for Test-termination Prediction via Multiple Machine Learning Strategies", journal = j-TODAES, volume = "29", number = "5", pages = "74:1--74:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3661310", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3661310", abstract = "Failure diagnosis is a software-based, data-driven procedure. Collecting an excessive amount of fail data not only increases the overall test cost but can also potentially reduce diagnostic resolution. Thus, test-termination prediction is proposed to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "74", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ghosh:2024:MBF, author = "Devleena Ghosh and Sumana Ghosh and Ansuman Banerjee and Raj Kumar Gajavelly and Sudhakar Surendran", title = "{MAB-BMC}: a Formal Verification Enhancer by Harnessing Multiple {BMC} Engines Together", journal = j-TODAES, volume = "29", number = "5", pages = "75:1--75:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3675168", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3675168", abstract = "In recent times, Bounded Model Checking (BMC) engines have gained wide prominence in formal verification. Different BMC engines exist, differing in their optimization, representations and solving mechanisms used to represent and navigate the underlying \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "75", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sabbagh:2024:ACA, author = "Negar Aghapour Sabbagh and Bijan Alizadeh", title = "Automatic Correction of Arithmetic Circuits in the Presence of Multiple Bugs by {Groebner} Basis Modification", journal = j-TODAES, volume = "29", number = "5", pages = "76:1--76:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3672559", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3672559", abstract = "One promising approach to verify large arithmetic circuits is making use of Symbolic Computer Algebra (SCA), where the circuit and the specification are translated to a set of polynomials, and the verification is performed by the ideal membership testing. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "76", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2024:PEA, author = "Changxu Liu and Hao Zhou and Patrick Dai and Li Shang and Fan Yang", title = "{PriorMSM}: an Efficient Acceleration Architecture for Multi-Scalar Multiplication", journal = j-TODAES, volume = "29", number = "5", pages = "77:1--77:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3678006", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3678006", abstract = "Multi-Scalar Multiplication (MSM) is a computationally intensive task that operates on elliptic curves based on GF(P). 
It is commonly used in zero-knowledge proof (ZKP), where it accounts for a significant portion of the computation time required for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "77", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2024:POA, author = "Xiaoqian Wu and Huaxiao Liu and Peng Wang and Lei Liu and Zhenxue He", title = "A Power Optimization Approach for Large-scale {RM-TB} Dual Logic Circuits Based on an Adaptive Multi-Task Intelligent Algorithm", journal = j-TODAES, volume = "29", number = "5", pages = "78:1--78:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3677033", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3677033", abstract = "Logic synthesis is a crucial step in integrated circuit design, and power optimization is an indispensable part of this process. However, power optimization for large-scale Mixed Polarity Reed--Muller (MPRM) logic circuits is an NP-hard problem. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "78", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bagchi:2024:PPO, author = "Aritra Bagchi and Dharamjeet and Ohm Rishabh and Manan Suri and Preeti Ranjan Panda", title = "{POEM}: Performance Optimization and Endurance Management for Non-volatile Caches", journal = j-TODAES, volume = "29", number = "5", pages = "79:1--79:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3653452", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3653452", abstract = "Non-volatile memories (NVMs), with their high storage density and ultra-low leakage power, offer promising potential for redesigning the memory hierarchy in next-generation Multi-Processor Systems-on-Chip (MPSoCs). However, the adoption of NVMs in cache \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "79", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2024:DLE, author = "Peng Xu and Siyuan Xu and Tinghuan Chen and Guojin Chen and Tsungyi Ho and Bei Yu", title = "{DeepOTF}: Learning Equations-constrained Prediction for Electromagnetic Behavior", journal = j-TODAES, volume = "29", number = "5", pages = "80:1--80:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3663476", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3663476", abstract = "High-quality passive devices are becoming increasingly important for the development of mobile devices and telecommunications, but obtaining such devices through simulation and analysis of electromagnetic (EM) behavior is time-consuming. To address this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mukherjee:2024:HIH, author = "Rijoy Mukherjee and Archisman Ghosh and Rajat Subhra Chakraborty", title = "{HLS-IRT}: Hardware {Trojan} Insertion through Modification of Intermediate Representation During High-Level Synthesis", journal = j-TODAES, volume = "29", number = "5", pages = "81:1--81:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3663477", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3663477", abstract = "Modern integrated circuit (IC) design incorporates the usage of proprietary computer-aided design (CAD) software and integration of third-party hardware intellectual property (IP) cores. Subsequently, the fabrication process for the design takes place in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "81", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bhat:2024:ISI, author = "Ganapati Bhat and Biresh Kumar Joardar and Mengying Zhao", title = "Introduction to the Special Issue on Embedded System Software\slash Tools", journal = j-TODAES, volume = "29", number = "5", pages = "82:1--82:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3682061", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3682061", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Deng:2024:OVI, author = "Can Deng and Zhaoyun Chen and Yang Shi and Yimin Ma and Mei Wen and Lei Luo", title = "Optimizing {VLIW} Instruction Scheduling via a Two-Dimensional Constrained Dynamic Programming", journal = j-TODAES, volume = "29", number = "5", pages = "83:1--83:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3643135", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3643135", abstract = "Typical embedded processors, such as Digital Signal Processors (DSPs), usually adopt Very Long Instruction Word (VLIW) architecture to improve computing efficiency. The performance of VLIW processors heavily relies on Instruction-Level Parallelism (ILP). \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "83", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lai:2024:GNS, author = "Chengtao Lai and Wei Zhang", title = "{gem5-NVDLA}: a Simulation Framework for Compiling, Scheduling, and Architecture Evaluation on {AI} System-on-Chips", journal = j-TODAES, volume = "29", number = "5", pages = "84:1--84:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3661997", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3661997", abstract = "Recent years have seen an increasing trend in designing AI accelerators together with the rest of the system, including CPUs and memory hierarchy. 
This trend calls for high-quality simulators or analytical models that enable such kind of co-exploration. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "84", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2024:ZZM, author = "Ping-Xiang Chen and Dongjoo Seo and Changhoon Sung and Jongheum Park and Minchul Lee and Huaicheng Li and Matias Bj{\o}rling and Nikil Dutt", title = "{ZoneTrace}: Zone Monitoring Tool for {F2FS} on {ZNS SSDs}", journal = j-TODAES, volume = "29", number = "5", pages = "85:1--85:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3656172", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3656172", abstract = "We present ZoneTrace, a runtime monitoring tool for the Flash-friendly File System (F2FS) on Zoned Namespace (ZNS) Solid-state Drives (SSDs). ZNS SSD organizes its storage into zones of sequential write access. Due to ZNS SSD's sequential write nature, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "85", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Aghapour:2024:ACA, author = "Ehsan Aghapour and Dolly Sapra and Andy Pimentel and Anuj Pathania", title = "{ARM-CO-UP}: {ARM COoperative} Utilization of Processors", journal = j-TODAES, volume = "29", number = "5", pages = "86:1--86:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3656472", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3656472", abstract = "HMPSoCs combine different processors on a single chip. They enable powerful embedded devices, which increasingly perform ML inference tasks at the edge. State-of-the-art HMPSoCs can perform on-chip embedded inference using different processors, such as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "86", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jordao:2024:ISD, author = "Rodolfo Jord{\~a}o and Matthias Becker and Ingo Sander", title = "{IDeSyDe}: Systematic Design Space Exploration via Design Space Identification", journal = j-TODAES, volume = "29", number = "5", pages = "87:1--87:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3647640", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3647640", abstract = "Design space exploration (DSE) is a key activity in embedded design processes, where a mapping between applications and platforms that meets the process design requirements must be found. 
Finding such mappings is very challenging due to the complexity of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "87", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yan:2024:MCT, author = "Wenyan Yan and Dongsheng Wei and Bin Fu and Renfa Li and Guoqi Xie", title = "A Mixed-Criticality Traffic Scheduler with Mitigating Congestion for {CAN}-to-{TSN} Gateway", journal = j-TODAES, volume = "29", number = "5", pages = "88:1--88:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3656173", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3656173", abstract = "The network architecture that Time-Sensitive Networking (TSN) is used as the backbone network and the Controller Area Network (CAN) serves as the intra-domain network is considered as the CAN-TSN interconnection network architecture, which has gained \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kim:2024:AHC, author = "Jiseung Kim and Hyunsei Lee and Mohsen Imani and Yeseong Kim", title = "Advancing Hyperdimensional Computing Based on Trainable Encoding and Adaptive Training for Efficient and Accurate Learning", journal = j-TODAES, volume = "29", number = "5", pages = "89:1--89:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3665891", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3665891", abstract = "Hyperdimensional computing (HDC) is a computing paradigm inspired by the mechanisms of human memory, characterizing data through high-dimensional vector representations, known as hypervectors. Recent advancements in HDC have explored its potential as a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2024:CST, author = "Mengyu Liu and Lin Zhang and Weizhe Xu and Shixiong Jiang and Fanxin Kong", title = "{CPSim}: Simulation Toolbox for Security Problems in Cyber-Physical Systems", journal = j-TODAES, volume = "29", number = "5", pages = "90:1--90:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3674904", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3674904", abstract = "There are various applications of Cyber-Physical systems (CPSs) that are life-critical where failure or malfunction can result in significant harm to human life, the environment, or substantial economic loss. Therefore, it is important to ensure their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "90", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Baroffio:2024:ECT, author = "Davide Baroffio and Federico Reghenzani and William Fornaciari", title = "Enhanced Compiler Technology for Software-based Hardware Fault Detection", journal = j-TODAES, volume = "29", number = "5", pages = "91:1--91:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3660524", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3660524", abstract = "Software-Implemented Hardware Fault Tolerance (SIHFT) is a modern approach for tackling random hardware faults of dependable systems employing solely software solutions. 
This work extends an automatic compiler-based SIHFT hardening tool called ASPIS, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "91", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{K:2024:FFA, author = "Keerthi K. and Chester Rebeiro", title = "{FortiFix}: a Fault Attack Aware Compiler Framework for Crypto Implementations", journal = j-TODAES, volume = "29", number = "5", pages = "92:1--92:??", month = sep, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3650029", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Sep 30 08:40:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3650029", abstract = "Fault attacks are one of the most powerful forms of cryptanalytic attack on embedded systems, which can corrupt a cipher's operations leading to a breach of confidentiality and integrity. A single precisely injected fault during the execution of a cipher can be exploited to retrieve the secret key in a few milliseconds. Naive countermeasures introduced into implementation can lead to huge overheads, making them unusable in resource-constraint environments. However, optimized countermeasures require significant knowledge, not only about the attack but also on the cryptographic properties of the cipher, the program structure, and the underlying hardware architecture. This makes the protection against fault attacks tedious and error prone.\par In this article, we introduce FortiFix, the first automated compiler framework that can detect and patch fault exploitable regions in a block cipher implementation. The framework has two phases. The pre-compilation phase identifies regions in the source code of a block cipher that are vulnerable to fault attacks.
The second phase is incorporated as transformation passes in the LLVM compiler to find exploitable instructions, quantify the impact of a fault on these instructions, and finally insert appropriate countermeasures based on user-defined security requirements. As a proof of concept, we have evaluated two block cipher implementations, AES-128 and CLEFIA-128, on three different hardware platforms: MSP430 (16-bit), ARM (32-bit), and RISCV (32-bit).", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "92", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sun:2024:EPP, author = "Xiaoyu Sun and Xiaochen Peng and Sai Qian Zhang and Jorge Gomez and Win-San Khwa and Syed Shakib Sarwar and Ziyun Li and Weidong Cao and Zhao Wang and Chiao Liu and Meng-Fan Chang and Barbara {De Salvo} and Kerem Akarvardar and H.-S. Philip Wong", title = "Estimating Power, Performance, and Area for On-Sensor Deployment of {AR\slash VR} Workloads Using an Analytical Framework", journal = j-TODAES, volume = "29", number = "6", pages = "93:1--93:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3670404", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3670404", abstract = "Augmented Reality and Virtual Reality have emerged as the next frontier of intelligent image sensors and computer systems. In these systems, 3D die stacking stands out as a compelling solution, enabling in situ processing capability of the sensory data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "93", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pereira:2024:MSS, author = "Danny Pereira and Sumana Ghosh and Soumyajit Dey", title = "Multi-Stream Scheduling of Inference Pipelines on Edge Devices --- a {DRL} Approach", journal = j-TODAES, volume = "29", number = "6", pages = "94:1--94:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3677378", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3677378", abstract = "Low-power edge devices equipped with Graphics Processing Units (GPUs) are a popular target platform for real-time scheduling of inference pipelines. Such application-architecture combinations are popular in Advanced Driver-assistance Systems for aiding in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "94", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2024:EAS, author = "Hongfei Wang and Wei Liu and Wenjie Cai and Yunxiao Lu and Caixue Wan", title = "Efficient Attacks on Strong {PUFs} via Covariance and {Boolean} Modeling", journal = j-TODAES, volume = "29", number = "6", pages = "95:1--95:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3687469", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3687469", abstract = "The physical unclonable function (PUF) is a widely used hardware security primitive. 
Before hacking into a PUF-protected system, intruders typically initiate attacks on the PUF as the first step. Many strong PUF designs have been proposed to thwart non-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "95", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhou:2024:RNI, author = "Chencan Zhou and Yang Cao and Quan Shi and Luxin Wang and Xiaoqing Wen", title = "A Robust {Newton} Iteration Method for Mixed-Cell-Height Circuit Legalization Under Technology and Region Constraints", journal = j-TODAES, volume = "29", number = "6", pages = "96:1--96:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3689436", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3689436", abstract = "The evolution of advanced technology nodes has prompted a shift toward mixed-cell-height circuit design, while the introduction of technology and fence region constraints further increases the complexity of placement. In this article, we innovatively \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "96", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nath:2024:AAL, author = "Arijit Nath and Hemangee K. 
Kapoor", title = "{AmLuCEP}: Amalgamating {LUT}-based Compression and Adaptive Encoding Assisted Block Placement To Improve Lifetime of {PCM}-based Main Memories", journal = j-TODAES, volume = "29", number = "6", pages = "97:1--97:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3689334", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3689334", abstract = "With the rising demands for high capacity memory and poor scalability of the existing DRAM-based main memories, the emerging Non-volatile memories captures higher attention due to their high density and low leakage power consumption. However, the possible \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "97", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2024:ATP, author = "Kean Chen and Mingsheng Ying", title = "Automatic Test Pattern Generation for Robust Quantum Circuit Testing", journal = j-TODAES, volume = "29", number = "6", pages = "98:1--98:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3689333", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3689333", abstract = "Quantum circuit testing is essential for detecting potential faults in realistic quantum devices, while the testing process itself also suffers from the inexactness and unreliability of quantum operations. This article alleviates the issue by proposing a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "98", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tseng:2024:BBA, author = "Wei-Hsiang Tseng and Yao-Wen Chang", title = "A Bridge-based Algorithm for Simultaneous Primal and Dual Defects Compression on Topologically Quantum-error-corrected Circuits", journal = j-TODAES, volume = "29", number = "6", pages = "99:1--99:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3695252", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3695252", abstract = "Topological quantum error correction (TQEC) using the surface code is among the most promising techniques for fault-tolerant quantum circuits. The required resource of a TQEC circuit can be modeled as a space-time volume of a three-dimensional diagram by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "99", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2024:ZFR, author = "Zhuoran Li and Danella Zhao", title = "{ZeroD-fender}: a Resource-aware {IoT} Malware Detection Engine via Fine-grained Side-channel Analysis", journal = j-TODAES, volume = "29", number = "6", pages = "100:1--100:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3687482", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3687482", abstract = "In early 2023, cyberattacks experienced a significant rise due to unknown (zero-day) malware targeting Internet of Things (IoT) devices. 
To tackle the challenge of zero-day detection within a highly resource-constrained IoT environment, we propose a novel \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "100", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Amuru:2024:TLE, author = "Deepthi Amuru and Raja Mavullu Vechalapu and Zia Abbas", title = "Transfer Learning Enabled Modeling Paradigm for {PVT}-aware Circuit Performance Estimation", journal = j-TODAES, volume = "29", number = "6", pages = "101:1--101:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3689435", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3689435", abstract = "Designing robust performance models for modern complex digital circuits in the face of rapidly accelerating process variations is a critical yet demanding task. This paper introduces an efficient statistical performance modeling approach for VLSI digital \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "101", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Fang:2024:PFS, author = "Wei-Kai Fang and Wai-Kei Mak", title = "Placement Flow Study and Detailed Placement for Hybrid-Row-Height Designs", journal = j-TODAES, volume = "29", number = "6", pages = "102:1--102:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3690385", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3690385", abstract = "At the 3 nm node, a hybrid-row-height design paradigm has emerged for better power efficiency and performance optimization. A diverse cell library that includes multiple variants of a cell with different fin counts is available. Instead of using cells \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "102", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gao:2024:MRM, author = "Zhenyi Gao and Sheqin Dong and Zhicong Tang and Wenjian Yu", title = "{MCMCF-Router}: Multi-capacity Ordered Escape Routing Algorithms for Grid\slash Staggered Pin Array", journal = j-TODAES, volume = "29", number = "6", pages = "103:1--103:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3695253", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3695253", abstract = "Ordered escape routing (OER), which means that the pins need to be routed to the boundary of a pin array in a given order, is an important research topic in PCB design. 
Although OER has been widely investigated, most works assume that the routing capacity \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "103", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sanjaya:2024:ABV, author = "Sahan Sanjaya and Hasini Witharana and Prabhat Mishra", title = "Assertion-Based Validation using Clustering and Dynamic Refinement of Hardware Checkers", journal = j-TODAES, volume = "29", number = "6", pages = "104:1--104:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3696108", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3696108", abstract = "Post-silicon validation is a vital step in System-on-Chip (SoC) design cycle. A major challenge in post-silicon validation is the limited observability of internal signal states using trace buffers. Hardware assertions are promising to improve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "104", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2024:EMD, author = "Jingui Lin and Wenxiong Lin and Shiyan Liang and Peng Gao and Yan Xing and Tingting Wu and Xiaoming Xiong and Shuting Cai", title = "An Efficient Method of {DRC} Violation Prediction with a Serial Deep Learning Model", journal = j-TODAES, volume = "29", number = "6", pages = "105:1--105:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3694968", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3694968", abstract = "In VLSI design, the utilization of Design Rule Check (DRC) tools in the early stage is crucial for predicting and resolving violations, thereby expediting the physical design process. In our study, we present an efficient model that predicts DRC \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "105", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dahiya:2024:RMC, author = "Ayush Dahiya and Poornima Mittal and Rajesh Rohilla", title = "Realizing In-Memory Computing using Reliable Differential {8T} {SRAM} for Improved Latency", journal = j-TODAES, volume = "29", number = "6", pages = "106:1--106:??", month = nov, year = "2024", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3696666", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Oct 21 06:29:26 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3696666", abstract = "Traditional von Neumann computing architectures suffer from high energy and lower speed as compared to the requirements of modern applications like those required in neural network accelerators. A modified differential eight transistor (8$^+$ T) static random \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "106", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhou:2025:DPA, author = "Quan Zhou and Si Cai and Jianjun Li and Yi Gao and Zhi Qu and Tao Jin", title = "Deadline and Period Assignment for Guaranteeing Timely Response of the Cyber-Physical System", journal = j-TODAES, volume = "30", number = "1", pages = "1:1--1:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3689048", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3689048", abstract = "Cyber-physical systems (CPSs) need to respond to each change of each monitored object in time. 
The entire response process can be divided into two stages: the update stage and the control stage. Tasks in CPSs can thus be divided into two kinds: update \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hussein:2025:SAD, author = "Dina Hussein and Taha Belkhouja and Ganapati Bhat and Jana Doppa", title = "Sensor-Aware Data Imputation for Time-Series Machine Learning on Low-Power Wearable Devices", journal = j-TODAES, volume = "30", number = "1", pages = "2:1--2:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3698195", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3698195", abstract = "Wearable devices that have low-power sensors, processors, and communication capabilities are gaining wide adoption in several health applications. The machine learning algorithms on these devices assume that data from all sensors are available during \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Esmaeilzadeh:2025:PAC, author = "Hadi Esmaeilzadeh and Soroush Ghodrati and Andrew B. Kahng and Sean Kinzer and Susmita Dey Manasi and Sachin S. 
Sapatnekar and Zhiang Wang", title = "Performance Analysis of {CNN} Inference\slash Training with Convolution and Non-Convolution Operations on {ASIC} Accelerators", journal = j-TODAES, volume = "30", number = "1", pages = "3:1--3:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3696665", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3696665", abstract = "Today's performance analysis frameworks for deep learning accelerators suffer from two significant limitations. First, although modern convolutional neural networks (CNNs) consist of many types of layers other than convolution, especially during training, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mo:2025:LCP, author = "Guiqi Mo and Yimin Xia and Jianhong Ou and Shuting Cai and Xiaoming Xiong", title = "Layout Congestion Prediction Based on Regression --- {ViT}", journal = j-TODAES, volume = "30", number = "1", pages = "4:1--4:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3698196", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3698196", abstract = "To accelerate the back-end design flow of integrated circuit (IC), numerous studies have made exploratory advancements in machine learning (ML) for electronic design automation (EDA). However, most research works are limited to deep learning (DL) models \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Bernasconi:2025:ADB, author = "Anna Bernasconi and Valentina Ciriani and Jordi Cortadella and Marco Costa and Tiziano Villa", title = "Area-driven {Boolean} Bi-decomposition by Function Approximation", journal = j-TODAES, volume = "30", number = "1", pages = "5:1--5:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3698879", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3698879", abstract = "Bi-decomposition rewrites logic functions as the composition of simpler components. It is related to Boolean division, where a given function is rewritten as the product of a divisor and a quotient, but bi-decomposition can be defined for any Boolean \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2025:SST, author = "Irith Pomeranz", title = "{SHAREDD}: Sharing of Test Data and Design-for-Testability Logic for Transition Fault Tests under Standard Scan", journal = j-TODAES, volume = "30", number = "1", pages = "6:1--6:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3698198", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3698198", abstract = "High reliability requirements in certain systems are combined with constraints on test overheads, including test data volume, test application time and design-for-testability (DFT) logic. The overheads can be reduced if they are shared among different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sharma:2025:PPR, author = "Sonam Sharma and Dipanjan Roy and Digambar Pawar", title = "{PROTECTS}: {Progressive Rtl Obfuscation with ThrEshold Control Technique during architectural Synthesis}", journal = j-TODAES, volume = "30", number = "1", pages = "7:1--7:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3701032", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3701032", abstract = "Due to the supply chain globalization of the semiconductor industry, securing heterogeneous System-on-Chip (SoC) is becoming necessary.
A malicious alteration, inserting Hardware Trojan, infringement, or counterfeiting of design via Reverse Engineering \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2025:SET, author = "Shiyuan Huang and Fangxin Liu and Tian Li and Zongwu Wang and Ning Yang and Haomin Li and Li Jiang", title = "{STCO}: Enhancing Training Efficiency via Structured Sparse Tensor Compilation Optimization", journal = j-TODAES, volume = "30", number = "1", pages = "8:1--8:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3701033", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3701033", abstract = "Network sparsification serves as an effective technique to accelerate Deep Neural Network (DNN) inference. However, existing sparsification techniques often rely on structured sparsity, which yields limited benefits. This is primarily due to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2025:FCS, author = "Hongfei Wang and Longyun Bian and Hongcan Xiong and Hai Jin", title = "Fast Candidate Screening for Post-diagnosis Refinement", journal = j-TODAES, volume = "30", number = "1", pages = "9:1--9:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3698197", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3698197", abstract = "Oftentimes fault candidates produced by logic diagnosis are too many to effectively guide the follow-on failure analysis. In this work, we propose a novel two-stage fast screening method to sift through a large number of candidates in the fault callout outputted \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2025:CRB, author = "Jiahong Xu and Haikun Liu and Xiaoyang Peng and Zhuohui Duan and Xiaofei Liao and Hai Jin", title = "A Cascaded {ReRAM}-based Crossbar Architecture for Transformer Neural Network Acceleration", journal = j-TODAES, volume = "30", number = "1", pages = "10:1--10:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3701034", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3701034", abstract = "Emerging resistive random-access memory (ReRAM) based processing-in-memory (PIM) accelerators have been increasingly explored in recent years because they can efficiently perform in-situ matrix-vector multiplication (MVM) operations involved in a wide \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Joseph:2025:RTB, author = "Tresa Joseph and Bindiya T. 
S", title = "Real-time Blood Pressure Prediction on Wearables with Edge-Based {DNNs}: a Co-Design Approach", journal = j-TODAES, volume = "30", number = "1", pages = "11:1--11:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3699512", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3699512", abstract = "This paper presents the hardware realization of a real-time blood pressure (BP) prediction model for wearable devices, utilizing long short-term memory (LSTM) deep neural networks (DNNs). The proposed system uses both electrocardiogram (ECG) and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jiang:2025:DDL, author = "He Jiang and Peiyu Zou and Xiaochen Li and Zhide Zhou and Xu Zhao and Yi Zhang and Shikai Guo", title = "{DeLoSo}: Detecting Logic Synthesis Optimization Faults Based on Configuration Diversity", journal = j-TODAES, volume = "30", number = "1", pages = "12:1--12:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3701232", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3701232", abstract = "Logic synthesis tools are the core components of digital circuit design, which convert programs written in hardware description languages into gate-level netlists and optimize the netlists. However, the netlist optimization is complex, with numerous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2025:WIV, author = "Ziyu Liu and Yukui Luo and Yuheng Zhang and Shijin Duan and Xiaolin Xu", title = "Watch Out for the Inherent Vulnerabilities in Developing Multi-tenant Cloud-{FPGA}: Communication Protocols", journal = j-TODAES, volume = "30", number = "1", pages = "13:1--13:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3702324", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3702324", abstract = "As FPGAs are being deployed in the cloud infrastructure for acceleration, the technology of multi-tenant FPGA has emerged as a topic of interest. This development has drawn considerable attention to its security issues. While previous research primarily \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2025:ACR, author = "Guangwei Zhao and Kaveh Shamsi", title = "Adversarial Circuit Rewriting against Graph Neural Network-based Operator Detection", journal = j-TODAES, volume = "30", number = "1", pages = "14:1--14:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3703911", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3703911", abstract = "Recent work has shown that graph neural networks (GNNs) can be used to recover high-level word operators and their boundaries in gate-level netlists. 
Unlike formal methods, however, the GNN does not prove functional equivalence. As such, there is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Fuentes:2025:SHT, author = "Francisco Fuentes and Sergi Alcaide and Raimon Casanova and Jaume Abella", title = "{SafeTI}: a Hardware Traffic Injector for Complex {MPSoC} Platform Validation and Characterization", journal = j-TODAES, volume = "30", number = "1", pages = "15:1--15:??", month = jan, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3703910", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Dec 21 08:47:11 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3703910", abstract = "Functional and timing validation of high performing safety-related platforms requires testing specific traffic patterns in the network-on-chip interconnects. Generally, testing needs to be performed by using software tests whose degree of control on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Gundi:2025:SEL, author = "Noel Daniel Gundi and Sanghamitra Roy and Koushik Chakraborty", title = "{STRIVE}: Empowering a Low Power Tensor Processing Unit with Fault Detection and Error Resilience", journal = j-TODAES, volume = "30", number = "2", pages = "16:1--16:??", month = mar, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3705003", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 13 06:21:20 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3705003", abstract = "Rapid growth in Deep Neural Network (DNN) workloads has increased the energy footprint of the Artificial Intelligence (AI) computing realm. For optimum energy efficiency, we propose operating a DNN hardware in the Low-Power Computing (LPC) region. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Fang:2025:GPE, author = "Donghao Fang and Boyang Zhang and Hailiang Hu and Wuxi Li and Bo Yuan and Jiang Hu", title = "Global Placement Exploiting Soft {$2$D} Regularity", journal = j-TODAES, volume = "30", number = "2", pages = "17:1--17:??", month = mar, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3705729", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Feb 13 06:21:20 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/10.1145/3705729", abstract = "Cell placement is a step of paramount importance in chip physical design and requests relentless effort for continuous improvement. 
Recently, designs with two-dimensional (2D) processing element arrays have become popular primarily due to their deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Yang:2025:PIL,
  author =       "Xiaoman Yang and Haibao Chen and Yuhan Zhang and Tianshu Hou and Pengpeng Ren and Runsheng Wang and Zhigang Ji and Ru Huang",
  title =        "Physics-Informed Learning Based Multiphysics Simulation for Fast Transient {TSV} Electromigration Analysis",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3706106",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3706106",
  abstract =     "Through Silicon Vias (TSVs) are vulnerable to electromigration (EM) degradation due to their high local current densities, thereby reducing the reliability of 3D ICs with stack dies and TSVs. Due to the broad application of 3D ICs, it is necessary to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Maity:2025:HML,
  author =       "Srijeeta Maity and Anirban Majumder and Rudrajyoti Roy and Ashish Hota and Soumyajit Dey",
  title =        "Harnessing Machine Learning in Dynamic Thermal Management in Embedded {CPU--GPU} Platforms",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3708890",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3708890",
  abstract =     "With increasing transistor density, modern heterogeneous embedded processors often exhibit high temperature gradients due to complex application scheduling scenarios which may have missed design considerations. In many use cases, off-chip ``active'' cooling \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Xu:2025:SGD,
  author =       "Zhihao Xu and Shikai Guo and Xiaochen Li and Zun Wang and He Jiang",
  title =        "{SIMTAM}: Generation Diversity Test Programs for {FPGA} Simulation Tools Testing Via Timing Area Mutation",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3705730",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3705730",
  abstract =     "Field-Programmable Gate Array (FPGA) timing simulation is essential in electronic circuit design, allowing for the verification of timing characteristics like delays and clock frequencies. However, bugs in timing simulation tools can lead to inaccurate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wen:2025:PPW,
  author =       "Chenyi Wen and Haonan Du and Jiayi Wang and Zhengrui Chen and Li Zhang and Qi Sun and Cheng Zhuo",
  title =        "{PACE}: a Piece-Wise Approximate Floating-Point Divider with Runtime Configurability and High Energy Efficiency",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3706634",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3706634",
  abstract =     "Approximate computing emerges as a viable solution to enhance energy efficiency in applications sensitive to human perception, particularly on edge devices. This work introduces a novel piece-wise approximate floating-point divider that boasts resource \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhao:2025:ISS,
  author =       "Gaoyang Zhao and Junzhong Shen and Rongzhen Lin and Hua Li and Yaohua Wang",
  title =        "{ISOAcc}: In-situ Shift Operation-based Accelerator For Efficient in-{SRAM} Multiplication",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3707205",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3707205",
  abstract =     "Digital SRAM-based CIM architectures must balance three critical factors: quantized neural network bitwidth, accuracy loss, and computational efficiency, each crucial to optimizing performance and efficiency. In Domain Specific Accelerators (DSAs), \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Deligiannis:2025:EES,
  author =       "Nikolaos Deligiannis and Tobias Faller and Josie Esteban {Rodriguez Condia} and Riccardo Cantoro and Bernd Becker and Matteo {Sonza Reorda}",
  title =        "Enhancing the Effectiveness of {STLs} for {GPUs} via Bounded Model Checking",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3706635",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3706635",
  abstract =     "Graphics Processing Units (GPUs) are becoming widespread, even in safety-critical applications. In that case, it is imperative to guarantee that the probability of producing critical failures due to hardware faults is lower than a given threshold. To \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Vatsavai:2025:HHT,
  author =       "Sairam Sri Vatsavai and Venkata Sai Praneeth Karempudi and Ishan Thakkar",
  title =        "{HEANA}: a Hybrid Time-Amplitude Analog Optical Accelerator with Flexible Dataflows for Energy-Efficient {CNN} Inference",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711845",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711845",
  abstract =     "Several photonic microring resonator (MRR)-based analog accelerators have been proposed to accelerate the inference of integer-quantized Convolutional Neural Networks (CNNs) with remarkably higher throughput and energy efficiency compared to their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Rashed:2025:LLS,
  author =       "Muhammad Rashedul Haq Rashed and Sven Thijssen and Sumit Jha and Rickard Ewetz",
  title =        "{LOGIC}: Logic Synthesis for Digital In-Memory Computing",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711848",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711848",
  abstract =     "In-memory processing offers a promising solution for enhancing the performance of data-intensive applications.
While analog in-memory computing demonstrates remarkable efficiency, its limited precision is suitable only for approximate computing tasks. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pouget:2025:AHP,
  author =       "St{\'e}phane Pouget and Louis-No{\"e}l Pouchet and Jason Cong",
  title =        "Automatic Hardware Pragma Insertion in High-Level Synthesis: a Non-Linear Programming Approach",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "26:1--26:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711847",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711847",
  abstract =     "High-Level Synthesis enables the rapid prototyping of hardware accelerators, by combining a high-level description of the functional behavior of a kernel with a set of micro-architecture optimizations as inputs. Such optimizations can be described by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2025:AHC,
  author =       "Guoqing Li and Rengang Li and Tuo Li and Tinghuan Chen and Meng Zhang and Henk Corporaal",
  title =        "Algorithm-Hardware Co-design for Accelerating Depthwise Separable {CNNs}",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711846",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711846",
  abstract =     "Depthwise separable convolution (DSC) is a popular method for constructing lightweight neural networks. However, the pointwise convolution (PWC) has a much larger number of parameters than the depthwise convolution (DWC), causing the imbalanced parameter \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chen:2025:NPS,
  author =       "Jinchao Chen and Yang Wang and Ying Zhang and Yantao Lu and Qing Li and Qiuhao Shu",
  title =        "Non-Preemptive Scheduling of Periodic Tasks with Data Dependencies in Heterogeneous Multiprocessor Embedded Systems",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711849",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711849",
  abstract =     "Heterogeneous multiprocessor architecture is frequently employed as an economical and efficient means of providing excellent parallel processing capabilities while keeping production cost and power consumption under control. Although this architecture \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lin:2025:MRG,
  author =       "Yi-Ting Lin and Kang-Ting Fan and Iris Hui-Ru Jiang",
  title =        "Multi-Row Guiding Template Design for Lamellar Directed Self-Assembly with Self-Aligned Via Process",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "29:1--29:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3711851",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3711851",
  abstract =     "Directed self-assembly (DSA) of block copolymers can generate tiny and dense layout features, holding great potential for patterning vias and contacts at advanced nodes. Existing studies mainly focused on guiding template design for cylindrical DSA, but \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Cai:2025:SES,
  author =       "Huayang Cai and Pengcheng Huang and Genggeng Liu and Xing Huang and Yidan Jing and Wenhao Liu and Ting-Chi Wang",
  title =        "{SPTA 2.0}: Enhanced Scalable Parallel Track Assignment Algorithm with Two-Stage Partition Considering Timing Delay",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3712009",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3712009",
  abstract =     "Routability has always been a significant challenge in Very Large Scale Integration (VLSI) design. To overcome the potential mismatch between the global routing results and the detailed routing requirements, track assignment is introduced to achieve an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2025:EAR,
  author =       "Yuhao Zhou and Jianhui Jiang and Zhenxue He and Ying Zhang and Chengcheng Chen and Zhanhui Shi and Wei Zhang and Keying Yang",
  title =        "An Efficient Area and Reliability Optimization Method for {MPRM} Circuits Based on High-dimensional Genetic Algorithm",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3712591",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3712591",
  abstract =     "Area and reliability optimization have become the primary constraints in circuits logic synthesis. To address the increasing area and transient fault susceptibility in combinational circuits, we propose a high-dimensional genetic algorithm (HGA). HGA \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2025:UCC,
  author =       "Yike Zhou and Yanyan Jiang and Jian Lu",
  title =        "Unveiling Cross-checking Opportunities in {Verilog} Compilers",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3715325",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3715325",
  abstract =     "The landscape of Verilog toolchains for electronic design automation (EDA) is diverse, and their reliability is crucial, as errors can lead to significant debugging challenges and delays in development. Methodologies such as testing and formal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Balaguera:2025:EFE,
  author =       "Juan David {Guerrero Balaguera} and Josie Esteban {Rodriguez Condia} and Matteo {Sonza Reorda}",
  title =        "Effective Fault Effects Evaluation for Permanent Faults in {GPUs} executing {DNNs}",
  journal =      j-TODAES,
  volume =       "30",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3715327",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 13 06:21:20 MST 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3715327",
  abstract =     "Deep Neural Networks (DNNs) have permeated multiple applications, including cutting-edge safety-critical domains, which require relevant computational power, often provided by Graphic Processing Units (GPUs). GPUs are manufactured with advanced \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pan:2025:SRL,
  author =       "Jingyu Pan and Guanglei Zhou and Chen-Chia Chang and Isaac Jacobson and Jiang Hu and Yiran Chen",
  title =        "A Survey of Research in Large Language Models for Electronic Design Automation",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "34:1--34:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3715324",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3715324",
  abstract =     "Within the rapidly evolving domain of Electronic Design Automation (EDA), Large Language Models (LLMs) have emerged as transformative technologies, offering unprecedented capabilities for optimizing and automating various aspects of electronic design. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gouert:2025:DPM,
  author =       "Charles Gouert and Nektarios Georgios Tsoutsos",
  title =        "Data Privacy Made Easy: Enhancing Applications with Homomorphic Encryption",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "35:1--35:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3715877",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3715877",
  abstract =     "Homomorphic encryption is a powerful privacy-preserving technology that is notoriously difficult to configure and use, even for experts.
The key difficulties include restrictive programming models of homomorphic schemes and choosing suitable parameters \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ahmadi-Pour:2025:MTM,
  author =       "Sallar Ahmadi-Pour and Sangeet Saha and Klaus McDonald-Maier and Rolf Drechsler",
  title =        "{MESSI}: Task Mapping and Scheduling Strategy for {FPGA}-based Heterogeneous Real-Time Systems",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "36:1--36:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3715323",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3715323",
  abstract =     "Continuous demands for improved performance within constrained resource budgets are driving a move from homogeneous to heterogeneous processing platforms for the implementation of today's Real-Time (RT) embedded systems. The applications executing on such \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sunkavilli:2025:NDC,
  author =       "Sandeep Sunkavilli and Nishanth Chennagouni and Qiaoyan Yu",
  title =        "A New Dynamic Countermeasure to Strengthen Design Obfuscation in {FPGAs}",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "37:1--37:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3716502",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3716502",
  abstract =     "FPGAs are being challenged by various security threats, including reverse engineering attacks, hardware tampering, and side-channel analysis attacks. Although the existing static obfuscation methods can protect FPGA systems from IP piracy and hardware \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wu:2025:DPF,
  author =       "Wenxue Wu and Tong Zhang and Zhen Li and Xiaoqin Feng and Liwei Zhang and Fengyuan Ren",
  title =        "Dynamic Per-Flow Queues in Shared Buffer {TSN} Switches",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "38:1--38:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3718087",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3718087",
  abstract =     "Time-Sensitive Networking (TSN), as an enhancement based on Ethernet, can ensure deterministic traffic transmission with low delays and minimal jitters.
However, TSN switches have only eight priority queues inherited from Ethernet at each egress port, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gupta:2025:SRS,
  author =       "Aastha Gupta and Ravi Sindal and Vaibhav Neema",
  title =        "Secure \& Reliable {10T} {SRAM} Cell during Read, Write and Hold Operations against Power Analysis Attack",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "39:1--39:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3718086",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3718086",
  abstract =     "Cryptography is essential to ensure data security in embedded devices that handle sensitive data. SRAM boosts overall performance by temporarily storing cryptographic keys. However, attackers can use side-channel, such as Power Analysis, to exploit power \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Jiang:2025:LET,
  author =       "Cong Jiang and Haoyang Sun and Dan Feng and Zhiyao Xie and Benjamin Tan and Kang Liu",
  title =        "{LithoExp}: Explainable Two-stage {CNN}-based Lithographic Hotspot Detection with Layout Defect Localization",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "40:1--40:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3721129",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3721129",
  abstract =     "Convolutional neural networks (CNNs) successfully detect lithographic hotspots by learning from hand-designed features of layout patterns or entire layouts, as images, in an end-to-end fashion. However, compared to lithography simulation, CNN-based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sharma:2025:HCA,
  author =       "Harsh Sharma and Pratyush Dhingra and Jana Doppa and Umit Ogras and Partha Pratim Pande",
  title =        "A Heterogeneous Chiplet Architecture for Accelerating End-to-End Transformer Models",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "41:1--41:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3718487",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3718487",
  abstract =     "Transformers have revolutionized deep learning and generative modeling, enabling advancements in natural language processing tasks.
However, the size of transformer models is increasing continuously, driven by enhanced capabilities across various deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Feng:2025:HIR,
  author =       "Xingwei Feng and Yifan Xu and Zhangcheng Huang and Wuyi Xu and Zhaori Bi and Fan Yang and Xuan Zeng and Ye Lu",
  title =        "Hierarchical Integration of Reinforcement Learning and Optimization Algorithms for Time-Efficient Design Automation of Complex Analog Circuit",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "42:1--42:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3723162",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3723162",
  abstract =     "Design automation of complex analog circuits (CAC) with multiple sub-blocks is challenging mainly due to large design search space, uncertain intermediate subgoal creation, and lengthy CAC simulation runtime. In this work, we propose a hierarchical and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhang:2025:FTC,
  author =       "Liwei Zhang and Tong Zhang and Xiaoqin Feng and Yanying Ma and Hao Yang and Fengyuan Ren",
  title =        "Fault-Tolerant Cyclic Queuing and Forwarding with Fast {ACK} in Time-Sensitive Networking",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "43:1--43:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3723163",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3723163",
  abstract =     "TSN is widely used in industrial automation networks because it can provide deterministic transmission services for critical data. Cyclic Queuing and Forwarding (CQF) is used to shape critical data. However, unexpected data errors may occur due to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ma:2025:HCA,
  author =       "Yuang Ma and Yulong Meng and Zihao Xuan and Song Chen and Yi Kang",
  title =        "{HNM-CIM}: an Algorithm-Hardware Co-designed {SRAM}-based {CIM} for Transformer Acceleration Exploiting Hybrid {N:M} Sparsity",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "44:1--44:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3724394",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3724394",
  abstract =     "SRAM-based computing-in-memory (CIM) is an efficient technology for computing neural networks where matrix operations are dominated.
However, leveraging sparsity in CIM presents challenges due to the crossbar architecture, which complicates the avoidance \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pomeranz:2025:TTG,
  author =       "Irith Pomeranz",
  title =        "Test Templates to Guide Test Generation for Single-Cycle Gate-Exhaustive Faults",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "45:1--45:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3724395",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3724395",
  abstract =     "Advanced fault models, such as the defect-aware, cell-aware, and gate-exhaustive fault models, associate several faults with each standard cell or gate of a design. Test generation procedures, including ones that target advanced fault models, produce \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hu:2025:CAD,
  author =       "Jian Hu and Zhenlei Liu",
  title =        "Context-aware Data Augmentation for Hardware Code Fault localization",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3725889",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3725889",
  abstract =     "The maintenance of quality and reliability in hardware products inherently relies on the verification of hardware code.
Despite being a time-consuming process, the localization of faults in hardware code is essential for effective hardware verification. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Xu:2025:EEM,
  author =       "Jiahao Xu and Zhuolun He and Shuo Yin and Yuan Pu and Wenjian Yu and Bei Yu",
  title =        "{EasyMRC}: Efficient Mask Rule Checking via Representative Edge Sampling",
  journal =      j-TODAES,
  volume =       "30",
  number =       "3",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2025",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3723044",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri May 16 07:23:18 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3723044",
  abstract =     "The photolithography process is getting more sophisticated with technology node scaling down and VLSI designs becoming complex. As photomask patterns get finer, mask rule checks are inevitable to avoid discrepancies in the layout and to ensure \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhou:2025:CLO, author = "Zhaoxu Zhou and Zihang Huang and Junwei Li and Yanjiang Liu and Zibin Dai", title = "{CRM\_BF}: a Low-Overhead, High-Efficient and Reconfigurable Operation Unit Design Approach Using the Customized {Reed--Muller} Unit For {Boolean} Functions of Sequence Cipher Algorithms", journal = j-TODAES, volume = "30", number = "3", pages = "48:1--48:??", month = may, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3725869", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 16 07:23:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Sequence ciphers algorithms encrypt or decrypt information at a low cost and high speed compared to other cryptographic algorithms, which are widely applied to critical applications and sensitive fields. As the core component of sequence ciphers, Boolean \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2025:DRL, author = "Chunlin Li and Long Chai and Yong Zhang and Mengjie Yang and Ruidong Zhao and Zihao Zhang and Denghua Li and Shaohua Wan", title = "Deep Reinforcement Learning-Based Resource Allocation with Enhanced Perception and Low-Latency for Autonomous Driving in {ISAC}-aided {VEC}", journal = j-TODAES, volume = "30", number = "3", pages = "49:1--49:??", month = may, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3727146", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 16 07:23:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As autonomous driving technology advances, the intelligence levels of vehicles continue to increase. However, meeting the demands of autonomous driving in various scenarios requires improved wireless communication and vehicle perception capabilities. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2025:OFR, author = "Wenhao Liu and Yan Xing and Shuting Cai and Weijun Li and Xiaoming Xiong", title = "Optimizing {FPGA} Routing with Explainable Co-Learning of Congestion and Wirelength", journal = j-TODAES, volume = "30", number = "3", pages = "50:1--50:??", month = may, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3728467", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 16 07:23:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In FPGA routing, machine learning-based optimization methods have achieved improved routing solutions by integrating traditional heuristics with predictive capabilities. However, these approaches mostly relied on single-task learning models with black-box \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2025:DSE, author = "Wei-Chun Huang and Chih-Wei Tang and Kuei-Chung Chang and Tien-Fu Chen and Hsiang-Cheng Hsieh and Ming-Hsuan Tsai", title = "Design Space Exploration for Scalable {DNN} Accelerators Using a Memory-Centric Analytical Model for {HW\slash SW} Co-Design", journal = j-TODAES, volume = "30", number = "3", pages = "51:1--51:??", month = may, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3729227", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri May 16 07:23:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As Deep Neural Network (DNN) models became more complex, the escalating computational demands on hardware made DNN accelerators a critical research topic. 
The rapid growth of DNN models required DNN accelerators to keep pace with these computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jalilvand:2025:SIH, author = "Amir Jalilvand and Faeze S. Banitaba and S. Newsha Estiri and Sercan Aygun and M. Hassan Najafi", title = "Sorting it out in Hardware: a State-of-the-Art Survey", journal = j-TODAES, volume = "30", number = "4", pages = "52:1--52:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3734797", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Sorting is a fundamental operation in various applications and a traditional research topic in computer science. Improving the performance of sorting operations can have a significant impact on many application domains. Much attention has been paid to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mahmoudi:2025:SMS, author = "Ahmed Mahmoudi and Andrija Ne{\v{s}}kovi{\'c} and Celine Thermann and Robin Sehm and Christoph H{\"u}bner and Tavia Plattenteich and Rolf Meyer and Rainer Buchty and Mladen Berekovic and Saleh Mulhem", title = "A Systematic Mapping Study on {SystemC\slash TLM} Modeling Capabilities in New Research Domains", journal = j-TODAES, volume = "30", number = "4", pages = "53:1--53:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735641", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With increasingly complex circuits and systems, the need for advanced design methodologies is growing. These methodologies shift the designers' focus from technology-specific implementations to more abstract electronic system design (ESL). SystemC was \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhu:2025:BHD, author = "Binwu Zhu and Su Zheng and Yuzhe Ma and Bei Yu and Martin Wong", title = "Bridging Hotspot Detection and Mask Optimization via Domain-Crossing Masked Layout Modeling", journal = j-TODAES, volume = "30", number = "4", pages = "54:1--54:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3728468", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the rapid development of semiconductors, the size of transistors is continuously scaling down. 
The shrinking circuit size poses great challenges to optical proximity correction (OPC) and hotspot detection (HSD). Recent advancements in OPC and HSD \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2025:ESD, author = "Jingui Lin and Shiyan Liang and Wenxiong Lin and Peng Gao and Yan Xing and Tingting Wu and Xiaoming Xiong and Shuting Cai", title = "Early Stage {DRC} Hotspot Prediction for Mixed-Size Designs Through an Efficient Graph-Based Deep Learning", journal = j-TODAES, volume = "30", number = "4", pages = "55:1--55:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3733236", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Predicting hotspot locations in the early stage of Design Rule Check (DRC) is crucial for designers to proactively prevent design rule violations. However, obtaining an accurate and efficient predictor faces significant challenges due to the influence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Heidari:2025:FAC, author = "Mahsa Heidari and Bijan Alizadeh", title = "{FixRTL}: Auto-correction of Multiple {RTL} Bugs by a New Feature Burst Clustering Algorithm and Mutation", journal = j-TODAES, volume = "30", number = "4", pages = "56:1--56:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3733238", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Existing debugging and correction approaches suffer from weaknesses such as scalability, reproducing new bugs, and lacking a strategy to deal with multiple bugs. Hence, this article proposes FixRTL, a fully automated scalable methodology for localizing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shohel:2025:AST, author = "Mohammad Abdullah {Al Shohel} and Vidya A. Chhabria and Nestor Evmorfopoulos and Sachin S. Sapatnekar", title = "An Analytical Solution for Transient Electromigration Stress in Multisegment Straight-line Interconnects Based on a Stress-wave Model", journal = j-TODAES, volume = "30", number = "4", pages = "57:1--57:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3734796", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work presents an analytical approach for analyzing electromigration (EM) in modern technologies that use copper dual damascene (Cu DD) interconnects. 
In these technologies, due to design rule and methodology constraints, wires are typically laid out \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2025:PHP, author = "Ying Wang and Haopeng Yan and Yiwen Zhang and Peng Gao and Fei Yu and Xiaoming Xiong and Shuting Cai", title = "{PSCaps}: High-Performance Pose-Sensitive Layout Hotspot Detector based on {CapsNet}", journal = j-TODAES, volume = "30", number = "4", pages = "58:1--58:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735132", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Advanced technology nodes face challenges with Design Rule Violations (DRVs), primarily due to the possibility of nm-level small variations that can lead to the occurrence of DRVs. Various Machine Learning (ML) techniques have been introduced to detect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2025:DDT, author = "Irith Pomeranz", title = "{DTGx2}: Dual Target Diagnostic Test Generation", journal = j-TODAES, volume = "30", number = "4", pages = "59:1--59:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735131", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Logic diagnosis is important for deriving information about defects that are present in fabricated units when they are found to be faulty. This information can assist in yield learning and improvement. When needed, the accuracy of logic diagnosis can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2025:IIL, author = "Nuo Xu and Yihong Hu and Chaochao Feng and Wei Tong and Kang Liu and Liang Fang", title = "{ILOSSS} --- Improved Logic Synthesis based on Several Stateful Logic Gates", journal = j-TODAES, volume = "30", number = "4", pages = "60:1--60:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3731245", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Memristor stateful logic is an effective way to achieve the real sense of in-memory computing in memristor-based crossbar array (MCBA). At present, the synthesis tools fall short in conducting a thorough exploration of the optimization potential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "60", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2025:HHA, author = "Wenqian Zhao and Lancheng Zou and Zixiao Wang and Xufeng Yao and Bei Yu", title = "{HAPE}: Hardware-Aware {LLM} Pruning For Efficient On-Device Inference Optimization", journal = j-TODAES, volume = "30", number = "4", pages = "61:1--61:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744244", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Over the past few years, large language models (LLMs) have demonstrated remarkable performance and versatility across a variety of complex tasks. However, their deployment has been challenged by their substantial model size and computational requirements. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "61", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Tang:2025:CZS, author = "Mingxin Tang and Wei Chen and Lizhou Wu and Libo Huang and Kun Zeng", title = "{ChatDSE}: a Zero-Shot Microarchitecture Design Space Explorer Powered by {GPT4.0}", journal = j-TODAES, volume = "30", number = "4", pages = "62:1--62:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735640", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Design Space Exploration (DSE) aims at identifying Pareto optimal synthesis configurations. Previous works require microarchitecture samples with key labels, including power and clock cycles, to train their models. 
However, as the chip design space \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "62", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Collini:2025:AAD, author = "Luca Collini and Jitendra Bhandari and Chiara Muscari Tomajoli and Abdul Moosa and Benjamin Tan and Xifan Tang and Pierre-Emmanuel Gaillardon and Ramesh Karri and Christian Pilato", title = "{ARIANNA}: an Automatic Design Flow for Fabric Customization and {eFPGA} Redaction", journal = j-TODAES, volume = "30", number = "4", pages = "63:1--63:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3737287", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the modern global Integrated Circuit (IC) supply chain, protecting intellectual property (IP) is a complex challenge, and balancing IP loss risk and added cost for theft countermeasures is hard to achieve. Using embedded configurable logic allows \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "63", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ahmadi-Pour:2025:FLF, author = "Sallar Ahmadi-Pour and Sajjad Parvin and Chandan Kumar Jha and Rolf Drechsler", title = "{FV-LIDAC}: Formally Verified Library of Input Data Aware Approximate Arithmetic Circuits", journal = j-TODAES, volume = "30", number = "4", pages = "64:1--64:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744710", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Approximate circuits have become ubiquitous in error-resilient applications. These circuits provide large reductions in area, power, and delay at the cost of erroneous computations. The error-resilient applications produce acceptable output quality, even \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "64", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Rajan:2025:SNC, author = "Manju Rajan and Abhijit Das and John Jose", title = "Securing Network-on-Chips against {Trojan}-Induced Packet Duplication Attacks", journal = j-TODAES, volume = "30", number = "4", pages = "65:1--65:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744645", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The third-party Intellectual Property (IP) supply chain exposes System-on-Chip designs to malicious implants like Hardware Trojans (HTs). 
With extremely rare trigger conditions, some HTs can evade conventional and even machine learning-based validation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "65", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Uhlmann:2025:IVP, author = "Yannick Uhlmann and Till Moldenhauer and Juergen Scheible", title = "Interactive Visual Performance Space Exploration of Operational Amplifiers with Differentiable Neural Network Surrogate Models", journal = j-TODAES, volume = "30", number = "4", pages = "66:1--66:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744245", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "To this day, the design of analog integrated circuits is a predominantly manual task, heavily reliant on the knowledge and intuition of human experts. Many current automation approaches aim to be holistic solutions, attempting to take the human out of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "66", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2025:COH, author = "Yuefei Wang and Wendong Mao and Lang Feng and Jin Sha and Zhongfeng Wang", title = "A {CPU+FPGA OpenCL} Heterogeneous Computing Platform for Multi-Kernel Pipeline", journal = j-TODAES, volume = "30", number = "4", pages = "67:1--67:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744922", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Over the past decades, Field-Programmable Gate Arrays (FPGAs) have become a choice for heterogeneous computing due to their flexibility, energy efficiency, and processing speed. OpenCL is used in FPGA heterogeneous computing for its high-level abstraction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "67", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2025:LSQ, author = "Po-Wei Chen and Sheng-Tan Huang and Shao-Yun Fang", title = "Layout Synthesis for Quantum Circuits Considering {Toffoli} Gate Decomposition", journal = j-TODAES, volume = "30", number = "4", pages = "68:1--68:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3744646", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "State-of-the-art studies on quantum layout synthesis have proposed various approaches based on the assumption that the input circuit is only composed of single-qubit and two-qubit gates. 
This assumption greatly simplifies the layout synthesis problem, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "68", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xing:2025:CPT, author = "Yan Xing and Hongtao Hu and Weijun Li and Shuting Cai and Xiaoming Xiong", title = "Concurrent Prediction of Timing and Wire Length Using a Multi-Task Graph Neural Network", journal = j-TODAES, volume = "30", number = "4", pages = "69:1--69:??", month = jul, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3747181", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Jul 26 08:03:38 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditional supervised single-task learning models are used in timing-driven placement exploration to improve both effectiveness and efficiency by predicting wire length, wire delay, and cell delay separately. However, these metrics are interdependent, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "69", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Qin:2025:EGD, author = "Ruiyang Qin and Dancheng Liu and Chenhui Xu and Zheyu Yan and Zhaoxuan Tan and Zhenge Jia and Amir Nassereldine and Jiajie Li and Meng Jiang and Ahmed Abbasi and Jinjun Xiong and Yiyu Shi", title = "Empirical Guidelines for Deploying {LLMs} onto Resource-constrained Edge Devices", journal = j-TODAES, volume = "30", number = "5", pages = "70:1--70:58", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3736721", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The scaling laws have become the de facto guidelines for designing large language models (LLMs), but they were studied under the assumption of unlimited computing resources for both training and inference. As LLMs are increasingly used as personalized \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "70", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2025:RDN, author = "Peng Xu and Su Zheng and Mingzi Wang and Ziyang Yu and Shixin Chen and Tinghuan Chen and Keren Zhu and Tsungyi Ho and Bei Yu", title = "{Rank-DSE}: Neural {Pareto} Comparator of Microarchitecture Design Space Exploration", journal = j-TODAES, volume = "30", number = "5", pages = "71:1--71:24", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3747294", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The complexity of microarchitecture design has surged due to the expanding design space and time-intensive verification processes. Existing regression-based machine learning methods struggle with inaccurate estimations because of limited training samples. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "71", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Das:2025:PPS, author = "Upoma Das and Mohammad Rahman and Akshay Kulkarni and Mark Tehranipoor and Farimah Farahmandi", title = "{PSCMark}: Power Side Channel-based Watermarking for {SoC IPs} Using Clock Gates", journal = j-TODAES, volume = "30", number = "5", pages = "72:1--72:27", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3747293", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Intellectual property (IP) core reuse serves as a key factor to the rapid development of modern system-on-chips (SoCs) by minimizing time-to-market and manufacturing cost. 
However, it is crucial to prevent security risks such as IP piracy and over-use \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "72", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ewert:2025:LAI, author = "Christian Ewert and Andrija Ne{\v{s}}kovi{\'c} and Carsten Heinz and Felix Muuss and Alexander Treff and Marc Gourjon and Rainer Buchty and Thomas Eisenbarth and Andreas Koch and Mladen Berekovic and Saleh Mulhem", title = "Lightweight Authenticated Integration and In-Field Secure Operation of System-in-Package", journal = j-TODAES, volume = "30", number = "5", pages = "73:1--73:23", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3745780", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "System in Package (SiP) relies on integrating different chiplets potentially involving many third-party devices and chiplet foundries. This type of advanced packaging technology opens up numerous threat scenarios, especially: (a) the inauthentic and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "73", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhao:2025:PMT, author = "Yequan Zhao and Hai Li and Ian Young and Zheng Zhang", title = "Poor Man's Training on {MCUs}: a Memory-Efficient Quantized Back-Propagation-Free Approach", journal = j-TODAES, volume = "30", number = "5", pages = "74:1--74:33", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3745772", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Back propagation (BP) is the default solution for gradient computation in neural network training. However, implementing BP-based training on various edge devices such as FPGA, microcontrollers (MCUs), and analog computing platforms faces multiple major \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "74", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pham:2025:AAG, author = "Phuoc Pham and Tae-Min Park and Sung-Hyuk Cho and Tayyeb Mahmood and Joon-Sung Yang and Jaeyong Chung", title = "{AGD}: Analytic Gradient Descent for Discrete Optimization in {EDA} and its Use to Gate Sizing", journal = j-TODAES, volume = "30", number = "5", pages = "75:1--75:22", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3748257", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In electronic design automation (EDA), simulation models are often non-differentiable, and many design choices are discrete. 
As a result, greedy optimization methods based on numerical gradients are widely used, although they often lead to suboptimal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "75", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wang:2025:RRI, author = "Xinrui Wang and Lang Feng and Yujie Wang and Taotao Xu and Yinhe Han and Zhongfeng Wang", title = "Resister: a Resilient Interposer Architecture for Chiplet to Mitigate Timing Side-Channel Attacks", journal = j-TODAES, volume = "30", number = "5", pages = "76:1--76:23", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3748258", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Chiplet technology has been a hot topic due to its potential for more efficient implementation of large-scale integrated circuits. In chiplet manufacturing, the general-purpose active interposer usually integrates chiplets from different vendors with a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "76", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pan:2025:RLR, author = "Hongyang Pan and Keren Zhu and Fan Yang and Xuan Zeng and Sen Liu and Yong Xiao and Yun Shao and Zhufei Chu", title = "Rethinking Logic Rewriting: Technology-Aware Subgraph Matching with Exact Synthesis", journal = j-TODAES, volume = "30", number = "5", pages = "77:1--77:29", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3749103", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Logic synthesis is crucial in digital design automation, significantly enhancing performance, reducing area, and lowering power consumption through technology-independent optimization followed by technology mapping. Logic rewriting, a key strategy for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "77", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yao:2025:HLS, author = "Xufeng Yao and Wenqian Zhao and Qi Sun and Cheng Zhuo and Bei Yu", title = "High-level Synthesis Directives Design Optimization via Large Language Model", journal = j-TODAES, volume = "30", number = "5", pages = "78:1--78:24", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3747291", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High-level synthesis is an effective methodology that accelerates early-stage circuit design. 
The optimization of HLS directives has been a critical yet challenging endeavor, with prevailing research primarily concentrating on custom feature engineering \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "78", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Datta:2025:TFS, author = "Aniruddha Datta and Bhanu Yaganti and Mate Palocska and Andrew Dove and Arik Peltz and Krishnendu Chakrabarty", title = "Test-Fleet Scheduling in Complex Validation and Production Environments", journal = j-TODAES, volume = "30", number = "5", pages = "79:1--79:32", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3749985", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "We present a solution to the complex design-automation problem of scheduling test operations in a validation laboratory or production facility. Our goal is to maximize the utilization of a fleet of test stations and minimize the overall test time for a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "79", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chao:2025:MEA, author = "Zhiteng Chao and Feng Gu and Junying Huang and Wenjie Li and Jing Ye and Huawei Li and Xiaowei Li", title = "Memory-Efficient and Adaptive Heterogeneous Framework for Gate-Level Fault Simulation", journal = j-TODAES, volume = "30", number = "5", pages = "80:1--80:27", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3760777", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Gate-level fault simulation is essential for automatic test pattern generation (ATPG). The traditional event-driven simulation is time-consuming due to the large number of faults. While parallel fault simulation with GPGPUs shows promise, it faces reduced \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "80", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liang:2025:RRB, author = "Shiyan Liang and Jingui Lin and Dongwei Liu and Wenxiong Lin and Peng Gao and Yuzhe Ma and Tingting Wu and Xiaoming Xiong and Shuting Cai", title = "{RTMF}: Routing based on {TDM} for {Multi-FPGA} System", journal = j-TODAES, volume = "30", number = "5", pages = "81:1--81:24", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3760778", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As modern VLSI design advances, the significance of multi-FPGA systems in prototyping and verification is steadily growing. 
Due to the physical I/O limitations, the Time-Division Multiplexing (TDM) and I/O assignment techniques are introduced to solve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "81", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dahiya:2025:VTW, author = "Ayush Dahiya and Vansh Singhal and Poornima Mittal", title = "A Variation Tolerant Write Assist Read Decoupled {9T SRAM} Cell for Low Voltage Application", journal = j-TODAES, volume = "30", number = "5", pages = "82:1--82:22", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3754451", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Driven by the swift expansion of energy-demanding Internet of Things devices, on-chip SRAM is undergoing a significant evolution to attain reduced power usage. This shift ushers in a new era of self-sufficient and energy-efficient technology. This article \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2025:PDD, author = "Wei-Kai Liu and Benjamin Tan and Krishnendu Chakrabarty", title = "Patchability-Driven Design Exploration for System-on-Chip Patching Architectures", journal = j-TODAES, volume = "30", number = "5", pages = "83:1--83:21", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3762185", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As System-on-Chip (SoC) designs become increasingly complex, ensuring comprehensive verification has become more challenging, leading to overlooked hardware bugs that can be found in the field. Addressing hardware bugs post-deployment is difficult, as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "83", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Deng:2025:IMA, author = "Xinguo Deng and Wen Xu and Mingsheng Mei and Henghui Hong and Yourun Lan and Jiarui Chen", title = "An Improved {MCTS} Algorithm for Ordered Escape Routing of Differential Pair", journal = j-TODAES, volume = "30", number = "5", pages = "84:1--84:26", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3760776", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The ordered escape routing of the differential pair represents a critical component in the physical design of the printed circuit board. 
An improved Monte Carlo Tree Search (MCTS) algorithm and an innovative technique for estimating escape points are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "84", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Miranda:2025:CTR, author = "Bruno D. Miranda and M{\'a}rcio Castro and Luiz C. V. dos Santos", title = "A Canonical Test Representation for Verification of Shared-Memory Behavior in Multiprocessor Systems", journal = j-TODAES, volume = "30", number = "5", pages = "85:1--85:22", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3762184", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The scope of this article is the design verification of a multicore chip or multichip multiprocessor by running concurrent test programs until coverage goals are reached. Interactions between multiple processors through shared memory must obey a memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "85", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2025:MMO, author = "Jingyuan Li and Yuan Dai and Wenbo Yin and Lingli Wang", title = "{MoDAF}: a Multi-objective Divide-and-Conquer Parameter Tuning Framework for {CGRAs}", journal = j-TODAES, volume = "30", number = "5", pages = "86:1--86:28", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3766063", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Coarse-grained reconfigurable architectures (CGRAs) are gaining increasing attention as domain-specific accelerators due to their high flexibility and energy efficiency. These architectures offer a compelling solution for applications that require custom \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "86", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ferres:2025:STL, author = "Bruno Ferres and Oussama Oulkaid and Matthieu Moy and Gabriel Radanne and Ludovic Henrio and Pascal Raymond and Mehdi Khosravian", title = "A Survey on Transistor-Level Electrical Rule Checking of Integrated Circuits", journal = j-TODAES, volume = "30", number = "5", pages = "87:1--87:28", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3748327", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hardware verification is crucial to ensure the quality of Integrated Circuits, and prevent costly bugs down the manufacturing flow. 
Electrical Rule Checking (ERC) is a verification step used to assert that a circuit complies with some electrical rules, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "87", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Golzan:2025:AMS, author = "Morteza Golzan and Telex M. N. Ngatched and Karteek Popuri and Lihong Zhang", title = "Analog and Mixed-Signal {IC} Modeling and Optimization: an Artificial Intelligence Perspective", journal = j-TODAES, volume = "30", number = "5", pages = "88:1--88:32", month = sep, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3754339", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Oct 2 10:35:16 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The design of circuits and systems has witnessed a growing interest in leveraging the potential of artificial intelligence in the realm of analog and mixed-signal (AMS) integrated circuits (ICs). This article presents a comprehensive survey on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Dick:2025:ISI, author = "Robert Paul Dick and Hammond Pearce and Li Shang and Fan Yang", title = "Introduction to Special Issue on Large Language Models for Electronic System Design Automation", journal = j-TODAES, volume = "30", number = "6", pages = "89:1--89:3", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3746636", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Large Language Models are having a substantial impact on electronic design automation in areas ranging from hardware architecture to verification and optimization. The special issue provides a snapshot of work on this topic. This introduction describes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "89", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{He:2025:LLM, author = "Zhuolun He and Yuan Pu and Haoyuan Wu and Tairu Qiu and Bei Yu", title = "Large Language Models for {EDA}: Future or Mirage?", journal = j-TODAES, volume = "30", number = "6", pages = "90:1--90:53", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3736167", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this article, we explore the burgeoning intersection of large language models (LLMs) and electronic design automation (EDA). We critically assess whether LLMs represent a transformative future for EDA or merely a fleeting mirage. 
By organizing existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "90", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pinckney:2025:RVY, author = "Nathaniel Pinckney and Christopher Batten and Mingjie Liu and Haoxing Ren and Brucek Khailany", title = "Revisiting {VerilogEval}: a Year of Improvements in Large-Language Models for Hardware Code Generation", journal = j-TODAES, volume = "30", number = "6", pages = "91:1--91:20", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3718088", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The application of large language models (LLMs) to digital hardware code generation is an emerging field, with most LLMs primarily trained on natural language and software code. Hardware code like Verilog constitutes a small portion of training data, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "91", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lopes:2025:ELL, author = "Samuel Gomes Lopes and Shien Zhu and Gustavo Alonso", title = "Exploring Large Language Models for Hierarchical Hardware Circuit and Testbench Generation", journal = j-TODAES, volume = "30", number = "6", pages = "92:1--92:39", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3742430", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Designing and verifying hardware circuits using a Hardware Description Language (HDL) is an essential but time-consuming part of hardware design. Generating the desired correct circuit and testbench code usually requires a significant engineering effort. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "92", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chang:2025:DCC, author = "Kaiyan Chang and Wenlong Zhu and Kun Wang and Xinyang He and Nan Yang and Zhirong Chen and Dantong Jin and Cangyuan Li and Yunhao Zhou and Hao Yan and Zhuoliang Zhao and Yuan Cheng and Mengdi Wang and Shengwen Liang and Yinhe Han and Xiaowei Li and Huawei Li and Ying Wang", title = "A Data-Centric Chip Design Agent Framework for {Verilog} Code Generation", journal = j-TODAES, volume = "30", number = "6", pages = "93:1--93:27", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3727980", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent advances in large language models (LLMs) have demonstrated significant potential for automated hardware description language (HDL) code generation from high-level specifications. However, two critical challenges limit further progress in this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "93", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shi:2025:AKN, author = "Yichen Shi and Zhuofu Tao and YuHao Gao and Tianjia Zhou and Cheng Chang and Yaxin Wang and Bingyu Chen and Genhao Zhang and Alvin Liu and Zhiping Yu and Ting-Jung Lin and Lei He", title = "{AMSnet-KG}: a Netlist Dataset for {LLM}-based {AMS} Circuit Auto-design Using Knowledge Graph {RAG}", journal = j-TODAES, volume = "30", number = "6", pages = "94:1--94:37", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3736166", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High-performance analog and mixed-signal (AMS) circuits are mainly full-custom designed, which is time-consuming and labor-intensive. A significant portion of the effort is experience-driven, which makes the automation of AMS circuit design a formidable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "94", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Reddy:2025:LLA, author = "E. 
Bhawani Eswar Reddy and Sutirtha Bhattacharyya and Ankur Sarmah and Fedrick Nongpoh and Karthik Maddala and Chandan Karfa", title = "{LHS}: {LLM} Assisted Efficient High-level Synthesis of Deep Learning Tasks", journal = j-TODAES, volume = "30", number = "6", pages = "95:1--95:27", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3734523", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Deep learning tasks, especially those involving complex convolution neural networks (CNNs), are computationally intensive and pose significant challenges when implemented on hardware. Accelerating these tasks is critical for improving performance. High-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "95", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Collini:2025:CLL, author = "Luca Collini and Siddharth Garg and Ramesh Karri", title = "{C2HLSC}: Leveraging Large Language Models to Bridge the Software-to-Hardware Design Gap", journal = j-TODAES, volume = "30", number = "6", pages = "96:1--96:24", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3734524", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "High-Level Synthesis (HLS) tools offer rapid hardware design from C code, but their compatibility is limited by code constructs. This article investigates Large Language Models (LLMs) for automatically refactoring C code into HLS-compatible formats. We \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "96", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2025:ASR, author = "Cangyuan Li and Chujie Chen and Yudong Pan and Wenjun Xu and Yiqi Liu and Kaiyan Chang and Yujie Wang and Mengdi Wang and Huawei Li and Yinhe Han and Ying Wang", title = "{AutoSilicon}: Scaling Up {RTL} Design Generation Capability of Large Language Models", journal = j-TODAES, volume = "30", number = "6", pages = "97:1--97:21", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3737286", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Hardware description language (HDL) code designing is a critical component of the chip design process, requiring substantial engineering and time resources. Recent advancements in large language models (LLMs), such as GPT series, have shown promise in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "97", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2025:HHL, author = "Magi Chen and Ting-Chi Wang", title = "{HyperPlace}: Harnessing a Large Language Model for Efficient Hyperparameter Optimization in {GPU}-Accelerated {VLSI} Placement", journal = j-TODAES, volume = "30", number = "6", pages = "98:1--98:27", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3733601", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "While GPU-based placers have demonstrated significant speed advantages over their CPU-based counterparts, hyperparameter tuning remains a bottleneck, often requiring substantial human intervention and expert knowledge. This challenge is particularly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "98", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xiao:2025:ECR, author = "Zhe Xiao and Xu He and Haoying Wu and Bei Yu and Yang Guo", title = "{EDA-Copilot}: a {RAG}-Powered Intelligent Assistant for {EDA} Tools", journal = j-TODAES, volume = "30", number = "6", pages = "99:1--99:24", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3715326", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the rise of Large Language Models (LLMs), researchers have become increasingly interested in their applications in EDA flows, particularly in specific subdomains such as serving as knowledge assistants and generating RTL code. 
In this study, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "99", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Blocklove:2025:AIL, author = "Jason Blocklove and Shailja Thakur and Benjamin Tan and Hammond Pearce and Siddharth Garg and Ramesh Karri", title = "Automatically Improving {LLM}-based {Verilog} Generation using {EDA} Tool Feedback", journal = j-TODAES, volume = "30", number = "6", pages = "100:1--100:26", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3723876", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditionally, digital hardware designs are written in the Verilog hardware description language (HDL) and debugged manually by engineers. This can be time-consuming and error-prone for complex designs. Large Language Models (LLMs) are emerging as a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "100", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Qayyum:2025:LAB, author = "Khushboo Qayyum and Chandan Kumar Jha and Sallar Ahmadi-Pour and Muhammad Hassan and Rolf Drechsler", title = "{LLM}-assisted Bug Identification and Correction for {Verilog HDL}", journal = j-TODAES, volume = "30", number = "6", pages = "101:1--101:28", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3733237", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As technology continues to advance, it becomes increasingly integrated into daily life facilitating complex tasks across a range of environments. While some applications such as smartphones and smartwatches are less critical, others like healthcare \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "101", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yao:2025:HSH, author = "Xufeng Yao and Haoyang Li and Tsz Ho Chan and Wenyi Xiao and Mingxuan Yuan and Yu Huang and Lei Chen and Bei Yu", title = "{HDLdebugger}: Streamlining {HDL} Debugging with Large Language Models", journal = j-TODAES, volume = "30", number = "6", pages = "102:1--102:26", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735638", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the domain of chip design, hardware description languages (HDLs) play a pivotal role. 
However, due to the inherent complexity of HDLs and the scarcity of high-quality debugging resources, HDL bug fixing remains a challenging and time-consuming task, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "102", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ahmad:2025:FFL, author = "Baleegh Ahmad and Joey Ah-Kiow and Benjamin Tan and Ramesh Karri and Hammond Pearce", title = "{FLAG}: {Finding Line Anomalies (in RTL code) with Generative AI}", journal = j-TODAES, volume = "30", number = "6", pages = "103:1--103:30", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3736411", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Bug detection in Hardware Design Languages (HDLs) is an important problem in the System-on-Chip (SoC) development cycle. It is crucial to find defects at the earliest stage possible. While most fault localization requires the use of ``tests'' (e.g., test \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "103", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Ghimire:2025:HAE, author = "Sujan Ghimire and Yu-Zheng Lin and Muntasir Mamun and Muhtasim Alam Chowdhury and Farhad Alemi and Shuyu Cai and Jinduo Guo and Mingyu Zhu and Honghui Li and Banafsheh Saber Latibari and Setareh Rafatirad and Pratik Satam and Soheil Salehi", title = "{HWREx}: {AI}-enabled Hardware Weakness and Risk Exploration and Storytelling Framework with {LLM}-assisted Mitigation Suggestion", journal = j-TODAES, volume = "30", number = "6", pages = "104:1--104:33", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3737459", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The growing complexity of modern computing frameworks has led to an increase in cybersecurity vulnerabilities reported to the National Vulnerability Database (NVD). Extracting meaningful trends from this vast amount of unstructured data is challenging \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "104", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lu:2025:HRH, author = "Zhouyang Lu and Hailin Xu and Anrui Chen and Siyuan Tang and Junyi Zhang and Yifei Feng and Wentao Pan and Jiangli Huang", title = "{HSG-RAG}: Hierarchical Knowledge Base Construction for Embedded System Development", journal = j-TODAES, volume = "30", number = "6", pages = "105:1--105:21", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3731680", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Customization is a fundamental aspect of embedded system development, requiring developers to acquire extensive domain-specific knowledge from technical documents. However, the sheer volume of these documents, coupled with the intricate relationships \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "105", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wei:2025:MAS, author = "Yangbo Wei and Li Huang and Qi Feng and Zhanfei Chen and Jinlong Yan and Ting-Jung Lin and Zhen Huang and Kun Ren and Wei Xing and Lei He", title = "{ModelGen}: Automating Semiconductor Parameter Extraction with Large Language Model Agents", journal = j-TODAES, volume = "30", number = "6", pages = "106:1--106:26", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3736165", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Device models require large numbers of parameters to characterize complex physical effects. 
Although the latest advancements in machine learning and automated tools have drastically improved efficiency over the classic methods, they still demand a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "106", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Shim:2025:DPP, author = "Jun S. Shim and Hyeonji Chang and Yeseong Kim and Jihong Kim", title = "{DeepPM}: Predicting Performance and Energy Consumption of Program Binaries Using Transformers", journal = j-TODAES, volume = "30", number = "6", pages = "107:1--107:27", month = nov, year = "2025", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3725887", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Nov 4 10:21:34 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate estimation of performance and energy consumption is critical for optimizing application efficiency on diverse hardware platforms. Traditional methods often rely on profiling and measurements, requiring at least one execution, making them time-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "107", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Qi:2026:DAT, author = "Shuang Qi and Zhiwen Yu and Bin Guo and Sizhao Li and Zipeng Li and Hanbin Ma and Tsungyi Ho and Krishnendu Chakrabarty and Xing Huang", title = "Design Automation Techniques for Microfluidic Fully Programmable Valve Array Biochips: a Systematic Survey", journal = j-TODAES, volume = "31", number = "1", pages = "1:1--1:31", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3768629", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Flow-based microfluidic biochips have attracted much attention over the past two decades. By integrating diverse micro-components, e.g., mixers and filters, on a miniaturized planar substrate, complicated bioassays such as protein crystallization and drug \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2026:EAS, author = "Jinchao Chen and Qinwei Zhang and Pengcheng Han and Ying Zhang and Yantao Lu and Pengyi Zheng", title = "Energy-aware Scheduling of Workflow Applications Towards Schedule Length Optimization in Heterogeneous Distributed Embedded Systems", journal = j-TODAES, volume = "31", number = "1", pages = "2:1--2:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3767164", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Energy optimization constitutes a paramount design consideration in the realm of embedded systems development since these devices are inherently constrained by finite battery resources. Designing and developing an effective energy-aware scheduling \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cai:2026:ACL, author = "Huayang Cai and Genggeng Liu and Wenzhong Guo and Zipeng Li and Tsungyi Ho and Xing Huang", title = "Adaptive Control-Logic Routing with Length Matching and Fault Tolerance for {FPVA} Biochips Using Deep Reinforcement Learning", journal = j-TODAES, volume = "31", number = "1", pages = "3:1--3:22", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3765631", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "With the increasing integration level of flow-based microfluidics, fully programmable valve arrays (FPVAs) have emerged as the next generation of flow-based microfluidic devices. Microvalves in an FPVA are typically managed by a control logic, where \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pfromm:2026:MMF, author = "Lukas Pfromm and Alish Kanani and Harsh Sharma and Parth Solanki and Eric Tervo and Jaehyun Park and Janardhan Doppa and Partha Pratim Pande and Umit Ogras", title = "{MFIT}: Multi-{FIdelity} Thermal Modeling for {2.5D} and {3D} Multi-Chiplet Architectures", journal = j-TODAES, volume = "31", number = "1", pages = "4:1--4:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3765905", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Rapidly evolving artificial intelligence and machine learning applications require ever-increasing computational capabilities, while monolithic 2D design technologies approach their limits. 2.5D/3D heterogeneous integration of smaller chiplets using \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "4", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Akarvardar:2026:UGA, author = "Kerem Akarvardar and Xiaoyu Sun and Brian Crafton and Xiaochen Peng and Haruki Mori and Abhiroop Bhattacharjee and Hidehiro Fujiwara and H.-S. 
Philip Wong", title = "Ultrafast Generative {AI} by Ultradense {3D} Integration: a Case Study on {LLM}-based Edge Inference", journal = j-TODAES, volume = "31", number = "1", pages = "5:1--5:31", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3768168", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Generative AI (GenAI) is one of the most critical applications today, continually challenging the limits of semiconductor technology. We introduce a very fine-grained 3D memory-on-logic architecture along with a novel data mapping strategy to support \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "5", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Alcon:2026:STR, author = "Miguel Alcon and Enrico Mezzetti and Jaume Abella and Francisco J. Cazorla", title = "Supporting Timing-related Metrics for Autonomous Driving Frameworks in {CyberRT}", journal = j-TODAES, volume = "31", number = "1", pages = "6:1--6:37", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3768344", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The provision of increasingly advanced autonomous software functionalities builds on cutting-edge autonomous driving frameworks to enable modular interactions among multiple software components. This approach helps to support functional cause-effect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pouget:2026:HOF, author = "St{\'e}phane Pouget and Michael Lo and Louis-No{\"e}l Pouchet and Jason Cong", title = "Holistic Optimization Framework for {FPGA} Accelerators", journal = j-TODAES, volume = "31", number = "1", pages = "7:1--7:37", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769307", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Customized accelerators have revolutionized modern computing by delivering substantial gains in energy efficiency and performance through hardware specialization. Field-Programmable Gate Arrays (FPGAs) play a crucial role in this paradigm, offering \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Li:2026:REP, author = "Jiale Li and Yulin Fu and Sean Longyu Ma and Chiu-Wing Sham and Chong Fu", title = "{RedPIM}: an Efficient {PIM} Accelerator Design with Reduced Analog-to-Digital Conversions", journal = j-TODAES, volume = "31", number = "1", pages = "8:1--8:26", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769122", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "ReRAM-based Processing-In-Memory (PIM) architectures are compelling contenders for deep learning due to their ability to perform matrix-vector multiplications (MVMs) directly within the memory, significantly reducing data movement and enhancing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "8", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Guo:2026:TFP, author = "Lidong Guo and Zhenhua Zhu and Xuefei Ning and Tengxuan Liu and Shiyao Li and Guohao Dai and Huazhong Yang and Wangyang Fu and Yu Wang", title = "Towards Floating Point-Based {AI} Acceleration: Hybrid {PIM} with Non-Uniform Data Format and Reduced Multiplications", journal = j-TODAES, volume = "31", number = "1", pages = "9:1--9:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769304", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Neural networks (NNs) have exhibited excellent performance in various fields of artificial intelligence. 
However, the primary operations in these mainstream models, including matrix-vector multiplication (MVM), element-wise multiplication (EWM), and depth-wise convolution (DWConv), require massive data movements during computation, which greatly impacts NNs inference performance. The emerging Processing-In-Memory (PIM) architectures have shown great potential to overcome the memory wall problem. However, constrained by the supported data format and operator type, directly adopting PIM architectures for neural network acceleration faces three challenges: (1) Floating-point (FP) format has been widely adopted for ensuring high algorithm accuracy. However, Resistive Random-Access Memory (RRAM)-based analog PIM architectures perform integer (INT) MVMs in the analog domain, limiting their application to the more accurate FP format; (2) Static Random-Access Memory (SRAM)-based digital PIM architectures require additional circuits to support the FP format, and the SRAM capacity cannot satisfy the storage requirement of latest large language models (LLMs); (3) When performing the operators with few accumulation steps, such as EWMs and DWConvs, only few memory units in PIM architecture are activated, resulting in severe device under-utilization.\par To tackle the above challenges, this article proposes an RRAM and 3D-SRAM-based hybrid PIM architecture, achieving FP-based algorithm accuracy, high device utilization, and high energy efficiency. At the software level, we first analyze the impact of quantization errors on NN's inference accuracy. For the quantization error-insensitive MVM operations, we propose the PIM-oriented exponent-free non-uniform (PN) data format. The proposed PN format can be flexibly adjusted to fit the non-uniform distribution and approach FP-based algorithm accuracy using bit-slicing-based full INT operations.
For the quantization error-sensitive EWM\slash DWConv operations, we introduce the multiplication-free approximated FP multiplications to reduce the additional hardware overhead. At the hardware level, we propose a hybrid PIM architecture, including an RRAM analog PIM using shift-and-add for PN-based MVMs, and a 3D-SRAM digital PIM with high utilization for DWConv\slash EWM operations. Extensive experiments on CNNs and attention-free LLMs validate that the proposed PIM architecture achieves up to $ 99.4 \times $ and $ 33.94 \times $ speedup with $ 5697.74 \times $ and $ 8.24 \times $ energy efficiency improvement compared to GPU and PIM-baseline, respectively. With the proposed PN format and approximated FP multiplications, the algorithm accuracy of CNNs and attention-free LLMs can be improved by up to 3.01\% and 10.18\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "9", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Jiang:2026:DAS, author = "Danping Jiang and Zibin Dai and Yanjiang Liu and Xiaoyu Song and Zhaoxu Zhou", title = "{DPTM}: an Adaptive Scheduler Design Utilizing Timeslot Matching and Release Methods for Concurrent and Multi-task Interleaved Pipelining-oriented {CGRA}", journal = j-TODAES, volume = "31", number = "1", pages = "10:1--10:28", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769303", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Coarse-grained reconfigurable architectures (CGRAs) are increasingly employed as domain-specific accelerators due to their efficiency and flexibility. 
However, the existing CGRA architectures suffer from low hardware resource utilization and performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "10", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2026:RFL, author = "Wei-Kai Liu and Jonti Talukdar and Benjamin Tan and Krishnendu Chakrabarty", title = "Runtime Fault Localization in Deep Neural Network Accelerators", journal = j-TODAES, volume = "31", number = "1", pages = "11:1--11:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3770920", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Systolic arrays are a popular choice for accelerating deep neural networks (DNNs) due to their inherent parallelism and efficient data reuse. However, ensuring the reliability of these DNN accelerators is crucial, as hardware faults can significantly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Guo:2026:HES, author = "Junhao Guo and Hongxin Kong and Lang Feng", title = "A High Efficient and Scalable Obstacle-Avoiding {VLSI} Global Routing Flow", journal = j-TODAES, volume = "31", number = "1", pages = "12:1--12:25", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769005", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routing is a crucial step in the VLSI design flow. 
With advancements in manufacturing technology, more constraints have emerged in design rules, particularly regarding obstacles during routing, leading to increased routing complexity. Unfortunately, many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "12", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2026:CCM, author = "Renyu Yang and Xin Ju and Mei Wen and Jinjin Deng and Yi Wen and Junzhong Shen and Bin Liang and Tianyu Wang and Zhaoyan Shen and Zili Shao", title = "{C-CIM}: a Multi-Mode Convolution-Capable {SRAM-CIM}", journal = j-TODAES, volume = "31", number = "1", pages = "13:1--13:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3769859", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "SRAM is widely used in computing-in-memory (CIM) neural network accelerators because of its relatively mature technology and good compatibility with complementary metal oxide semiconductor logic process. Digital SRAM-CIM is favored by researchers because \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2026:YON, author = "Silin Chen and Kangjian Di and Yibo Huang and Binwu Zhu and Ningmu Zou", title = "You Only Need Non-Hotspot: an Unsupervised Training-Free Method for Layout Hotspot Detection", journal = j-TODAES, volume = "31", number = "1", pages = "14:1--14:24", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3771767", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent advances in deep learning-based layout hotspot detection have made remarkable progress in identifying potential defect patterns at early design stages. However, most existing methods rely on supervised learning, which requires manual identification \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "14", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Darjani:2026:SSO, author = "Armin Darjani and Nima Kavand and Zhentao Han and Akash Kumar", title = "Structurally Secure Obfuscation: Assessing and Mitigating Structural Vulnerabilities in Circuits Obfuscation", journal = j-TODAES, volume = "31", number = "1", pages = "15:1--15:30", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3772062", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Because of the globalization of IC manufacturing and to protect IP integrity and confidentiality, circuit obfuscation techniques have been developed. These methods secure the circuit through obfuscation approaches. 
Recently, advanced machine learning (ML)- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "15", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Nadeem:2026:AIG, author = "Mohamed Nadeem and Luca M{\"u}ller and Chandan Kumar Jha and Rolf Drechsler", title = "Advanced And-Inverter Graph Decomposition Technique for Reducing Circuit Complexity", journal = j-TODAES, volume = "31", number = "1", pages = "16:1--16:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3771280", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In the field of Electronic Design Automation (EDA), managing circuit complexity is a crucial task for efficient circuit verification, testing, and optimization. Increasing design complexity presents challenges for tasks such as formal verification, fault \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "16", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Raha:2026:IRA, author = "Arnab Raha and Sandeep Krishna Thirumala and Sumeet Kumar Gupta and Vijay Raghunathan", title = "{InterAxNN}: Reconfigurable and Approximate in-Memory Processing Accelerator for Ultra-Low-Power Binary Neural Network Inference in Intermittently Powered Systems", journal = j-TODAES, volume = "31", number = "1", pages = "17:1--17:44", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3771845", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In this work, we propose InterAxNN, an energy-aware approximate hardware architecture to perform vector-matrix multiplications in the binary precision regime for energy-constrained intermittently powered systems (IPS). In contrast to existing XNOR \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "17", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2026:DCE, author = "Zhihan Chen and Xindi Zhang and Yuhang Qian and Shaowei Cai", title = "Datapath Combinational Equivalence Checking With Hybrid Sweeping Engines and Parallelization", journal = j-TODAES, volume = "31", number = "1", pages = "18:1--18:27", month = jan, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3773040", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Nov 17 05:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Synthesizing circuits to achieve better PPA is crucial, particularly in datapath netlists with various arithmetic operators.
The verification relies on the Combinational Equivalence Checking (CEC) techniques, checking the equivalence of two combinational \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "18", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chaudhary:2026:STG, author = "Chhavi Chaudhary and Rajesh Devaraj and Arnab Sarkar", title = "Scheduling Task Graph Applications on Preloaded Shared-Bus based Heterogeneous Platforms", journal = j-TODAES, volume = "31", number = "2", pages = "19:1--19:29", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3772003", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Modern embedded control applications in Cyber-Physical Systems (CPSs) often have complex inter-dependencies in their functionalities and are hence represented as Directed-Acyclic Task Graphs (DTGs). To meet complex performance as well as deployment- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron.
Syst.", articleno = "19", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Xu:2026:PPH, author = "Wendong Xu and Yuhao Ji and Yang Bai and Yueting Li and Yuxuan Zhao and Zhengwu Liu and Bei Yu and Ngai Wong", title = "{PPD}: a Portable and Highly Parallel Dispatching System for Deep Learning", journal = j-TODAES, volume = "31", number = "2", pages = "20:1--20:24", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3773039", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The acceleration of inference process for deep learning models is closely tied with the parallelization capability of computational graph operators and the parallel scheduling strategies. Most existing deep learning compilers focus on optimizing intra- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "20", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2026:FRC, author = "Qingyu Yang and Jingjin Li and Rui Li and Yuting He and Yajun Ha and Linlin Shen and Ruibin Bai and Heng Yu", title = "{FPGA} Routing Congestion Prediction via Graph Learning-Aided Conditional {GAN}", journal = j-TODAES, volume = "31", number = "2", pages = "21:1--21:24", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3773770", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routing congestion prediction expedites the closure of FPGA placement and routing (PnR).
Current prediction methods employ convolutional models, taking advantage of their capacity of dealing with image-style inputs. However, these methods neglect the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "21", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2026:EEG, author = "Tianji Liu and Nutdranai Jaruthikorn and Shiju Lin and Bentian Jiang and Guannan Guo and Weihua Sheng and Evangeline F. Y. Young", title = "Efficient and Effective E-graph-based Logic Optimization", journal = j-TODAES, volume = "31", number = "2", pages = "22:1--22:16", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3774883", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Recent efforts of applying e-graphs in logic synthesis have shown promising results. Nevertheless, e-graph-based gate-level logic optimization suffers from inefficiency and limited extraction quality. In this article, we propose a fast parallel e-matching \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "22", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sharma:2026:TTR, author = "Vibhanshu Sharma and Pratyush Dhingra and Janardhan Doppa and Partha Pratim Pande", title = "{ThRIve}: Thermally Robust {CNN} Inference via Low-Rank Adaptation in Heterogeneous {PIM} Architectures", journal = j-TODAES, volume = "31", number = "2", pages = "23:1--23:25", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3774328", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Processing-In-Memory (PIM) has emerged as a promising technology for accelerating machine learning (ML) workloads. Specifically, non-volatile memory-based PIM architectures have enabled effective ML acceleration due to their ability to perform energy- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "23", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Mandal:2026:OSM, author = "Tamal Mandal and Debraj Kundu and Sudip Roy", title = "Online Synthesis of {MEDA} Biochips with Area and Reliability-Aware Module Placement using Chamber-Less Virtual Topology", journal = j-TODAES, volume = "31", number = "2", pages = "24:1--24:29", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3774890", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Real-time execution of bioassays on microelectrode dot array (MEDA) biochips can revolutionize point-of-care diagnostics.
Despite numerous design automation efforts for digital microfluidic biochips (DMFBs), there is no online synthesis framework for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "24", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2026:CKD, author = "Zheng Wu and Zhuochu Yang and Zhuoyuan Yang and Zihao Chen and Li Shang and Fan Yang", title = "{ChatArch}: a Knowledge-driven Graph-of-thought {LLM} Framework for Processor Architecture Optimization", journal = j-TODAES, volume = "31", number = "2", pages = "25:1--25:26", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3774888", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Processors serve as the cornerstone of modern computing systems. Although processor design encompasses multiple VLSI levels, the architectural design plays a critical role in determining performance, power consumption, and area efficiency. To address the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2026:VAA, author = "Jiangli Huang and Jinyi Shen and Fan Yang and Li Shang and Zhaori Bi and Changhao Yan and Wenchuang Hu and Dian Zhou and Xuan Zeng", title = "Variation-aware Analog Circuit Design via Contextual Modeling and Robust Optimization", journal = j-TODAES, volume = "31", number = "2", pages = "26:1--26:19", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3774879", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Robust analog circuit design is becoming increasingly challenging due to process, voltage, and temperature (PVT) variations at advanced technology nodes. In this article, we formulate analog circuit synthesis as a robust optimization problem, and propose \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "26", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2026:ENO, author = "Yu-Shan Lin and Yung-Chih Chen and Li-Cheng Zheng and Kuei-Chung Chen", title = "Enhanced 2nd-Order Threshold Function Identification with Application to 2nd-Order Threshold Logic Network Synthesis", journal = j-TODAES, volume = "31", number = "2", pages = "27:1--27:21", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3776580", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Threshold logic is an alternative representation of conventional Boolean logic and re-attracted researchers' attention in recent years. 
Previous works have demonstrated that a 2$^{nd}$-order threshold logic gate (2-TLG) could have a lower area cost than a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "27", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhu:2026:LOS, author = "Kaixiang Zhu and Zhen Li and Jide Zhang and Wai-Shing Luk and Lingli Wang", title = "{LOFMPL}: an Open-source Logic Optimization Framework with {MFFC-based} Hypergraph Partition and Reinforcement Learning for Large Circuits", journal = j-TODAES, volume = "31", number = "2", pages = "28:1--28:34", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3776740", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As the size of a circuit increases, previous reinforcement learning (RL) approaches struggle to effectively explore the logic optimization sequences of large-scale Boolean networks due to the long runtime overhead with poor optimization results. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "28", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Srivastava:2026:BBC, author = "Ankur Srivastava and Arijit Nath", title = "Breaking The Buffer: Covert Channel Attacks by Overrunning Buffer and Countermeasures", journal = j-TODAES, volume = "31", number = "2", pages = "29:1--29:23", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3777554", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The large capacity and energy-efficient shared Last Level Caches play a vital role in improving system performance with a relatively low power consumption in the modern processors. Unfortunately, this highly utilized shared resource often turns out to be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "29", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Schafer:2026:HLS, author = "Benjamin Carrion Schafer and Chaitali Sathe", title = "From High-Level Synthesis Lite to High-Level Synthesis Full: Unlocking {HLS} tool Limitations", journal = j-TODAES, volume = "31", number = "2", pages = "30:1--30:17", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3777908", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Many Software (SW) vendors limit the functionality of their product based on the version purchased. This trend has also carried over to Electronic Design Automation (EDA). 
For example, Field-Programmable Gate Array (FPGA) vendors make their Lite versions \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Kumar:2026:ESS, author = "Pardeep Kumar and Gaurav Kumar and Mahendra Kumar Gurve and Yamuna Prasad and Satyadev Ahlawat", title = "On Enhancing the Security of Streaming Scan Network through Dual-Functional {TDR}", journal = j-TODAES, volume = "31", number = "2", pages = "31:1--31:20", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779438", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing complexity and core count of modern System-on-Chips (SoCs) have raised significant concerns regarding test time and test data volume. Despite advancements in SoC design, the physical size of SoCs remains relatively constant, imposing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Palesi:2026:ARC, author = "Maurizio Palesi and Enrico Russo and Giuseppe Ascia and Hamaad Rafique and Davide Patti and Vincenzo Catania and Sergi Abadal and Abhijit Das and Pau Escofet and Eduard Alarcon and Carmen Almudever", title = "Assessing the Role of Communication in Modular Multi-Core Quantum Systems", journal = j-TODAES, volume = "31", number = "2", pages = "32:1--32:40", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779440", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The scalability of quantum computing is constrained by the physical and architectural limitations of monolithic quantum processors. Modular multi-core quantum architectures, which interconnect multiple quantum cores (QCs) via classical and quantum-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "32", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2026:MTC, author = "Irith Pomeranz", title = "Multicycle Tests with Compressed Primary Input Sequences and an Extended Primary Input Scan Chain", journal = j-TODAES, volume = "31", number = "2", pages = "33:1--33:15", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779068", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Test data compression supports reductions in test data volume and test application time. 
The use of multicycle scan-based tests with several functional capture cycles between scan operations supports test compaction beyond that possible with single-cycle \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "33", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lei:2026:EET, author = "Runquan Lei and Lang Feng and Zetao Zhang and Xin Gao and Yang Liu", title = "{ETOF}: an Efficient Transistor-Level Optimization Flow for Large-Scale {CMOS} Circuits", journal = j-TODAES, volume = "31", number = "2", pages = "34:1--34:21", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779434", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The computing efficiency of digital VLSIs has increased with advancements in manufacturing, but this progress is slowing due to post-Moore physical limits. To improve efficiency, better standard cell-level synthesis can reduce transistor counts for low-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Qiao:2026:HWS, author = "Yibo Qiao and Weiping Xie and Shunyuan Lou and Qian Jin and Lichao Zeng and Yining Chen and Qi Sun and Cheng Zhuo", title = "A Hybrid Weakly Supervised Approach for enhanced High-Precision {SEM} Defect Segmentation in Nanoscale Semiconductor Manufacturing", journal = j-TODAES, volume = "31", number = "2", pages = "35:1--35:22", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3780101", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Accurate analysis of nanoscale defects in semiconductor manufacturing is essential for optimizing yield and reliability. Existing methods heavily rely on large, labor-intensive datasets and primarily focus on macroscopic defect distributions rather than \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "35", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Yang:2026:PLR, author = "Menglin Yang and Jian Hu", title = "Precise Learning-to-Rank Bug Localization Using Multi-Feature Fusion for Hardware Code", journal = j-TODAES, volume = "31", number = "2", pages = "36:1--36:25", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779450", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Verification is crucial in hardware code development as it ensures the design behaves as intended under all possible conditions, meeting its functional and performance specifications. 
Bug localization plays a vital role in the verification of hardware \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "36", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sun:2026:IDS, author = "Minqing Sun and Lanqi Ding and Huifeng Zhu and Yier Jin and An Zou", title = "From {ICs} to Device: a Survey on Hardware Tampering Detection via Power Delivery Network and Signal Trace", journal = j-TODAES, volume = "31", number = "2", pages = "37:1--37:31", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779441", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Protecting the integrity of hardware against invasive tampering within the supply chain is critical to ensuring overall system resilience, reliability, and trustworthiness. As electronic systems become increasingly complex and globally distributed, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2026:SML, author = "Miao Liu and Liwei Ni and Junfeng Liu and Xingyu Meng and Rui Wang and Xiaoze Lin and Xinhua Lai and Xingquan Li and Jungang Xu", title = "A Survey of Machine Learning Approaches in Logic Synthesis", journal = j-TODAES, volume = "31", number = "2", pages = "38:1--38:43", month = mar, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3785362", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Sat Feb 7 06:58:39 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The increasing complexity of digital circuits and the limitations of heuristic methods have led to growing interest in applying Machine Learning (ML) to Logic Synthesis (LS). ML provides a promising paradigm shift by implementing automated, scalable, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "38", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Held:2026:ISI, author = "Stephan Held and Gracieli Posser and Iris Hui-Ru Jiang and David Chinnery", title = "Introduction to the Special Issue on Advances in Physical Design Automation", journal = j-TODAES, volume = "31", number = "3", pages = "39:1--39:3", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3770739", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Wu:2026:EEO, author = "Bing-Huan Wu and Wai-Kei Mak", title = "An Efficient and Effective Optimization Algorithm for Buffer and Splitter Insertion in {AQFP} Circuits", journal = j-TODAES, volume = "31", number = "3", pages = "40:1--40:23", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3721130", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Adiabatic quantum-flux parametron (AQFP) is a superconducting technology with extremely low power consumption compared to traditional CMOS structures. Since AQFP logic gates are all clocked by AC current, extra buffer cells are required to balance the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "40", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Huang:2026:OMC, author = "Da-Wei Huang and Ying-Jie Jiang and Shao-Yun Fang", title = "Optimal Mixed-Cell-Height Detailed Placement with Discrete Spacing Costs", journal = j-TODAES, volume = "31", number = "3", pages = "41:1--41:25", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3742429", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Mixed-cell-height VLSI circuits are widely used to meet various design requirements. 
Due to design for manufacturability (DFM) considerations such as layout-dependent effects (LDEs), drain-to-drain abutment (DDA), and pattern coloring for multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "41", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lee:2026:TAC, author = "Yu-Min Lee and Hong-Wen Chiou and Jia-Hao Jiang", title = "Thermal-Aware Chiplet Placement for {2.5D} {ICs} with Sequence Pair Based Tree", journal = j-TODAES, volume = "31", number = "3", pages = "42:1--42:20", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3716893", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This work develops an efficient thermal-aware chiplet placer with sequence-pair representation. It provides wirelength-driven placement and thermal-aware placement. Its wirelength-driven option combines the sequence-pair based tree, a parallel branch-and-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2026:IAA, author = "Jai-Ming Lin and Jia-Ting Tsai and Hsin-Lin Chen", title = "Innovative Approaches to Addressing Challenges in {$3$D} Macro Placement", journal = j-TODAES, volume = "31", number = "3", pages = "43:1--43:26", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3767154", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Macro placement is a critical step in 3D integrated circuit (IC) designs due to the challenges posed by the large size of macros and the presence of obstacles in certain areas. Previous research in 3D placement often simplifies macro validation based on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "43", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lai:2026:ICL, author = "Xinhua Lai and Miao Liu and Xingquan Li and Yihang Qiu and Shijian Chen and Xinhao Li and Jungang Xu", title = "{iPO}: Constant Liar Parameter Optimization for Placement with Representation and Transfer Learning", journal = j-TODAES, volume = "31", number = "3", pages = "44:1--44:29", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3747292", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Placement is a critical and time-consuming step in very-large-scale integration (VLSI) design flow. 
As placement methods continue to be researched, they introduce more parameters, making current methods for configuring parameters heavily reliant on human \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "44", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2026:AKU, author = "He Liu and Zhisheng Zeng and Simin Tao and Zhipeng Huang and Yifan Li and Biwei Xie and Wei Gao and Xingquan Li", title = "{AiTPO}: {KAN-UNet} Heterogeneous Network for Timing Prediction and Optimization at Global Routing", journal = j-TODAES, volume = "31", number = "3", pages = "45:1--45:28", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735639", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Routing is a critical stage in achieving timing closure in integrated circuit design. Due to the time-consuming flow of detailed routing (DR), the lack of accurate routing information, and the impact of congestion during global routing (GR), rapidly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chuang:2026:CBM, author = "Ming-Yen Chuang and Yu-En Lin and Yi-Yu Liu", title = "Clustered-based Multi-pin Substrate Routing Optimization for Fine-Pitch Ball Grid Array", journal = j-TODAES, volume = "31", number = "3", pages = "46:1--46:24", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3737285", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As an important intermediate between integrated circuits (ICs) and the printed circuit board (PCB), the routing in the package substrate plays a crucial role in the efficiency and accuracy of signal and power transmission. While numerous research efforts \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "46", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Zhuang:2026:ARL, author = "Zhen Zhuang and Weishiun Hung and Md Arafat Kabir and Yarui Peng and Tsung-Yi Ho", title = "Adaptive Redistribution Layer Routing for Chiplet-Package Co-Design in {2.5D} System", journal = j-TODAES, volume = "31", number = "3", pages = "47:1--47:18", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3723043", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "2.5D packaging has become a popular alternative to integrate advanced logic and memory chiplets for high-performance computing and artificial intelligence systems. 
In the conventional design flow, chiplets and packages are independently designed and then \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "47", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liu:2026:RAL, author = "Siting Liu and Jieya Zhou and Jiaxi Jiang and Zhuolun He and Ziyi Wang and Yibo Lin and Bei Yu and Martin Wong", title = "Routing-aware Legal Hybrid Bonding Terminal Assignment for {$3$D} Face-to-Face Stacked {ICs}", journal = j-TODAES, volume = "31", number = "3", pages = "48:1--48:20", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3721131", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Face-to-face (F2F) stacked three-dimensional (3D) IC is a promising alternative for scaling beyond Moore's Law. In F2F 3D ICs, dies are connected through bonding terminals whose positions can significantly impact routing performance. Further, there exists \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Liang:2026:TTH, author = "Jun-Wei Liang and Iris Hui-Ru Jiang", title = "{TriHOT}: Triangular and Hexagonal Norm Based Timing-Driven Optical Routing with Wavelength Division Multiplexing", journal = j-TODAES, volume = "31", number = "3", pages = "49:1--49:20", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3725888", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "As semiconductor technology continues scaling, interconnect delay becomes a major bottleneck for circuit performance. On-chip optical interconnect with wavelength division multiplexing (WDM) is a promising alternative due to its high speed, broad \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "49", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Hsiao:2026:HRL, author = "Hao-Hsiang Hsiao and Yi-Chen Lu and Pruek Vanna-Iampikul and Sung Kyu Lim", title = "A Hybrid Reinforcement Learning Framework for Efficient Physical Design Parameter Tuning", journal = j-TODAES, volume = "31", number = "3", pages = "50:1--50:27", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3779423", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Traditional Design Space Exploration (DSE) methods in Physical Design (PD), such as Bayesian Optimization (BO) and Ant Colony Optimization (ACO), as well as state-of-the-art commercial tools like Synopsys DSO.ai, typically treat the design flow as a black \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2026:GCL, author = "Kaichang Chen and Georges Gielen", title = "A Generalized Constraint Learning and Transfer Methodology with Net-First Graph Neural Network and Selective Topological Search for Hierarchical Analog\slash Mixed-Signal Circuit Layout Synthesis", journal = j-TODAES, volume = "31", number = "3", pages = "51:1--51:32", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3722556", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Achieving efficient and effective automation in hierarchical analog/mixed-signal (AMS) integrated circuit layout synthesis remains a significant challenge in the electronic design automation domain, due to the vast design space and diverse layout \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "51", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lee:2026:GKM, author = "Wan Luan Lee and Dian-Lun Lin and Shui Jiang and Cheng-Hsiang Chiu and Yibo Lin and Bei Yu and Tsung-Yi Ho and Tsung-Wei Huang", title = "{G-kway}: Multilevel {GPU}-Accelerated $k$-way Graph Partitioner using Task Graph Parallelism", journal = j-TODAES, volume = "31", number = "3", pages = "52:1--52:26", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3734522", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Graph partitioning is important for the design of many CAD algorithms. 
However, as the graph size continues to grow, graph partitioning becomes increasingly time-consuming. Recent research has introduced parallel graph partitioners using either multi-core \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "52", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Chen:2026:GEH, author = "Magi Chen and Ting-Chi Wang", title = "{GenPart 2.0}: Enhanced Hypergraph Partitioning with Vertex Weight Handling using a Generative Model", journal = j-TODAES, volume = "31", number = "3", pages = "53:1--53:24", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3735133", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "This article introduces GenPart 2.0, an enhanced version of the hypergraph partitioner GenPart. While GenPart was limited to handling only unit vertex weights, GenPart 2.0 extends capabilities to include varying vertex weights. This extension is achieved \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Lin:2026:LDP, author = "Yi-Ting Lin and Meng Lian and Hu Peng and Bernhard Wolfrum and Tsun-Ming Tseng and Iris Hui-Ru Jiang", title = "Layout Decomposition and Printing Time Optimization for Inkjet-Printed Electronics", journal = j-TODAES, volume = "31", number = "3", pages = "54:1--54:16", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3721132", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Inkjet-printed electronics is a low-cost option for large-scale production. To avoid manufacturing defects, recent research has considered design constraints, such as Laplace and proximity conflicts, decomposed the layouts into different layers, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "54", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Cheng:2026:EPI, author = "Jiawen Cheng and Lingjie Li and Zhiqiang Liu and Kan Liu and Shan Shen and Zhenya Zhou and Wenjian Yu", title = "Efficient Parallel {ILU} Factorization and Forward\slash Backward Substitution with Application to Large-Scale Nonlinear Circuit Simulation", journal = j-TODAES, volume = "31", number = "3", pages = "55:1--55:24", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3787469", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Efficient techniques are proposed for parallel incomplete LU (ILU) factorization and forward/backward substitution, for the sparse matrices with the same sparsity pattern. These parallel algorithms are then used as a preconditioner for the generalized \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "55", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Pomeranz:2026:AAS, author = "Irith Pomeranz", title = "Aging Aware Steepening of the Fault Coverage Curve of a Scan Based Transition Fault Test Set", journal = j-TODAES, volume = "31", number = "3", pages = "56:1--56:24", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3786348", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "Chip aging may result in hardware defects whose likelihood of occurrence depends on the layout and functional workload at the defect site. 
In-field testing is important for the detection of defects that occur because of aging. In-field test periods have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "56", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{He:2026:THS, author = "Yiran He and Haihua Shen and Zirui Jiang and Shan Li and Xiao Ji and Huawei Li", title = "{Twins}: Hardware Similarity Evaluation Framework Using Graph Neural Network", journal = j-TODAES, volume = "31", number = "3", pages = "57:1--57:25", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3786598", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The globalization of the integrated circuit supply chain has introduced untrustworthy entities at various stages, arousing increasing attention to hardware security research from both academia and industry. Some tasks in hardware security research require \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "57", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Sun:2026:GCR, author = "Rong Sun and Qi Xu and Song Chen and Yi Kang and Bei Yu", title = "{GoSteiner}: Constructing Rectilinear {Steiner} Minimum Tree on Directed Graph", journal = j-TODAES, volume = "31", number = "3", pages = "58:1--58:20", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3788283", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The Rectilinear Steiner Minimum Tree (RSMT) problem is a key issue in the back-end physical design of integrated circuits (ICs), which directly affects the quality of the routing. In this work, we formulate the RSMT problem as a sequential decision \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. Syst.", articleno = "58", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Maisto:2026:SVF, author = "Vincenzo Maisto and Stefano Mercogliano and Manuel Maddaluno and Alessandro Cilardo", title = "The {Simply-V Framework}: an Extensible {RISC-V} Reconfigurable Soft-{SoC} for Open Research and Fast Prototyping", journal = j-TODAES, volume = "31", number = "3", pages = "59:1--59:32", month = may, year = "2026", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3787500", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Mar 17 15:38:52 MDT 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "The recent rise of open hardware, mainly driven by the momentum of the RISC-V ecosystem, has sparked significant innovation in the development of open-source CPUs and SoCs. 
This movement has enabled broad exploration across academia and industry, fostering collaboration and reuse. However, the diversity and openness that empower this space also introduce challenges: academic projects often fall short of industry-grade robustness, and meaningful comparison across hardware platforms remains difficult due to ad hoc infrastructures, lack of standardization, and simulation limitations. To ease the work of researchers some key challenges must be faced in open hardware development: platforms' reconfigurability, ease of integration of third-party IPs, and support for technological heterogeneity. A core problem lies in validating and comparing CPUs and SoC components across varying protocols, toolchains, and design languages, especially in real hardware settings. To address these issues, we present Simply-V, a flexible FPGA-based soft-SoC platform designed for rapid prototyping and open hardware research. Simply-V enables plug-and-play support for multiple CPUs, IPs and accelerators, offers structured configurability across embedded and high-performance profiles, and supports the integration of both RTL and HLS-based components. Capabilities such as a high-level configuration flow, frequency scaling, and cross-device portability make our platform a powerful tool to simplify open hardware research. We demonstrate the SoC generator's capabilities through multi-task FreeRTOS examples, platform-fair CPU benchmarking and the iterative development of HLS-designed convolutional accelerators. Moreover, we validate multi-accelerator and multi-CPU scalability and compare with the state-of-the-art SoC generators. Our platform showcases simplified fast prototyping, configurability, scalability and heterogeneous IP support on real hardware. Simply-V is openly available at \url{https://github.com/HiSA-Team/Simply-V}", acknowledgement = ack-nhfb, ajournal = "ACM Transact. Des. Automat. Electron. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", }