%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.70", %%% date = "14 January 2026", %%% time = "06:38:27 MDT", %%% filename = "jetc.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "52657 28076 146617 1399572", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "ACM Journal on Emerging Technologies in %%% Computing Systems (JETC); bibliography; %%% BibTeX", %%% license = "public domain", %%% supported = "no", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% the journal ACM Journal on Emerging %%% Technologies in Computing Systems (JETC) %%% (CODEN unknown, ISSN: 1550-4832 (print), %%% 1550-4840 (electronic)), for 2005--date. %%% %%% Publication began with volume 1, number 1, %%% in March 2005. The journal appears %%% quarterly. %%% %%% The journal has a World-Wide Web site at: %%% %%% http://www.acm.org/pubs/jetc %%% %%% Tables-of-contents of all issues are %%% available at: %%% %%% http://www.acm.org/pubs/contents/journals/jetc/ %%% http://portal.acm.org/browse_dl.cfm?idx=J967 %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. %%% %%% At version 1.70, the COMPLETE journal %%% coverage looked like this: %%% %%% 2005 ( 7) 2013 ( 30) 2021 ( 63) %%% 2006 ( 11) 2014 ( 57) 2022 ( 81) %%% 2007 ( 15) 2015 ( 42) 2023 ( 34) %%% 2008 ( 24) 2016 ( 27) 2024 ( 15) %%% 2009 ( 19) 2017 ( 50) 2025 ( 14) %%% 2010 ( 15) 2018 ( 48) 2026 ( 10) %%% 2011 ( 20) 2019 ( 39) %%% 2012 ( 34) 2020 ( 44) %%% %%% Article: 699 %%% %%% Total entries: 699 %%% %%% Data for this bibliography was derived from %%% data at the ACM Web site. 
%%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. %%% %%% The bibsource keys in the bibliography %%% entries below indicate the data sources. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% Spelling has been verified with the UNIX %%% spell and GNU ispell programs using the %%% exception dictionary stored in the %%% companion file with extension .sok. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ==================================================================== @Preamble{"\input bibnames.sty" # "\ifx \undefined \circled \def \circled #1{(#1)} \fi" # "\ifx \undefined \pkg \def \pkg #1{{{\tt #1}}} \fi" # "\ifx \undefined \reg \def \reg {\circled{R}} \fi" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-JETC = "ACM Journal on Emerging Technologies in Computing Systems (JETC)"} %%% ==================================================================== %%% Bibliography entries: @Article{Irwin:2005:E, author = "Mary Jane Irwin and Vijaykrishnan Narayanan", title = "Editorial", journal = j-JETC, volume = "1", number = "1", pages = "1--6", month = apr, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Sep 17 15:29:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Narendra:2005:CDC, author = "Siva G. 
Narendra", title = "Challenges and design choices in nanoscale {CMOS}", journal = j-JETC, volume = "1", number = "1", pages = "7--49", month = apr, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Sep 17 15:29:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lim:2005:PPB, author = "Sung Kyu Lim and Ramprasad Ravichandran and Mike Niemier", title = "Partitioning and placement for buildable {QCA} circuits", journal = j-JETC, volume = "1", number = "1", pages = "50--72", month = apr, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Sep 17 15:29:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gojman:2005:EDS, author = "Benjamin Gojman and Eric Rachlin and John E. 
Savage", title = "Evaluation of design strategies for stochastically assembled nanoarray memories", journal = j-JETC, volume = "1", number = "2", pages = "73--108", month = jul, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Sep 17 15:29:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Dehon:2005:NBP, author = "Andr{\'e} DeHon", title = "Nanowire-based programmable architectures", journal = j-JETC, volume = "1", number = "2", pages = "109--162", month = jul, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Sep 17 15:29:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Huang:2005:TBQ, author = "J. Huang and M. Momenzadeh and L. Schiano and M. Ottavi and F. 
Lombardi", title = "Tile-based {QCA} design using majority-like logic primitives", journal = j-JETC, volume = "1", number = "3", pages = "163--185", month = oct, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Mar 7 16:16:02 MST 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chakrabarty:2005:DAM, author = "Krishnendu Chakrabarty and Jun Zeng", title = "Design automation for microfluidics-based biochips", journal = j-JETC, volume = "1", number = "3", pages = "186--223", month = oct, year = "2005", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Mar 7 16:16:02 MST 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Patwardhan:2006:NNS, author = "Jaidev P. Patwardhan and Chris Dwyer and Alvin R. Lebeck and Daniel J. 
Sorin", title = "{NANA}: a nano-scale active network architecture", journal = j-JETC, volume = "2", number = "1", pages = "1--30", month = jan, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 28 07:08:02 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{VanMeter:2006:AIQ, author = "Rodney {Van Meter} and Mark Oskin", title = "Architectural implications of quantum computing technologies", journal = j-JETC, volume = "2", number = "1", pages = "31--63", month = jan, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 28 07:08:02 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xie:2006:DSE, author = "Yuan Xie and Gabriel H. 
Loh and Bryan Black and Kerry Bernstein", title = "Design space exploration for {$3$D} architectures", journal = j-JETC, volume = "2", number = "2", pages = "65--103", month = apr, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 28 07:08:02 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Su:2006:YER, author = "Fei Su and Krishnendu Chakrabarty", title = "Yield enhancement of reconfigurable microfluidics-based biochips using interstitial redundancy", journal = j-JETC, volume = "2", number = "2", pages = "104--128", month = apr, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 28 07:08:02 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Savage:2006:RAN, author = "John E. Savage and Eric Rachlin and Andr{\'e} DeHon and Charles M. 
Lieber and Yue Wu", title = "Radial addressing of nanowires", journal = j-JETC, volume = "2", number = "2", pages = "129--154", month = apr, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 28 07:08:02 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Massoud:2006:MDC, author = "Yehia Massoud and Arthur Nieuwoudt", title = "Modeling and design challenges and solutions for carbon nanotube-based interconnect in future high performance integrated circuits", journal = j-JETC, volume = "2", number = "3", pages = "155--196", month = jul, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Nov 16 18:25:43 MST 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tahoori:2006:AID, author = "Mehdi B. 
Tahoori", title = "Application-independent defect tolerance of reconfigurable nanoarchitectures", journal = j-JETC, volume = "2", number = "3", pages = "197--218", month = jul, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Nov 16 18:25:43 MST 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Datta:2006:ADF, author = "Kushal Datta and Arindam Mukherjee and Arun Ravindran", title = "Automated design flow for diode-based nanofabrics", journal = j-JETC, volume = "2", number = "3", pages = "219--241", month = jul, year = "2006", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Nov 16 18:25:43 MST 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ottavi:2006:HHE, author = "Marco Ottavi and Luca Schiano and Fabrizio Lombardi and Douglas Tougaw", title = "{HDLQ}: {A HDL} environment for {QCA} design", journal = j-JETC, volume = "2", number = "4", pages = "243--261", month = oct, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1216396.1216397", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:17 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Emerging technologies have attracted a substantial interest in overcoming the physical limitations of CMOS as projected at the end of the Technology Roadmap; among these technologies, quantum-dot cellular automata (QCA) relies on different and novel paradigms to implement dense, low power circuits and systems for 
high-performance computing. As applicable to existing technologies, a hierarchical process can be utilized to facilitate the design of QCA circuits. Tools and methodologies both at system and physical levels are required to support all design phases. This article presents an HDL model to describe QCA ``devices'' (also referred elsewhere in the technical literature as building blocks, i.e., majority voter, inverter, wire, crossover) and facilitate the evaluation of their design. This tool, referred to as HDLQ, allows a designer to verify the logic characteristics of a QCA system, while supporting within a design environment different operational mechanisms (such as fault injection) and the unique features of QCA (such as bidirectionality and timing/clocking partitioning). The applicability of this design environment to various memory circuits for logic and timing verification is presented in detail. Various defective conditions for kinks due to thermodynamic effects and permanent faults due to manufacturing defects are considered for injection.", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "CAD; fault injection; HDL; QCA", } @Article{Davids:2006:MFD, author = "Daniel Davids and Siddhartha Datta and Arindam Mukherjee and Bharat Joshi and Arun Ravindran", title = "Multiple fault diagnosis in digital microfluidic biochips", journal = j-JETC, volume = "2", number = "4", pages = "262--276", month = oct, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1216396.1216398", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:17 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidics-based biochips consist of microfluidic arrays on rigid substrates through which, movement of fluids is tightly controlled to facilitate biological reactions. 
Biochips are soon expected to revolutionize biosensing, clinical diagnostics, and drug discovery. Critical to the deployment of biochips in such diverse areas is the dependability of these systems. Thus, robust testing techniques are required to ensure an adequate level of system dependability. Due to the underlying mixed technology and energy domains, such biochips exhibit unique failure mechanisms and defects. In this article we present a highly effective fault diagnosis strategy that uses a single source and sink to detect and locate multiple faults in a microfluidic array, without flooding the array, a problem that has hampered realistic implementations of all existing strategies. The strategy renders itself well for a built-in self-test that could drastically reduce the operating cost of microfluidic biochips. It can be used during both the manufacturing phase of the biochip, as well as field operation. Furthermore, the algorithm can pinpoint the actual fault, as opposed to merely the faulty regions that are typically identified by strategies proposed in the literature. Also, analytical results suggest that it is an effective strategy that can be used to design highly dependable biochip systems.", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "droplet flooding; faults tolerance; Microfluidic biochip; multiple fault; testing", } @Article{Prasad:2006:DSA, author = "Aditya K. Prasad and Vivek V. Shende and Igor L. Markov and John P. Hayes and Ketan N. 
Patel", title = "Data structures and algorithms for simplifying reversible circuits", journal = j-JETC, volume = "2", number = "4", pages = "277--293", month = oct, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1216396.1216399", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:17 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic is motivated by low-power design, quantum circuits, and nanotechnology. We develop a compact representation of small reversible circuits to generate and store optimal circuits for all 40,320 three-input reversible functions, and millions of four-input circuits. This allows implementing a function optimally in constant time for use in the peephole optimization of larger circuits produced by existing techniques, and guarantees that every three-bit subcircuit is optimal. To generate subcircuits, we use a graph-based data structure and algorithms for circuit restructuring. Finally, we demonstrate a suboptimal circuit for which peephole optimization fails.", acknowledgement = ack-nhfb, fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "circuit libraries; Circuit simplification; optimal subcircuit", } @Article{Zhao:2007:PTM, author = "Wei Zhao and Yu Cao", title = "Predictive technology model for nano-{CMOS} design exploration", journal = j-JETC, volume = "3", number = "1", pages = "1:1--1:??", month = apr, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1229175.1229176", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:25 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A predictive MOSFET model is critical for early circuit design research. 
In this work, a new generation of Predictive Technology Model (PTM) is developed, covering emerging physical effects and alternative structures, such as the double-gate device (i.e., FinFET). Based on physical models and early stage silicon data, PTM of bulk and double-gate devices are successfully generated from 130nm to 32nm technology nodes, with effective channel length down to 13nm. By tuning only ten primary parameters, PTM can be easily customized to cover a wide range of process uncertainties. The accuracy of PTM predictions is comprehensively verified with published silicon data: the error of the current is below 10\% for both NMOS and PMOS. Furthermore, the new PTM correctly captures process sensitivities in the nanometer regime. PTM is available online at \path|http://www.eas.asu.edu/~ptm|.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "early design exploration; FinFET; predictive modeling; process variations; Technology scaling", } @Article{Schulhof:2007:SRC, author = "Gabriel Schulhof and Konrad Walus and Graham A. Jullien", title = "Simulation of random cell displacements in {QCA}", journal = j-JETC, volume = "3", number = "1", pages = "2:1--2:??", month = apr, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1229175.1229177", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:25 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We analyze the behavior of quantum-dot cellular automata (QCA) building blocks in the presence of random cell displacements. The QCA cells are modeled using the coherence vector description and simulated using QCADesigner. 
We evaluate various fundamental circuits: the wire, the inverter, the majority gate, and the two-wire crossing approaches: the coplanar crossover and the multilayer crossover. Our results show that different building blocks have different displacement tolerances. The coplanar crossover and inverter perform the weakest. The wire is the most robust. We have found displacement tolerances to be a function of circuit layout and geometry rather than cell size.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "fabrication variances; fault tolerance; QCA; Quantum-dot cellular automata", } @Article{Rose:2007:DCM, author = "Garrett S. Rose and Yuxing Yao and James M. Tour and Adam C. Cabe and Nadine Gergel-Hackett and Nabanita Majumdar and John C. Bean and Lloyd R. Harriott and Mircea R. Stan", title = "Designing {CMOS}\slash molecular memories while considering device parameter variations", journal = j-JETC, volume = "3", number = "1", pages = "3:1--3:??", month = apr, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1229175.1229178", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:25 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In recent years, many advances have been made in the development of molecular scale devices. Experimental data shows that these devices have potential for use in both memory and logic. This article describes the challenges faced in building crossbar array-based molecular memory and develops a methodology to optimize molecular scale architectures based on experimental device data taken at room temperature. 
In particular, issues in reading and writing such a memory using CMOS are discussed, and a solution is introduced for easily reading device conductivity states (typically characterized by very small currents). Additionally, a metric is derived to determine the voltages for writing to the crossbar array. The proposed memory design is also simulated with consideration to device parameter variations. Thus, the results presented here shed light on important design choices to be made at multiple abstraction levels, from devices to architectures. Simulation results, incorporating experimental device data, are presented using Cadence Spectre.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "CMOS; molecular electronics; nanotechnology", } @Article{McKee:2007:ESI, author = "Sally A. McKee", title = "Editorial to special issue on reliable computing", journal = j-JETC, volume = "3", number = "2", pages = "4:1--4:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265950", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Eshaghian-Wilner:2007:SWN, author = "Mary M. Eshaghian-Wilner and Alex Khitun and Shiva Navab and Kang L. 
Wang", title = "The spin-wave nanoscale reconfigurable mesh and the labeling problem", journal = j-JETC, volume = "3", number = "2", pages = "5:1--5:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265951", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we present a nanoscale reconfigurable mesh which is interconnected by ferromagnetic spin-wave buses. In this architecture, unlike the traditional spin-based nano structures which transmit charge, waves are transmitted. As a result, the power consumption of the proposed modules can be low. This reconfigurable mesh, while requiring the same number of switches and buses as the standard reconfigurable mesh, is capable of simultaneously transmitting $N$ waves on each of the spin-wave buses. Because of this highly parallel feature, very fast and fault-tolerant algorithms can be designed. 
To illustrate the superior performance of the proposed spin-wave reconfigurable mesh, we present three fast labeling algorithms.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "image processing; nanoscale architectures; reconfigurable mesh; Spin waves", } @Article{Prodan:2007:DDE, author = "Lucian Prodan and Mihai Udrescu and Oana Boncalo and Mircea Vladutiu", title = "Design for dependability in emerging technologies", journal = j-JETC, volume = "3", number = "2", pages = "6:1--6:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265952", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As current microelectronics will reach their physical limits within the foreseeable future, emerging technologies may offer a solution for maintaining the trends to increase computing performance. Biologically-inspired and quantum computing represent two emerging technology vectors for novel computing architectures within nanoelectronics. However, potential benefits will come at the cost of increased device sensitivity to the surrounding environment. This article provides a dependability perspective over these technologies from a designer's standpoint. 
Maintaining or increasing the dependability of unconventional computational processes is discussed in two different contexts, a bio-inspired computing architecture (the Embryonics project) and a quantum computational architecture (the QUERIST project).", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "bio-inspired computing; bio-inspired digital design; Dependability; Embryonics; emerging technologies; evolvable hardware; fault-tolerance assessment; quantum computing; reliability", } @Article{Tyrrell:2007:ED, author = "Andy M. Tyrrell and Andrew J. Greensted", title = "Evolving dependability", journal = j-JETC, volume = "3", number = "2", pages = "7:1--7:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265953", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Evolvable hardware offers much for the future of complex systems design. Evolutionary techniques not only have the potential for larger solution space coverage, but when implemented on hardware, also allow system designs to adapt to changes in the environment, including failures in system components. This article reviews a number of novel techniques, all based in the field of bio-inspired systems, that provide varying degrees of dependability over and above standard designs. 
In particular, three different techniques are considered: using FPGAs and ideas from developmental biology to create designs that possess emergent fault-tolerant properties, using FPGAs and continuous evolution to circumvent faults as and when they occur, and, finally, we consider a novel ASIC designed and built with bio-inspired systems in mind.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "bio-inspired architectures; Evolutionary algorithms; fault tolerance; RISA architecture", } @Article{Sekanina:2007:EFR, author = "Luk{\'a}{\v{s}} Sekanina", title = "Evolutionary functional recovery in virtual reconfigurable circuits", journal = j-JETC, volume = "3", number = "2", pages = "8:1--8:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265954", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A virtual reconfigurable circuit (VRC) is a domain-specific reconfigurable device developed using an ordinary FPGA in order to easily implement evolvable hardware applications. While a fast partial runtime reconfiguration and application-specific programmable elements represent the main advantages of VRC, the main disadvantage of the VRC is the area consumed. This study describes experiments conducted to estimate how the use of VRC influences the dependability of FPGA-based evolvable systems. It is shown that these systems are not as sensitive to faults as their area-demanding implementations might suggest. An evolutionary algorithm is utilized to design fault tolerant circuits as well as to perform an automatic functional recovery when faults are detected in the configuration memory of the FPGA. 
All the experiments are performed on models of reconfigurable devices.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Dependability; evolutionary algorithms; evolvable hardware; FPGA", } @Article{Tempesti:2007:SRH, author = "Gianluca Tempesti and Daniel Mange and Pierre-Andre Mudry and Jo{\"e}l Rossier and Andre Stauffer", title = "Self-replicating hardware for reliability: {The Embryonics Project}", journal = j-JETC, volume = "3", number = "2", pages = "9:1--9:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265955", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The multicellular structure of biological organisms and the interpretation in each of their cells of a chemical program (the DNA string or genome) is the source of inspiration for the Embryonics (embryonic electronics) project, whose final objective is the design of highly robust integrated circuits, endowed with properties usually associated with the living world: self-repair and self-replication. In this article, we provide an overview of our latest research in the domain of the self-replication of processing elements within a programmable logic substrate, a key prerequisite for achieving system-level fault tolerance in our bio-inspired approach.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Bio-inspired architectures; embryonic electronics; growth; hierarchical fault tolerance; self-repair; self-replication", } @Article{Patwardhan:2007:SOD, author = "Jaidev Patwardhan and Chris Dwyer and Alvin R. 
Lebeck", title = "A self-organizing defect tolerant {SIMD} architecture", journal = j-JETC, volume = "3", number = "2", pages = "10:1--10:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1265949.1265956", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:32 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The continual decrease in transistor size (through either scaled CMOS or emerging nanotechnologies) promises to usher in an era of tera to peta-scale integration but with increasing defects. Regardless of fabrication methodology (top-down or bottom-up), defect-tolerant architectures are necessary to exploit the full potential of future increased device densities.\par This article explores a defect-tolerant SIMD architecture (SOSA) that self-organizes a large number of limited capability nodes with high defect rates into SIMD processing elements. Simulation results show that SOSA matches or exceeds the performance of conventional systems for moderate to large problems, but with lower power density.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "bit-serial; data parallel; defect tolerance; DNA; nanocomputing; Self-organizing; SIMD", } @Article{Chakrabarty:2007:ESI, author = "Krishnendu Chakrabarty and Sachin Sapatnekar", title = "Editorial to special issue {DAC 2006}", journal = j-JETC, volume = "3", number = "3", pages = "11:1--11:??", month = nov, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1295231.1295232", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:49 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in 
Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Paul:2007:PBC, author = "Bipul C. Paul and Shinobu Fujita and Masaki Okajima and Thomas Lee", title = "Prospect of ballistic {CNFET} in high performance applications: {Modeling} and analysis", journal = j-JETC, volume = "3", number = "3", pages = "12:1--12:??", month = nov, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1295231.1295233", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:49 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With the advent of carbon nanotube technology, evaluating circuit and system performance using these devices is becoming extremely important. In this article, we present a quasi-analytical device model for intrinsic ballistic CNFET, which can be used in any conventional circuit simulator like SPICE. This simple quasi-analytical model is effective in a wide variety of CNFET structures as well as for a wide range of operating conditions in the digital circuit application domain. We also provide insight into how the parasitic fringe capacitance in state-of-the-art CNFET geometries impacts the overall performance of CNFET circuits. We show that unless the device width can be significantly reduced, the effective gate capacitance of CNFET will be strongly dominated by the parasitic fringe capacitances, and the superior performance of intrinsic CNFET over silicon MOSFET cannot be achieved in circuit. 
We further show that unlike conventional MOSFET, nanotube FETs are significantly less sensitive to many process parameter variations due to their inherent device structures and cylindrical gate geometry.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Ballistic carbon nanotube FET (CNFET); circuit compatible model; circuit performance; parasitic capacitance; process variability", } @Article{Yuh:2007:PDT, author = "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang", title = "Placement of defect-tolerant digital microfluidic biochips using the {$T$}-tree formulation", journal = j-JETC, volume = "3", number = "3", pages = "13:1--13:??", month = nov, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1295231.1295234", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:49 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Droplet-based microfluidic biochips have recently gained much attention and are expected to revolutionize the biological laboratory procedures. As biochips are adopted for the complex procedures in molecular biology, its complexity is expected to increase due to the need of multiple and concurrent assays on a chip. In this article, we formulate the placement problem of digital microfluidic biochips with a tree-based topological representation, called $T$-tree. To the best knowledge of the authors, this is the first work that adopts a topological representation to solve the placement problem of digital microfluidic biochips. We also consider the defect tolerant issue to avoid to use defective cells due to fabrication. 
Experimental results demonstrate that our approach is more efficient and effective than the previous unified synthesis and placement framework.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "biochip; Microfluidics; placement", } @Article{Xu:2007:ADP, author = "Tao Xu and William L. Hwang and Fei Su and Krishnendu Chakrabarty", title = "Automated design of pin-constrained digital microfluidic biochips under droplet-interference constraints", journal = j-JETC, volume = "3", number = "3", pages = "14:1--14:??", month = nov, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1295231.1295235", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:49 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidics-based biochips, also referred to as lab-on-a-chip, are devices that integrate fluid-handling functions such as sample preparation, analysis, separation, and detection. This emerging technology combines electronics with biology to open new application areas such as point-of-care diagnosis, on-chip DNA analysis, and automated drug discovery. We propose a design automation method for pin-constrained biochips that manipulate nanoliter volumes of discrete droplets on a microfluidic array. In contrast to the direct-addressing scheme that has been studied thus far in the literature, we assign a small number of independent control pins to a large number of electrodes in the biochip, thereby reducing design complexity and product cost. 
The design procedure relies on a droplet-trace-based array partitioning scheme and an efficient pin assignment technique, referred to as the ``Connect-5 algorithm.'' The proposed method is evaluated using a set of multiplexed bioassays.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "biochips; droplets; microfluidics; Physical design automation", } @Article{Rad:2007:EAP, author = "Reza M. P. Rad and Mohammad Tehranipoor", title = "Evaluating area and performance of hybrid {FPGAs} with nanoscale clusters and {CMOS} routing", journal = j-JETC, volume = "3", number = "3", pages = "15:1--15:??", month = nov, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1295231.1295236", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:03:49 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Advances in fabrication technology of nanoscale devices such as nanowires, carbon nanotubes and molecular switches provide new opportunities for implementing cluster-based FPGAs. Extensive research is needed to evaluate area and performance of FPGAs made from these devices and compare with their CMOS counterparts. In this work, we propose a hybrid FPGA that uses nanoscale clusters with a functionality similar to the clusters of traditional CMOS FPGAs. The proposed cluster is constructed by a crossbar of nanowires and can be configured to implement the required LUTs and intracluster MUXes. A CMOS interface is also proposed to provide configuration and memory elements for the nanoscale cluster. In the proposed architecture, inter-cluster routing remains at CMOS scale. We have developed models for area and delay of clusters and interconnects of the proposed hybrid FPGA. 
FPGA tools are configured with these models and used to synthesize and configure the benchmark circuits onto the hybrid FPGAs with NiSi nanowires or nanotubes. Experiments are conducted to evaluate and compare area and performance of the hybrid FPGA and traditional CMOS FPGA (scaled to 22nm). Up to 82\% area reduction was obtained from implementing MCNC benchmarks on the hybrid FPGA. Performance of the hybrid FPGA is shown to be close to that of CMOS FPGA.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "CMOS; FPGA; Nanotechnology; performance; reliability", } @Article{Su:2008:HLS, author = "Fei Su and Krishnendu Chakrabarty", title = "High-level synthesis of digital microfluidic biochips", journal = j-JETC, volume = "3", number = "4", pages = "1:1--1:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324177.1324178", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:00 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidic biochips offer a promising platform for massively parallel DNA analysis, automated drug discovery, and real-time biomolecular recognition. Current techniques for full-custom design of droplet-based ``digital'' biochips do not scale well for concurrent assays and for next-generation system-on-chip (SOC) designs that are expected to include microfluidic components. We propose a system design methodology that attempts to apply classical high-level synthesis techniques to the design of digital microfluidic biochips. We focus here on the problem of scheduling bioassay functions under resource constraints. We first develop an optimal scheduling strategy based on integer linear programming. 
However, because the scheduling problem is NP-complete, we also develop two heuristic techniques that scale well for large problem instances. A clinical diagnostic procedure, namely multiplexed in-vitro diagnostics on human physiological fluids, is first used to illustrate and evaluate the proposed method. Next, the synthesis approach is applied to a protein assay, which serves as a more complex bioassay application. The proposed synthesis approach is expected to reduce human effort and design cycle time, and it will facilitate the integration of microfluidic components with microelectronic components in next-generation SOCs.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "biochips; High-level synthesis; microfluidics; scheduling; system-on-chip", } @Article{VanMeter:2008:ADM, author = "Rodney {Van Meter} and W. J. Munro and Kae Nemoto and Kohei M. Itoh", title = "Arithmetic on a distributed-memory quantum multicomputer", journal = j-JETC, volume = "3", number = "4", pages = "2:1--2:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324177.1324179", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:00 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We evaluate the performance of quantum arithmetic algorithms run on a distributed quantum computer (a quantum multicomputer). We vary the node capacity and I/O capabilities, and the network topology. The tradeoff of choosing between gates executed remotely, through ``teleported gates'' on entangled pairs of qubits (telegate), versus exchanging the relevant qubits via quantum teleportation, then executing the algorithm using local gates (teledata), is examined. 
We show that the teledata approach performs better, and that carry-ripple adders perform well when the teleportation block is decomposed so that the key quantum operations can be parallelized. A node size of only a few logical qubits performs adequately provided that the nodes have two transceiver qubits. A linear network topology performs acceptably for a broad range of system sizes and performance parameters. We therefore recommend pursuing small, high-I/O bandwidth nodes and a simple network. Such a machine will run Shor's algorithm for factoring large numbers efficiently.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "quantum computer architecture; Quantum computing", } @Article{Ma:2008:MCE, author = "Xiaojun Ma and Jing Huang and Fabrizio Lombardi", title = "A model for computing and energy dissipation of molecular {QCA} devices and circuits", journal = j-JETC, volume = "3", number = "4", pages = "3:1--3:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324177.1324180", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:00 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum-dot Cellular Automata is an emerging technology that offers significant improvements over CMOS. Recently QCA has been advocated as a technology for implementing reversible computing. However, existing tools for QCA design and evaluation have limited capabilities. This paper presents a new mechanical-based model for computing in QCA. By avoiding a full quantum-thermodynamical calculation, it offers a classical view of the principles of QCA operation and can be used in evaluating energy dissipation for reversible computing. 
The proposed model is mechanically based and is applicable to six-dot (neutrally charged) QCA cells for molecular implementation. The mechanical model consists of a sleeve of changing shape; four electrically charged balls are connected by a stick that rotates around an axle in the sleeve. The sleeve acts as a clocking unit, while the angular position of the stick within the changing shape of the sleeve, identifies the phase for quasi-adiabatic switching. A thermodynamic analysis of the proposed model is presented. The behaviors of various QCA basic devices and circuits are analyzed using the proposed model. It is shown that the proposed model is capable of evaluating the energy consumption for reversible computing at device and circuit levels for molecular QCA implementation. As applicable to QCA, two clocking schemes are also analyzed for energy dissipation and performance (in terms of number of clocking zones).", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "emerging technology; QCA; reversible computing; thermodynamic analysis", } @Article{Chuang:2008:SRS, author = "Min-Lun Chuang and Chun-Yao Wang", title = "Synthesis of reversible sequential elements", journal = j-JETC, volume = "3", number = "4", pages = "4:1--4:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324177.1324181", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:00 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "To construct a reversible sequential circuit, reversible sequential elements are required. This work presents novel designs of reversible sequential elements such as the $D$ latch, $ J K$ latch, and $T$ latch. Based on these reversible latches, we construct the designs of the corresponding flip-flops. 
Then we further discuss the physical implementations of our designs based on electron waveguide $Y$-branch switch technology. Test costs, including test generation and test application, of reversible sequential circuits with these reversible flip-flops are also discussed. Compared with previous work, the implementation cost of our new designs, including the number of gates and the number of garbage outputs, is significantly reduced. The number of gates in our designs is 47.4\% of the designs in previous work on average. The number of garbage outputs in our designs is 25\% of the designs in previous work on average.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Reversible logic; sequential circuits; sequential elements", } @Article{Metodi:2008:HLI, author = "Tzvetan S. Metodi and Darshan D. Thaker and Andrew W. Cross and Isaac L. Chuang and Frederic T. Chong", title = "High-level interconnect model for the quantum logic array architecture", journal = j-JETC, volume = "4", number = "1", pages = "1:1--1:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1330521.1330522", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:09 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We summarize the main characteristics of the quantum logic array (QLA) architecture with a careful look at the key issues not described in the original conference publications: primarily, the teleportation-based logical interconnect. The design goal of the quantum logic array architecture is to illustrate a model for a large-scale quantum architecture that solves the primary challenges of system-level reliability and data distribution over large distances. 
The QLA's logical interconnect design, which employs the quantum repeater protocol, is in principle capable of supporting the communication requirements for applications as large as the factoring of a 2048-bit number using Shor's quantum factoring algorithm. Our physical-level assumptions and architectural component validations are based on the trapped ion technology for implementing quantum computing.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "fault tolerance; large scale; QLA; quantum; Quantum computer architecture design; teleportation", } @Article{Donald:2008:RLS, author = "James Donald and Niraj K. Jha", title = "Reversible logic synthesis with {Fredkin} and {Peres} gates", journal = j-JETC, volume = "4", number = "1", pages = "2:1--2:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1330521.1330523", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:09 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic has applications in low-power computing and quantum computing. Most reversible logic synthesis methods are tied to particular gate types, and cannot synthesize large functions. This article extends RMRLS, a reversible logic synthesis tool, to include additional gate types. While classic RMRLS can synthesize functions using NOT, CNOT, and $n$-bit Toffoli gates, our work details the inclusion of $n$-bit Fredkin and Peres gates. 
We find that these additional gates reduce the average gate count for three-variable functions from 6.10 to 4.56, and improve the synthesis results of many larger functions, both in terms of gate count and quantum cost.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Quantum computing; reversible logic", } @Article{Guiducci:2008:HPP, author = "Carlotta Guiducci and Christine Nardini", title = "High parallelism, portability, and broad accessibility: {Technologies} for genomics", journal = j-JETC, volume = "4", number = "1", pages = "3:1--3:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1330521.1330524", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:09 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Biotechnology is an area of great innovations that promises to have deep impact on everyday life thanks to profound changes in biology, medicine, and health care. This article will span from the description of the biochemical principles of molecular biology to the definition of the physics that supports the technology and to the devices and algorithms necessary to observe molecular events in a controlled, portable, and highly parallel manner. 
Throughout this discussion, constant attention will be given to the ultimate goals and applications of these innovations as well as to the related issues.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "biosensors; Genomics; microarrays; point-of-care diagnostics", } @Article{Narayanan:2008:E, author = "Vijaykrishnan Narayanan", title = "Editorial", journal = j-JETC, volume = "4", number = "2", pages = "4:1--4:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350764", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bahar:2008:IJA, author = "R. 
Iris Bahar and Krishnendu Chakrabarty", title = "Introduction to joint {ACM JETC\slash TODAES} special issue on new, emerging, and specialized technologies", journal = j-JETC, volume = "4", number = "2", pages = "5:1--5:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350765", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kuo:2008:MSA, author = "Shih-Hsien Kuo and Bruce Tidor and Jacob White", title = "A meshless, spectrally accurate, integral equation solver for molecular surface electrostatics", journal = j-JETC, volume = "4", number = "2", pages = "6:1--6:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350766", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The need to determine electrostatic fields in domains bounded by molecular surfaces arises in a number of nanotechnology applications including: biomolecule design, carbon nanotube simulation, and molecular electron transport analysis. Molecular surfaces are typically smooth, without the corners common in electrical interconnect problems, but are often so geometrically complicated that numerical evaluation of the associated electrostatic fields is extremely time-consuming. In this paper we describe and demonstrate a meshless spectrally-accurate integral equation method that only requires a description of the molecular surface in the form of a collection of surface points. 
Our meshless method is a synthesis of techniques, suitably adapted, including: spherical harmonic surface interpolation, spectral-element-like integral equation discretization, integral desingularization via variable transformation, and matrix-implicit iterative matrix solution. The spectral accuracy of this combined method is verified using analytically solvable sphere and ellipsoid problems, and then its accuracy and efficiency is demonstrated numerically by solving capacitance and coupled Poisson\slash linearized Poisson--Boltzmann problems associated with a commonly used model of a molecule in solution. The results demonstrate that for a tolerance of 10$^{-3}$ this new approach reduces the number of unknowns by as much as two orders of magnitude over the more commonly used flat panel methods.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "integral equation; meshless; Poisson--Boltzmann equation; spectral method", } @Article{Deng:2008:CNT, author = "Jie Deng and Albert Lin and Gordon C. Wan and H.-S. Philip Wong", title = "Carbon nanotube transistor compact model for circuit design and performance optimization", journal = j-JETC, volume = "4", number = "2", pages = "7:1--7:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350767", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this paper, we describe the development of the Stanford University Carbon Nanotube FET (CNFET) Compact Model. The CNFET Model is a circuit-compatible, compact model which describes enhancement-mode, CMOS-like CNFETs. It can be used to simulate both functionality and performance of large-scale circuits with hundreds of CNFETs. 
To produce realistic and relevant results, the model accounts for several practical non-idealities such as scattering in the near-ballistic channel, effects of the source/drain extension region, and charge-screening for multiple-nanotube CNFETs. The model also includes a full transcapacitance network for more accurate transient and AC results. The Stanford University CNFET Model is implemented in both HSPICE macro language and VerilogA. The VerilogA implementation shows speedups of roughly $ 7 \times $ -- $ 15 \times $ over HSPICE. Applications of the model suggest that $n$- and $p$-CNFETs will have $ 6 \times $ and $ 13 \times $ speed advantage over Si $n$- and $p$-MOSFETs respectively at the 32nm node, and that a CNT density of 250 CNTs/$ \mu $m is ideal for multiple-nanotube gates. Such a compact CNFET model will be absolutely essential in ushering in the Design Era of CNFET circuits as carbon nanotube technology outgrows its ``science discovery'' phase.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "carbon nanotube FET; CNT; compact model; HSPICE; VerilogA", } @Article{Carmona:2008:FMA, author = "Josep Carmona and Jordi Cortadella and Yousuke Takada and Ferdinand Peper", title = "Formal methods for the analysis and synthesis of nanometer-scale cellular arrays", journal = j-JETC, volume = "4", number = "2", pages = "8:1--8:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350768", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Nanometer-scale structures suitable for computing have been investigated by several research groups in recent years. 
A common feature of these structures is their dynamic evolution through cascaded local interactions embedded on a discrete grid. Finding configurations capable of conducting computations is a task that often requires tedious experiments in laboratories. Formal methods --- though used extensively for the specification and verification of software and hardware computing systems --- are virtually unexplored with respect to computational structures at atomic scales. This paper presents a systematic approach toward the application of formal methods in this context, using techniques like abstraction, model-checking, and symbolic representations of states to explore and discover computational structures. The proposed techniques are applied to a system of CO molecules on a grid of Copper atoms, resulting in the design of a complete library of combinational logic gates based on this molecular system. The techniques are also applied on (more general) systems of cellular automata that employ an asynchronous mode of timing. The use of formal methods may narrow the gap between Physical Chemistry and Computer Science, allowing the description of interactions of nanometer scale systems on a level of abstraction suitable to devise computing devices.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "cellular array; model checking; Nanocomputing; symbolic techniques", } @Article{Crocker:2008:MQD, author = "Michael Crocker and Michael Niemier and X. 
Sharon Hu and Marya Lieberman", title = "Molecular {QCA} design with chemically reasonable constraints", journal = j-JETC, volume = "4", number = "2", pages = "9:1--9:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1350763.1350769", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 20 11:04:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article we examine the impacts of the fundamental constraints required for circuits and systems made from molecular Quantum-dot Cellular Automata (QCA) devices. Our design constraints are ``chemically reasonable'' in that we consider the characteristics and dimensions of devices and scaffoldings that have actually been fabricated. This work is a necessary first step for any work in QCA CAD, and can also help shape experiments in the physical sciences for emerging, nano-scale devices. Our work shows that QCA circuits, scaffoldings, substrates, and devices should all be considered simultaneously. Otherwise, there is a very real possibility that the devices and scaffoldings that are eventually manufactured will result in devices that only work in isolation. ``Chemically reasonable'' also means that expected manufacturing defects must be considered. In our simulations we introduce defects associated with self-assembled systems into various designs to begin to define manufacturing tolerances. This work is especially timely as experimentalists are beginning to work on merging experimental tracks that address devices and scaffolds --- and the end result should facilitate correct logical operations.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "defects; Nanotechnology; physical simulation; quantum-dot cellular automata", } @Article{Lebeck:2008:IDS, author = "Alvin R. 
Lebeck and Krishnendu Chakrabarty",
  title =        "Introduction to {DAC 2007} special section",
  journal =      j-JETC,
  volume =       "4",
  number =       "3",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1389089.1389090",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Thu Sep 4 14:23:10 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Xu:2008:IDR,
  author =       "Tao Xu and Krishnendu Chakrabarty",
  title =        "Integrated droplet routing and defect tolerance in
                 the synthesis of digital microfluidic biochips",
  journal =      j-JETC,
  volume =       "4",
  number =       "3",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1389089.1389091",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Thu Sep 4 14:23:10 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Microfluidic biochips are revolutionizing
                 high-throughput DNA sequencing, immunoassays, and
                 clinical diagnostics. As high-throughput bioassays are
                 mapped to digital microfluidic platforms, the need for
                 design automation techniques is being increasingly
                 felt. Moreover, as most applications of biochips are
                 safety-critical in nature, defect tolerance is an
                 essential system attribute. Several synthesis tools
                 have recently been proposed for the automated design
                 of biochips from the specifications of laboratory
                 protocols. However, only a few of these tools address
                 the problem of defect tolerance. In addition, most of
                 these methods do not consider the problem of droplet
                 routing in microfluidic arrays. These methods
                 typically rely on postsynthesis droplet routing to
                 implement biochemical protocols. Such an approach is
                 not only time consuming, but also imposes an undue
                 burden on the chip user. Postsynthesis droplet routing
                 does not guarantee that feasible droplet pathways can
                 be found for area-constrained biochip layouts;
                 nonroutable fabricated biochips must be discarded. We
                 present a synthesis tool that integrates defect
                 tolerance and droplet routing in the design flow.
                 Droplet routability, defined as the ease with which
                 droplet pathways can be determined, is estimated and
                 integrated in the synthesis procedure. Presynthesis
                 and postsynthesis defect-tolerance methods are also
                 presented. We use a large-scale protein assay as a
                 case study to evaluate the proposed synthesis
                 method.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "biochips; microfluidics; module placement; physical
                 design automation",
}

@Article{Huang:2008:RAF,
  author =       "Tsung-Ching Huang and Kwang-Ting (Tim) Cheng and
                 Huai-Yuan Tseng and Chen-Pang Kung",
  title =        "Reliability analysis for flexible electronics: {Case}
                 study of integrated {a-Si:H} {TFT} scan driver",
  journal =      j-JETC,
  volume =       "4",
  number =       "3",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1389089.1389092",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Thu Sep 4 14:23:10 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Flexible electronics fabricated on thin-film,
                 lightweight, and bendable substrates (e.g., plastic)
                 have great potential for novel applications in
                 consumer electronics such as flexible displays,
                 e-paper, and smart labels; however, the key elements,
                 namely thin-film transistors (TFTs), for implementing
                 flexible circuits often suffer from electrical
                 instability.
Therefore, thorough reliability analysis is critical for
                 flexible circuit design to ensure that the circuit
                 will operate reliably throughout its lifetime. In this
                 article we propose a methodology for reliability
                 simulation of hydrogenated amorphous silicon (a-Si:H)
                 TFT circuits. We show that: (1) the threshold voltage
                 ({\em V$_{TH}$ \/}) shift of a single TFT can be
                 estimated by analyzing its operating conditions; and
                 (2) the circuit lifetime can be predicted accordingly
                 by using SPICE-like simulators with proper modeling.
                 We also propose an algorithm to reduce the simulation
                 time by orders of magnitude, with good prediction
                 accuracy. To validate our analytical model and
                 simulation methodology, we compare simulation results
                 with the actual circuit measurements of an integrated
                 a-Si:H TFT scan driver fabricated on a glass substrate
                 and we demonstrate very good consistency.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "amorphous hydrogenated silicon (a-Si:H); flexible
                 electronics; reliability; scan driver; thin-film
                 transistor; threshold voltage",
}

@Article{Li:2008:ADP,
  author =       "Jing Li and Aditya Bansal and Swarop Ghosh and
                 Kaushik Roy",
  title =        "An alternate design paradigm for low-power, low-cost,
                 testable hybrid systems using scaled {LTPS TFTs}",
  journal =      j-JETC,
  volume =       "4",
  number =       "3",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1389089.1389093",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Thu Sep 4 14:23:10 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "This article presents a holistic hybrid design
                 methodology for low-power, low-cost, testable digital
                 designs using low-temperature polycrystalline-silicon
                 thin-film transistors (LTPS TFTs). An alternate
                 scaling rule under low thermal budget (due to flexible
                 substrate) is developed to improve the performance of
                 TFTs in the presence of process variation. We
                 demonstrate that LTPS TFTs can be further optimized
                 for ultralow-power subthreshold operation with
                 performances comparable to contemporary single-crystal
                 silicon-on-insulator (c-Si SOI) devices after process
                 optimization. The optimized LTPS TFTs with high
                 current drivability and less variability can comprise
                 a promising low-cost option to augment Si CMOS
                 technology, opening up a plethora of new hybrid 3D
                 applications. We illustrate one such application: IC
                 testing. Testing of complex VLSI systems is a prime
                 concern due to design cost of DFT circuits, area/delay
                 overheads, and poor test confidence. To harness the
                 benefits of TFT technology, a novel low-power,
                 process-tolerant, generic, and reconfigurable test
                 structure designed using LTPS TFTs is proposed to
                 reduce the test cost, as well as to improve
                 diagnosability and verifiability, of complex VLSI
                 systems. Due to proper optimization of TFT devices,
                 the proposed test structure consumes low power but
                 operates with reasonable performance. Furthermore, the
                 test circuits do not consume any silicon area because
                 they can be integrated on-chip using 3D technology.
                 Since the test architecture is reconfigurable, this
                 eliminates the need to redesign built-in-self-test
                 (BIST) components that may vary from one processor
                 generation to another. We have developed test
                 structures using 200nm TFT devices and evaluated them
                 on designs implemented in 130nm bulk CMOS. For circuit
                 simulations, we have developed a SPICE-compatible
                 model for TFT devices. The BIST components designed
                 using the test structures operate at 0.8--4.3 GHz
                 (compared to 8.2 GHz in bulk CMOS) with low power
                 consumption. The enhanced scan cells partially
                 implemented in TFT (3D hybrid design) consume
                 $ \sim $ 24\% less power and $ \sim $ 15--20\% less
                 area of Si die compared to conventional bulk-Si design
                 (2D planar design), with minimal delay overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "3D integration; BIST; DFT; generic; grain boundary
                 (GB); hybrid system; inherent variation;
                 low-temperature polycrystalline silicon (LTPS);
                 reconfigurable; thin-film transistor (TFT)",
}

@Article{Rad:2008:SNA,
  author =       "Reza Rad and Mohammad Tehranipoor",
  title =        "{SCT}: a novel approach for testing and configuring
                 nanoscale devices",
  journal =      j-JETC,
  volume =       "4",
  number =       "3",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1389089.1389094",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Thu Sep 4 14:23:10 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Novel strategies are necessary to efficiently test
                 and configure emerging reconfigurable nanoscale
                 devices, in addition to providing defect tolerance.
                 This is mainly due to the high defect densities that
                 are expected for these devices. Among different
                 approaches, reconfiguration-based defect avoidance has
                 proven to be a practical solution. However,
                 configuration time, test time, and defect-map size
                 remain among the major challenges for these new
                 devices. In this article, we propose a new approach
                 (called SCT) that simultaneously performs test and
                 configuration. The proposed method uses a built-in
                 self-test (BIST) scheme for test and defect tolerance.
                 The method is based on testing reconfigurable
                 nanoblocks at the time of implementing a function of a
                 desired application on that block. The SCT method
                 considerably reduces the total test and configuration
                 time.
It also eliminates the need for storing the location of
                 defects in a defect map on- or off-chip. The presented
                 probabilistic analysis results show the effectiveness
                 of this method in terms of test and configuration time
                 for architectures with rich interconnect resources.
                 Also, a Verilog simulation model is developed for
                 crossbar-based nano-architectures. This model is used
                 to implement several MCNC benchmarks based on the
                 proposed SCT method. The simulation results
                 demonstrate efficiency of the method in terms of test
                 time and yield under different defect rates.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "configuration and testing; crossbar; fault tolerance;
                 nanowire; reconfigurable nanoscale devices",
}

@Article{Xie:2008:ESI,
  author =       "Yuan Xie and Jason Cong and Paul Franzon",
  title =        "Editorial: {Special} issue on {$3$D} integrated
                 circuits and microarchitectures",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "15:1--15:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412588",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Kgil:2008:PUS,
  author =       "Taeho Kgil and Ali Saidi and Nathan Binkert and Steve
                 Reinhardt and Krisztian Flautner and Trevor Mudge",
  title =        "{PicoServer}: {Using} {$3$D} stacking technology to
                 build energy efficient servers",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412589",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "This article extends our prior work to show that a
                 straightforward use of 3D stacking technology enables
                 the design of compact energy-efficient servers. Our
                 proposed architecture, called PicoServer, employs 3D
                 technology to bond one die containing several simple,
                 slow processing cores to multiple memory dies
                 sufficient for a primary memory. The multiple memory
                 dies are composed of DRAM. This use of 3D stacks
                 readily facilitates wide low-latency buses between
                 processors and memory. These remove the need for an L2
                 cache allowing its area to be re-allocated to
                 additional simple cores. The additional cores allow
                 the clock frequency to be lowered without impairing
                 throughput. Lower clock frequency means that thermal
                 constraints, a concern with 3D stacking, are easily
                 satisfied. We extend our original analysis on
                 PicoServer to include: (1) a wider set of server
                 workloads, (2) the impact of multithreading, and (3)
                 the on-chip DRAM architecture and system memory usage.
                 PicoServer is intentionally simple, requiring only the
                 simplest form of 3D technology where die are stacked
                 on top of one another. Our intent is to minimize risk
                 of introducing a new technology (3D) to implement a
                 class of low-cost, low-power compact server
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "3D stacking technology; chip multiprocessor;
                 full-system simulation; Low power; Tier-1/2/3 server",
}

@Article{Ma:2008:IEF,
  author =       "Yuchun Ma and Yongxiang Liu and Eren Kursun and Glenn
                 Reinman and Jason Cong",
  title =        "Investigating the effects of fine-grain
                 three-dimensional integration on microarchitecture
                 design",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412590",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "In this article we propose techniques that enable
                 efficient exploration of the 3D design space, where
                 each logical block can span more than one silicon
                 layer. Fine-grain 3D integration provides reduced
                 intrablock wire delay as well as improved power
                 consumption. However, the corresponding power and
                 performance advantage is usually underutilized, since
                 various implementations of multilayer blocks require
                 novel physical design and microarchitecture
                 infrastructure to explore 3D microarchitecture design
                 space. We develop a cubic packing engine which can
                 simultaneously optimize physical and architectural
                 design for efficient vertical integration. This
                 technique selects the individual unit designs from a
                 set of single-layer or multilayer implementations to
                 get the best microarchitectural design in terms of
                 performance, temperature, or both.
Our experimental results using a design driver of a
                 high-performance superscalar processor show a 36\%
                 performance improvement over traditional 2D for 2--4
                 layers and 14\% over 3D with single-layer unit
                 implementations. Since thermal characteristics of 3D
                 integrated circuits are among the main challenges,
                 thermal-aware floorplanning and thermal via insertion
                 techniques are employed to keep the peak temperatures
                 below threshold.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "3D integration; 3D packing; microarchitecture;
                 thermal",
}

@Article{Zhan:2008:AMA,
  author =       "Yong Zhan and Sachin S. Sapatnekar",
  title =        "Automated module assignment in stacked-{Vdd} designs
                 for high-efficiency power delivery",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412591",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "With aggressive reductions in feature sizes and the
                 integration of multiple functionalities on the same
                 die, bottlenecks due to I/O pin limitations have
                 become a critical issue in today's VLSI designs,
                 especially for 3D IC technologies. To alleviate the
                 pin limitation problem, a stacked-Vdd circuit paradigm
                 has recently been proposed in the literature. However,
                 for a circuit designed using this paradigm, a
                 significant amount of power may be wasted if modules
                 are not carefully assigned to different Vdd domains.
                 In this article, we present a partition-based
                 algorithm for efficiently assigning modules at the
                 floorplanning level, so as to reuse currents between
                 Vdd domains and minimize the power wasted during the
                 operation of the circuit. Experimental results on both
                 3D and 2D ICs show that compared with assigning
                 modules to different Vdd domains using enumeration and
                 simulated annealing, our algorithm can generate
                 circuits with competitive power and IR noise
                 performance, while being orders of magnitude faster.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Ferri:2008:PYM,
  author =       "Cesare Ferri and Sherief Reda and R. Iris Bahar",
  title =        "Parametric yield management for {$3$D} {ICs}:
                 {Models} and strategies for improvement",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412592",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Three-Dimensional (3D) Integrated Circuits (ICs) that
                 integrate die with Through-Silicon Vias (TSVs) promise
                 to continue system and functionality scaling beyond
                 the traditional geometric 2D device scaling. 3D
                 integration also improves the performance of ICs by
                 reducing the communication time between different chip
                 components through the use of short TSV-based vertical
                 wires. This reduction is particularly attractive in
                 processors where it is desirable to reduce the access
                 time between the main logic die and the L2 cache or
                 the main memory die. Process variations in 2D ICs lead
                 to a drop in parametric yield (as measured by speed,
                 leakage and sales profits), which forces manufacturers
                 to speed bin their chips and to sell slow chips at
                 reduced prices.
In this paper we develop a model to quantify the impact of
                 process variations on the parametric yield of 3D ICs,
                 and then we propose a number of integration strategies
                 that use a graph-theoretic framework to maximize the
                 performance, parametric yield and profits of 3D ICs.
                 Comparing our proposed strategies to current
                 yield-oblivious methods, it is demonstrated that it is
                 possible to increase the number of 3D ICs in the
                 fastest speed bins by almost $ 2 \times $, while
                 simultaneously reducing the number of slow ICs by
                 29.4\%. This leads to an improvement in performance by
                 up to 6.45\% and an increase of about 12.48\% in total
                 sales revenue using up-to-date market price models.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "3D integration; leakage; performance; process
                 variations; yield management",
}

@Article{Miyakawa:2008:MST,
  author =       "Nobuaki Miyakawa and Eiri Hashimoto and Takanori
                 Maebashi and Natsuo Nakamura and Yutaka Sacho and
                 Shigeto Nakayama and Shinjiro Toyoda",
  title =        "Multilayer stacking technology using wafer-to-wafer
                 stacked method",
  journal =      j-JETC,
  volume =       "4",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1412587.1412593",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:22:55 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "We have developed a new three-dimensional stacking
                 technology using the wafer-to-wafer stacked method.
                 Electrical conductivity between each wafer is almost
                 100\% and contact resistance is less than 0.7
                 $ \Omega $ between a through-silicon via (TSV) and a
                 microbump. We have also created a prototype of a
                 three-layer stacking device using our technology,
                 where each wafer for the stacking is fabricated by
                 using 0.18um CMOS technology based on 8-inch wafers.
                 The device is operated by two times the frequency of
                 the multichip module (MCM) device case using a
                 two-dimensional device with identical functions and
                 minimally different power consumption. The yields
                 obtained from the results comprising all functional
                 tests are over 60\%.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "3D integration; design; hardware; stacking process",
}

@Article{Shukla:2009:GEI,
  author =       "Sandeep Shukla",
  title =        "Guest editorial: {IEEE\slash ACM} Symposium on
                 Nanoscale Architectures {(NANOARCH07)}",
  journal =      j-JETC,
  volume =       "5",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482613.1482614",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:14 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Wang:2009:TAR,
  author =       "Shuo Wang and Lei Wang and Faquir Jain",
  title =        "Towards achieving reliable and high-performance
                 nanocomputing via dynamic redundancy allocation",
  journal =      j-JETC,
  volume =       "5",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482613.1482615",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:14 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Nanoelectronic devices are considered to be the
                 computational
fabrics for the emerging nanocomputing systems due to their
                 ultra-high speed and integration density. However, the
                 imperfect bottom-up self-assembly fabrication leads to
                 excessive defects that have become a barrier for
                 achieving reliable computing. In addition, transient
                 errors continue to be a problem. The massive
                 parallelism rendered by nanoscale integration opens up
                 new opportunities but also poses challenges on how to
                 manage such massive resources for reliable and
                 high-performance computing. In this paper, we propose
                 a nanoarchitecture solution to address these emerging
                 challenges. By using dynamic redundancy allocation,
                 the massive parallelism is exploited to jointly
                 achieve fault (defect/error) tolerance and high
                 performance. Simulation results demonstrate the
                 effectiveness of the proposed technique under a range
                 of fault rates and operating conditions.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "hardware reliability; Nanoscale architecture;
                 performance; redundancy allocation; redundant design",
}

@Article{Wang:2009:ENP,
  author =       "Z. F. Wang and Huaixiu Zheng and Q. W. Shi and Jie
                 Chen",
  title =        "Emerging nanodevice paradigm: {Graphene-based}
                 electronics for nanoscale computing",
  journal =      j-JETC,
  volume =       "5",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482613.1482616",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:14 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "The continued miniaturization of silicon-based
                 electronic circuits is fast approaching its physical
                 limitations. It is unlikely that advances in
                 miniaturization, following the so-called Moore's Law,
                 can continue in the foreseeable future.
Nanoelectronics has to go beyond silicon technology. New device
                 paradigms based on nanoscale materials, such as
                 molecular electronic devices, spin devices and
                 carbon-based devices, will emerge. In this article, we
                 introduce a nanodevice paradigm: graphene
                 nanoelectronics. Due to its unique quantum effects and
                 electronic properties, researchers predict that
                 graphene-based devices may replace carbon nanotube
                 devices and become major building blocks for future
                 nanoscale computing. To manifest its unique electronic
                 properties, we present some of our recent designs,
                 namely a graphene-based switch, a negative
                 differential resistance (NDR) device and a random
                 access memory array (RAM). Since these basic devices
                 are the building blocks for large-scale circuits, our
                 findings can help researchers construct useful
                 computing systems and study graphene-based circuit
                 performance in the future.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "Graphene device; memory structure; negative
                 differential resistance; tight-binding model",
}

@Article{Taskin:2009:SRB,
  author =       "Baris Taskin and Andy Chiu and Jonathan Salkind and
                 Daniel Venutolo",
  title =        "A shift-register-based {QCA} memory architecture",
  journal =      j-JETC,
  volume =       "5",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482613.1482617",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:14 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "A quantum-dot cellular automata (QCA) design of an
                 $ n \times m$-bit, shift-register-based memory
                 architecture is presented. The architecture maintains
                 data at a stable conformation, which is contrary to
                 traditional data in-motion concept for QCA
                 architectures. The memory architecture is based on an
                 existing dual-phase-synchronized, line-based, one-bit
                 QCA memory cell building block that provides size and
                 latency improvements over other known one-bit memory
                 cells through its novel clocking scheme. Read/write
                 latencies up to $ \sim $ 2X lower than the existing
                 tile-based architecture with three-phase, line-based
                 memory cells are obtained. Simulations with
                 QCADesigner and HDLQ are performed on a sample
                 $ 4 \times 8$ bit memory architecture
                 implementation.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "clocking; memory design; Quantum-dot cellular
                 automata",
}

@Article{Huo:2009:SBN,
  author =       "Dennis Huo and Qiaoyan Yu and David Wolpert and Paul
                 Ampadu",
  title =        "A simulator for ballistic nanostructures in a
                 {$2$-D} electron gas",
  journal =      j-JETC,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482613.1482618",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:14 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "A multipurpose simulator for ballistic
                 nanostructures, based on classical mechanics of
                 electrons at the Fermi level, has been successfully
                 implemented. Despite the simplicity of the model, the
                 simulator successfully reproduces a number of
                 experimental results, and is shown to consistently
                 match observed current-voltage characteristics and
                 magnetoresistance phenomena. The simulator results
                 provide design guidelines for devices which operate on
                 ballistic transport principles.
Using the simulator, preliminary logic structures have been
                 designed based on the ballistic deflection
                 transistor.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "2DEG; Ballistic transport; nanoelectronic device;
                 transistor",
}

@Article{Bahar:2009:ISS,
  author =       "R. Iris Bahar",
  title =        "Introduction to special section: {Best} of {NANOARCH
                 2008}",
  journal =      j-JETC,
  volume =       "5",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1543438.1543439",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:24 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Mishra:2009:LPF,
  author =       "Prateek Mishra and Anish Muttreja and Niraj K. Jha",
  title =        "Low-power {FinFET} circuit synthesis using multiple
                 supply and threshold voltages",
  journal =      j-JETC,
  volume =       "5",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1543438.1543440",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:24 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "According to Moore's law, the number of transistors
                 in a chip doubles every 18 months. The increased
                 transistor-count leads to increased power density.
                 Thus, in modern circuits, power efficiency is a
                 central determinant of circuit efficiency. With
                 scaling, leakage power accounts for an increasingly
                 larger portion of the total power consumption in deep
                 submicron technologies ($ > $ 40\%).\par
                 FinFET technology has been proposed as a promising
                 alternative to deep submicron bulk CMOS technology,
                 because of its better scalability, short-channel
                 characteristics, and ability to suppress leakage
                 current and mitigate device-to-device variability when
                 compared to bulk CMOS. The subthreshold slope of a
                 FinFET is approximately 60mV which is close to
                 ideal.\par
                 In this article, we propose a methodology for
                 low-power FinFET based circuit synthesis. A mechanism
                 called TCMS (Threshold Control through Multiple Supply
                 Voltages) was previously proposed for improving the
                 power efficiency of FinFET based global interconnects.
                 We propose a significant generalization of TCMS to the
                 design of any logic circuit. This scheme represents a
                 significant divergence from the conventional multiple
                 supply voltage schemes considered in the past. It also
                 obviates the need for voltage level-converters. We
                 employ accurate delay and power estimates using table
                 look-up methods based on HSPICE simulations for supply
                 voltage and threshold voltage optimization.
                 Experimental results demonstrate that TCMS can provide
                 power savings of 67.6\% and device area savings of
                 65.2\% under relaxed delay constraints. Two other
                 variants of TCMS are also proposed that yield similar
                 benefits. We compare our scheme to extended cluster
                 voltage scaling (ECVS), a popular dual-
                 {\em V$_{dd}$ \/} scheme presented in the literature.
                 ECVS makes use of voltage level-converters. Even when
                 it is assumed that these level-converters have zero
                 delay, thus significantly favoring ECVS in
                 time-constrained power optimization, TCMS still
                 outperforms ECVS.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
  keywords =     "linear programming; Low-power; synthesis; TCMS",
}

@Article{Crocker:2009:DFQ,
  author =       "Michael Crocker and X. Sharon Hu and Michael
                 Niemier",
  title =        "Defects and faults in {QCA}-based {PLAs}",
  journal =      j-JETC,
  volume =       "5",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1543438.1543441",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 17 14:23:24 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Defect tolerance will be critical in any system with
                 nanoscale feature sizes. This article examines some
                 fundamental aspects of defect tolerance for a
                 reconfigurable system based on Quantum-dot Cellular
                 Automata (QCA). We analyze a novel, QCA-based,
                 Programmable Logic Array (PLA) structure, develop an
                 implementation independent fault model, and discuss
                 how expected defects and faults might affect yield.
                 Within this context, we introduce techniques for
                 mapping Boolean logic functions to a defective
                 QCA-based PLA.
Simulation results show that our new mapping techniques can achieve higher yields than existing techniques.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "defects; faults; logic mapping; Nanotechnology; quantum-dot cellular automata", } @Article{Wu:2009:SCD, author = "Xiaoxia Wu and Paul Falkenstern and Krishnendu Chakrabarty and Yuan Xie", title = "Scan-chain design and optimization for three-dimensional integrated circuits", journal = j-JETC, volume = "5", number = "2", pages = "9:1--9:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1543438.1543442", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:24 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Scan chains are widely used to improve the testability of integrated circuit (IC) designs and to facilitate fault diagnosis. For traditional 2D IC design, a number of design techniques have been proposed in the literature for scan-chain routing and scan-cell partitioning. However, these techniques are not effective for three-dimensional (3D) technologies, which have recently emerged as a promising means to continue technology scaling. In this article, we propose two techniques for designing scan chains in 3D ICs, with given constraints on the number of through-silicon-vias (TSVs). The first technique is based on a genetic algorithm (GA), and it addresses the ordering of cells in a single scan chain. The second optimization technique is based on integer linear programming (ILP); it addresses single-scan-chain ordering as well as the partitioning of scan flip-flops into multiple scan chains. We compare these two methods by conducting experiments on a set of ISCAS'89 benchmark circuits. 
The first conclusion obtained from the results is that 3D scan-chain optimization achieves significant wire-length reduction compared to 2D counterparts. The second conclusion is that the ILP-based technique provides lower bounds on the scan-chain interconnect length for 3D ICs, and it offers considerable reduction in wire-length compared to the GA-based heuristic method.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "3D ICs; genetic algorithm; integer linear programming; LP relaxation; randomized rounding; scan-chain design", } @Article{Datta:2009:EPT, author = "Siddhartha Datta and Bharat Joshi and Arun Ravindran and Arindam Mukherjee", title = "Efficient parallel testing and diagnosis of digital microfluidic biochips", journal = j-JETC, volume = "5", number = "2", pages = "10:1--10:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1543438.1543443", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:24 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidics-based biochips consist of microfluidic arrays on rigid substrates through which movement of fluids is tightly controlled to facilitate biological reactions. Biochips are soon expected to revolutionize biosensing, clinical diagnostics, environmental monitoring, and drug discovery. Critical to the deployment of the biochips in such diverse areas is the dependability of these systems. Thus robust testing and diagnosis techniques are required to ensure adequate level of system dependability. Due to the underlying mixed technology and mixed energy domains, such biochips exhibit unique failure mechanisms and defects. 
In this article efficient parallel testing and diagnosis algorithms are presented that can detect and locate single as well as multiple faults in a microfluidic array without flooding the array, a problem that has hampered realistic implementation of several existing strategies. The fault diagnosis algorithms are well suited for built-in self-test that could drastically reduce the operating cost of microfluidic biochip. Also, the proposed algorithms can be used both for testing and fault diagnosis during field operation as well as increasing yield during the manufacturing phase of the biochip. Furthermore, these algorithms can be applied to both online and offline testing and diagnosis. Analytical results suggest that these strategies can be used to design highly dependable biochip systems.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "defect tolerance; droplet flooding; fault tolerance; Microfluidic biochip; microfluidics; multiple faults; reconfigurability; testing", } @Article{Tahoori:2009:LOD, author = "Mehdi B. Tahoori", title = "Low-overhead defect tolerance in crossbar nanoarchitectures", journal = j-JETC, volume = "5", number = "2", pages = "11:1--11:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1543438.1543444", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:24 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "It is anticipated that the number of defects in nanoscale devices fabricated using bottom-up self-assembly process is significantly higher than that for CMOS devices fabricated by conventional top-down lithography patterning. This is mainly because of inherent lack of control in self-assembly fabrication as well as atomic scale of devices. 
The goal of defect tolerance, as an integral part of nano computing, is to obtain error-free computation from such fabrics containing defective elements.\par In this article, an application-independent defect tolerant scheme for reconfigurable crossbar array nanoarchitectures is presented. The main feature of this approach is that the existence and location of defective resources within the nano-fabric are hidden from the entire design flow, resulting in minimum post-fabrication customization per chip and minimum changes to the entire design and synthesis flow. It is also shown how to drastically minimize the area overhead associated with this flow. The proposed technique requires extraction of regular yet incomplete defect-free subsets, in contrast to previously proposed complete defect-free subsets. This can greatly reduce the area overhead required for defect tolerance while not sacrificing logic mapping or signal routing capabilities. Extensive simulation results confirm considerable reduction in the area overhead without any negative impact on the usability of modified defect-free subsets.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Defect tolerance; nanotechnology; reconfigurable architectures", } @Article{Chakraborty:2009:SAD, author = "Rajat Subhra Chakraborty and Swarup Bhunia", title = "A study of asynchronous design methodology for robust {CMOS}-nano hybrid system design", journal = j-JETC, volume = "5", number = "3", pages = "12:1--12:??", month = aug, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1568485.1568486", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:41 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Among the emerging alternatives to CMOS, molecular electronics based diode-resistor 
crossbar fabric has generated considerable interest in recent times. Logic circuit design with future nano-scale molecular devices using dense and regular crossbar fabrics is promising in terms of integration density, performance and power dissipation. However, circuit design using molecular switches involve some major challenges: (1) lack of voltage gain of these switches that prevents logic cascading; (2) large output voltage level degradation; (3) vulnerability to parameter variations that affect yield and robustness of operation; and (4) high defect rate. In this article, we analyze some of the above challenges and investigate the effectiveness of asynchronous design methodology in a hybrid system design platform using molecular crossbar and CMOS interfacing elements. We explore different approaches of asynchronous circuit design and compare their suitability in terms of several circuit design parameters. We then develop the methodology and an automated synthesis flow to support two different asynchronous design approaches ({\em Micropipelines\/} and {\em Four phase Dual-rail\/}) for system designs using nano-crossbar logic stages and CMOS interface data-storage elements. Circuit-level simulation results for several benchmarks show considerable advantage in terms of performance and robustness at moderate area and power overhead compared to two different synchronous implementations.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Asynchronous design; CMOS-nano co-design; dual-rail circuits; logic degradation; micropipelines; nano-scale crossbar; robust design", } @Article{Zhang:2009:HNCa, author = "Wei Zhang and Niraj K. 
Jha and Li Shang", title = "A hybrid {Nano\slash CMOS} dynamically reconfigurable system --- {Part II}: {Design} optimization flow", journal = j-JETC, volume = "5", number = "3", pages = "13:1--13:??", month = aug, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1568485.1568487", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:41 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In Part I of this work, a hybrid nano/CMOS reconfigurable architecture, called NATURE, was described. It is composed of CMOS reconfigurable logic and interconnect fabric, and nonvolatile nano on-chip memory. Through its support for cycle-by-cycle runtime reconfiguration and a highly-efficient computation model, temporal logic folding, NATURE improves logic density and area-delay product by more than an order of magnitude compared to existing CMOS-based field-programmable gate arrays (FPGAs). NATURE can be fabricated using mainstream photo-lithography fabrication techniques. Thus, it offers a currently commercially feasible architecture with high performance, superior logic density, and excellent runtime design flexibility.\par In Part II of this work, we present an integrated design and optimization flow for NATURE, called NanoMap. Given an input design specified in register-transfer level (RTL) and/or gate-level VHDL, NanoMap optimizes and implements the design on NATURE through logic mapping, temporal clustering, temporal placement, and routing. As opposed to other design tools for traditional FPGAs, NanoMap supports and leverages temporal logic folding by integrating novel mapping techniques. It can automatically explore and identify the best temporal logic folding configuration, targeting area, delay or area-delay product optimization. A force-directed scheduling technique is used to optimize and balance resource usage across different folding cycles. 
By supporting logic folding, NanoMap can provide significant design flexibility in performing area-delay trade-offs under various user-specified constraints. We present details of the mapping procedure and results for different architectural instances. Experimental results demonstrate that NanoMap can judiciously trade off area and delay targeting different optimization goals, and effectively exploit the advantages of NATURE.\par Part I of this work will appear in JETC Vol. 5, No. 4.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "design optimization flow; Dynamic reconfiguration; logic folding; NATURE", } @Article{Simsir:2009:HNC, author = "Muzaffer O. Simsir and Srihari Cadambi and Franjo Ivan{\v{c}}i{\'c} and Martin Roetteler and Niraj K. Jha", title = "A hybrid nano-{CMOS} architecture for defect and fault tolerance", journal = j-JETC, volume = "5", number = "3", pages = "14:1--14:??", month = aug, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1568485.1568488", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:41 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As the end of the semiconductor roadmap for CMOS approaches, architectures based on nanoscale molecular devices are attracting attention. Among several alternatives, silicon nanowires and carbon nanotubes are the two most promising nanotechnologies according to the ITRS. These technologies may enable scaling deep into the nanometer regime. However, they suffer from very defect-prone manufacturing processes. Although the reconfigurability property of the nanoscale devices can be used to tolerate high defect rates, it may not be possible to locate all defects. 
With very high device densities, testing each component may not be possible because of time or technology restrictions. This points to a scenario in which even though the devices are tested, the tests are not very comprehensive at locating defects, and hence the shipped chips are still defective. Moreover, the devices in the nanometer range will be susceptible to transient faults which can produce arbitrary soft errors. Despite these drawbacks, it is possible to make nanoscale architectures practical and realistic by introducing defect and fault tolerance. In this article, we propose and evaluate a hybrid nanowire-CMOS architecture that addresses all three problems --- namely high defect rates, unlocated defects, and transient faults --- at the same time. This goal is achieved by using multiple levels of redundancy and majority voters. A key aspect of the architecture is that it contains a judicious balance of both nanoscale and traditional CMOS components. A companion to the architecture is a compiler with heuristics to quickly determine if logic can be mapped onto partially defective nanoscale elements. The heuristics make it possible to introduce defect-awareness in placement and routing. 
The architecture and compiler are evaluated by applying the complete design flow to several benchmarks.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Defect tolerance; nanotechnology; nanowires", } @Article{Wang:2009:UQD, author = "Shuo Wang and Jianwei Dai and El-Sayed Hasaneen and Lei Wang and Faquir Jain", title = "Utilizing quantum dot transistors with programmable threshold voltages for low-power mobile computing", journal = j-JETC, volume = "5", number = "3", pages = "15:1--15:??", month = aug, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1568485.1568489", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:41 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Power consumption poses one of the fundamental barriers for deploying mobile computing devices in energy-constrained situations with varying operation conditions. In particular, leakage power is projected to increase exponentially in future semiconductor process nodes. This challenging problem is pressing for renewed focus on power-performance optimization at all levels of design abstraction, from novel device structures to fundamental shifts in design paradigm. In this article, we propose to exploit the programmable threshold voltage quantum dot (QD) transistors to reduce leakage thereby improving the energy efficiency for mobile computing. The unique programmability and reconfigurability enabled by QD transistors extend our capability in design optimization for new power-performance trade-offs. 
Simulation results demonstrate the significant leakage reduction over conventional techniques.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Low power; threshold voltage and quantum dot transistor", } @Article{Zhang:2009:HNCb, author = "Wei Zhang and Niraj K. Jha and Li Shang", title = "A hybrid {nano\slash CMOS} dynamically reconfigurable system --- {Part I}: {Architecture}", journal = j-JETC, volume = "5", number = "4", pages = "16:1--16:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629091.1629092", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:55 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Rapid progress on nanodevices points to a promising direction for future circuit design. However, since nanofabrication techniques are not yet mature, implementation of nanocircuits, at least on a large scale, in the near future is infeasible. To ease fabrication and overcome the problem of high defect levels in nanotechnology, hybrid nano/CMOS reconfigurable architectures are attractive choices. Moreover, if the current photolithography fabrication process can be used to manufacture the hybrid chips, the benefits of nanotechnologies can be realized today.\par Traditional reconfigurable architectures can only support partial or coarse-grain runtime reconfiguration due to their limited on-chip storage and long off-chip reconfiguration latency. Recent progress on nano Random Access Memories (RAMs), such as carbon nanotube-based RAM (NRAM), Phase-Change Memory (PCM), magnetoresistive RAM (MRAM), etc., provides us with a chance to realize on-chip fine-grain runtime reconfiguration. These nano RAMs have good compatibility with the current fabrication process. 
By utilizing them in the hybrid design, we can take advantage of both CMOS and nanotechnology, and greatly improve the logic density, resource utilization, and performance of our design.\par In this article, we propose a high-performance reconfigurable architecture, called NATURE, that utilizes CMOS logic and nano RAMs. An automatic design flow for NATURE is presented in Part II of the article. In NATURE, the highly dense nonvolatile nano RAMs are distributed throughout the chip to allow large embedded on-chip configuration storage, which enables fast reading and hence supports fine-grain runtime reconfiguration and temporal logic folding of a circuit before being mapped to the architecture. Temporal logic folding can significantly increase the logic density of NATURE (by over an order of magnitude for large circuits) while remaining competitive in performance and power consumption. For ease of exposition, we use NRAMs to illustrate various concepts in this article due to the excellent properties of NRAMs. However, other nano RAMs can also be used instead. Experimental results based on NRAMs establish the efficacy of NATURE.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "logic folding; NRAM; runtime reconfiguration", } @Article{Zhang:2009:DSE, author = "Wei Zhang and Niraj K. 
Jha and Li Shang", title = "Design space exploration and data memory architecture design for a hybrid {nano\slash CMOS} dynamically reconfigurable architecture", journal = j-JETC, volume = "5", number = "4", pages = "17:1--17:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629091.1629093", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:55 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In recent years, research on nanotechnology has advanced rapidly. Novel nanodevices have been developed, such as those based on carbon nanotubes, nanowires, etc. Using these emerging nanodevices, diverse nanoarchitectures have been proposed. Among them, hybrid nano/CMOS reconfigurable architectures have attracted attention because of their advantages in performance, integration density, and fault tolerance. Recently, a high-performance hybrid nano/CMOS reconfigurable architecture, called NATURE, was presented. NATURE comprises CMOS reconfigurable logic and interconnect fabric, and CMOS-fabrication-compatible nanomemory. High-density, fast nano RAMs are distributed in NATURE as on-chip storage to store multiple reconfiguration copies for each reconfigurable element. It enables cycle-by-cycle runtime reconfiguration and a highly efficient computational model, called temporal logic folding. Through logic folding, NATURE provides more than an order of magnitude improvement in logic density and area-delay product, and significant design flexibility in performing area-delay trade-offs, at the same technology node. Moreover, NATURE can be fabricated using mainstream photolithography fabrication techniques. 
Hence, it offers a currently commercially viable reconfigurable architecture with high performance, superior logic density, and outstanding design flexibility, which is very attractive for deployment in cost-conscious embedded systems.\par In order to fully explore the potential of NATURE and further improve its performance, in this article, a thorough design space exploration is conducted to optimize its architecture. Investigations in terms of different logic element architectures, interconnect designs, and various technologies for nano RAMs are presented. Nano RAMs can not only be used as storage for configuration bits, but the high density of nano RAMs also makes them excellent candidates for large-capacity on-chip data storage in NATURE. Many logic- and memory-intensive applications, such as video and image processing, require large storage of temporal results. To enhance the capability of NATURE for implementing such applications, we investigate the design of nano data memory structures in NATURE and explore the impact of memory density. 
Experimental results demonstrate significant throughput improvements due to area saving from logic folding and parallel data processing.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "logic folding; Nano data RAM; runtime reconfiguration", } @Article{Tang:2009:DET, author = "Weiguo Tang and Lei Wang and Fabrizio Lombardi", title = "A defect\slash error-tolerant nanosystem architecture for {DSP}", journal = j-JETC, volume = "5", number = "4", pages = "18:1--18:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629091.1629094", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:55 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Emerging technologies such as silicon NanoWires (NW) and Carbon NanoTubes (CNT) have shown great potential for building the next generation of computing systems in the nano ranges. However, the excessive number of defects originating from bottom-up fabrication (such as a self-assembly process) poses a pressing challenge for achieving scalable system integration. This article proposes a new nanosystem architecture that employs nanowire crossbars for Digital Signal Processing (DSP) applications. Distributed arithmetic is utilized such that complex signal processing computation can be mapped into regular memory operations, thus making this architecture well suited for implementation by nanowire crossbars. Furthermore, the inherent features of DSP-type computation provide new insights to remedy errors (as logic/computational manifestation of defects). 
A new defect/error-tolerant technique that exploits algorithmic error compensation is proposed; at system level different trade-offs between correctness in output and performance are established while retaining low overhead in its implementation. As an instance of its application, the proposed approach has been utilized to a generic DSP nanosystem performing frequency-selective filtering. Simulation results show that the proposed nanoDSP introduces only a minor performance degradation under high defect rates and at a range of operational conditions. The proposed technique also features good scalability and viability for various DSP applications.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "algorithmic error compensation; Distributed arithmetic; DSP nanosystem; inner product", } @Article{Dysart:2009:OWR, author = "Timothy J. Dysart and Peter M. Kogge", title = "Organizing wires for reliability in magnetic {QCA}", journal = j-JETC, volume = "5", number = "4", pages = "19:1--19:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1629091.1629095", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:23:55 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article investigates, via analytic modeling, how a magnetic QCA wire should be organized to provide the highest reliability. We compare a nonredundant wire and two redundant wire organizations. For all three organizations, a fault rate per unit length is used for comparison; additionally, since extra components are necessary to implement the redundant organizations, these components are faulty as well. We show that the difference between these two fault rates is the main driver for selecting a wire organization. 
Lastly, we develop a guideline for selecting the most reliable wire organization during the circuit design process.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "modular redundancy; nanomagnet logic; QCA", } @Article{Chakrabarty:2010:E, author = "Krishnendu Chakrabarty", title = "Editorial", journal = j-JETC, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721650.1721651", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:24:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lee:2010:FBP, author = "Chun-Yi Lee and Niraj K. Jha", title = "{FinFET}-based power simulator for interconnection networks", journal = j-JETC, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721650.1721652", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:24:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Double-gate FETs, specifically FinFETs, are emerging as promising substitutes for bulk CMOS at the 32nm technology node and beyond because of the various obstacles to scaling faced by CMOS, such as short-channel effects, leakage power, and process variations. Another trend in chip multiprocessor design is incorporation of sophisticated on-chip interconnection networks. However, such networks are significant power-consumers. 
In this article, we address these two trends by presenting a power simulator for FinFET-based on-chip interconnection networks. It estimates both dynamic and leakage power. We present results for various FinFET design styles and temperatures (since leakage power changes drastically with temperature), and show that one FinFET design style may be much superior to another from the power consumption point of view.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "FinFETs; interconnection network; power consumption; power simulator", } @Article{Liu:2010:RSO, author = "Yang Liu and Chris Dwyer and Alvin R. Lebeck", title = "Routing in self-organizing nano-scale irregular networks", journal = j-JETC, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721650.1721653", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 17 14:24:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The integration of novel nanotechnologies onto silicon platforms is likely to increase fabrication defects compared with traditional CMOS technologies. Furthermore, the number of nodes connected with these networks makes acquiring a global defect map impractical. As a result, on-chip networks will provide defect tolerance by self-organizing into irregular topologies. In this scenario, simple static routing algorithms based on regular physical topologies, such as meshes, will be inadequate. Additionally, previous routing approaches for irregular networks assume abundant resources and do not apply to this domain of resource-constrained self-organizing nano-scale networks. 
Consequently, routing algorithms that work in irregular networks with limited resources are needed.\par In this article, we explore routing for self-organizing nano-scale irregular networks in the context of a Self-Organizing SIMD Architecture (SOSA). Our approach trades configuration time and a small amount of storage for reduced communication latency. We augment an Euler path-based routing technique for trees to generate static shortest paths between certain pairs of nodes while remaining deadlock free. Simulations of several applications executing on SOSA show our proposed routing algorithm can reduce execution time by 8\% to 30\%.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "data parallel; DNA; nanocomputing; Self-organizing; SIMD", } @Article{Kocak:2010:IDT, author = "Taskin Kocak and Dhiraj Pradhan", title = "Introduction to design techniques for energy harvesting", journal = j-JETC, volume = "6", number = "2", pages = "4:1--4:??", month = jun, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1773814.1773815", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wenck:2010:SST, author = "Justin Wenck and Jamie Collier and Jeff Siebert and Rajeevan Amirtharajah", title = "Scaling self-timed systems powered by mechanical vibration energy harvesting", journal = j-JETC, volume = "6", number = "2", pages = "5:1--5:??", month = jun, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1773814.1773816", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", 
bibdate = "Tue Sep 7 08:33:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Passive energy harvesting from mechanical vibration has wide application in wearable devices and wireless sensors to complement or replace batteries. Energy harvesting efficiency can be increased by eliminating AC/DC conversion. A test chip demonstrating self-timing, power-on reset circuitry, and dynamic memory for energy harvesting AC voltages has been designed in 180 nm CMOS and tested. An energy scalable DSP architecture implements FIR filters that consume as little as 170 pJ per output sample. The on-chip DRAM retains data for up to 28 ms while register data is retained down to a supply voltage of 153 mV. Circuit operation is confirmed for supply frequencies between 60 Hz and 1 kHz with power consumption below 130$ \mu $W. Reaching the limits of miniaturization will require approaching the limits of power dissipation. We extrapolate from this DSP architecture to find the minimum volume required for mechanical vibration energy harvesting sensors.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "AC power supply; DRAM; energy harvesting; energy-aware systems; integrated circuits; low-power design; power-on reset; scaling; self-timed", } @Article{Wang:2010:DCS, author = "W. S. Wang and T. O'Donnell and N. Wang and M. Hayes and B. O'Flynn and C. 
O'Mathuna", title = "Design considerations of sub-{mW} indoor light energy harvesting for wireless sensor systems", journal = j-JETC, volume = "6", number = "2", pages = "6:1--6:??", month = jun, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1773814.1773817", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "For most wireless sensor networks, one common and major bottleneck is the limited battery lifetime. The frequent maintenance efforts associated with battery replacement significantly increase the system operational and logistics cost. Unnoticed power failures on nodes will degrade the system reliability and may lead to system failure. In building management applications, to solve this problem, small energy sources such as indoor light energy are promising to provide long-term power to these distributed wireless sensor nodes. This article provides comprehensive design considerations for an indoor light energy harvesting system for building management applications. Photovoltaic cells characteristics, energy storage units, power management circuit design, and power consumption pattern of the target mote are presented. Maximum power point tracking circuits are proposed which significantly increase the power obtained from the solar cells. The novel fast charge circuit reduces the charging time. A prototype was then successfully built and tested in various indoor light conditions to discover the practical issues of the design. The evaluation results show that the proposed prototype increases the power harvested from the PV cells by 30\% and also accelerates the charging rate by 34\% in a typical indoor lighting condition. 
By entirely eliminating the rechargeable battery as energy storage, the proposed system would expect an operational lifetime 10--20 years instead of the current less than 6 months battery lifetime.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Design consideration; energy harvesting; indoor light illuminance; maximum power point tracking; PV cells; supercapacitor; wireless sensor node", } @Article{Moser:2010:EMF, author = "Clemens Moser and Jian-Jia Chen and Lothar Thiele", title = "An energy management framework for energy harvesting embedded systems", journal = j-JETC, volume = "6", number = "2", pages = "7:1--7:??", month = jun, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1773814.1773818", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Energy harvesting (also known as energy scavenging) is the process of generating electrical energy from environmental energy sources. There exists a variety of different energy sources such as solar energy, kinetic energy, or thermal energy. In recent years, this term has been frequently applied in the context of small autonomous devices such as wireless sensor nodes. In this article, a framework for energy management in energy harvesting embedded systems is presented. As a possible scenario, we focus on wireless sensor nodes that are powered by solar cells. We demonstrate that classical power management solutions have to be reconceived and/or new problems arise if perpetual operation of the system is required. In particular, we provide a set of algorithms and methods for various application scenarios, including real-time scheduling, application rate control, as well as reward maximization.
The goal is to optimize the performance of the application subject to given energy constraints. Our methods optimize the system performance which, for example, allows the usage of smaller solar cells and smaller batteries. Furthermore, we show how to dimension important system parameters like the minimum battery capacity or a sufficient prediction horizon. Our theoretical results are supported by simulations using long-term measurements of solar energy in an outdoor environment. In contrast to previous works, we present a formal framework which is able to capture the performance, the parameters, and the energy model of various energy harvesting systems. We combine different viewpoints, include corresponding simulation results, and provide a thorough discussion of implementation aspects.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "embedded systems; energy harvesting; model predictive control; Power management; real-time scheduling; reward maximization", } @Article{Mohanty:2010:UDS, author = "Saraju P. Mohanty and Dhiraj K. Pradhan", title = "{ULS}: a dual-{$ V_{th} $} \slash high-$ \kappa $ nano-{CMOS} universal level shifter for system-level power management", journal = j-JETC, volume = "6", number = "2", pages = "8:1--8:??", month = jun, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1773814.1773819", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Power dissipation is a major bottleneck for emerging applications, such as implantable systems, digital cameras, and multimedia processors. Each of these applications is essentially designed as an Analog/Mixed-Signal System-on-a-Chip (AMS-SoC). 
These AMS-SoCs are typically operated from a single power-supply source which is a battery providing a constant supply voltage. In order to reduce power dissipation of the AMS-SoCs, multiple-supply voltage and/or variable-supply voltage is used as an attractive low-power design approach. In the multiple-/variable-supply voltage AMS-SoCs the use of a DC-to-DC voltage-level shifter is critical. The voltage-level shifter is an overhead when its own power dissipation is high. In this article a new DC-to-DC voltage-level shifter is introduced that performs level-up shifting, level-down shifting, and blocking of voltages and is called Universal Level Shifter (ULS). The ULS is a unique component that reduces dynamic power and leakage of the AMS-SoCs while facilitating their reconfigurability. The system-level architectures for three AMS-SoCs, such as Drug Delivery Nano-Electro-Mechanical-System (DDNEMS), Secure Digital Camera (SDC), and Net-centric Multimedia Processor (NMP) are introduced to demonstrate the use of the ULS for system-level power management. The article presents a design flow and an algorithm for optimal design of the ULS using a dual-$ V_{th} $ high-$ \kappa $ technique for efficient realization of ULS. A prototype ULS is presented for 32nm nano-CMOS technology node. The robustness of the ULS design is examined by performing three types of analysis, such as parametric, load, and power. It is observed that the ULS produces a stable output for voltages as low as 0.35 V and loads varying from 50 {\em fF\/} to 120 {\em fF}.
The average power dissipation of the ULS with a 82 {\em fF\/} capacitive load is 5 $ \mu ${\em W}.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "Analog/Mixed-Signal System-on-a-Chip (AMS-SoC); DC-to-DC voltage-level shifter; dual-threshold voltage; high-$ \kappa $/metal-gate nano-CMOS; low-power design; nanoscale CMOS; Power management; system-level power management", } @Article{Dai:2010:ITA, author = "Jianwei Dai and Lei Wang and Fabrizio Lombardi", title = "An information-theoretic analysis of quantum-dot cellular automata for defect tolerance", journal = j-JETC, volume = "6", number = "3", pages = "9:1--9:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1777401.1777402", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:31 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum-dot cellular automata (QCA) has been advocated as a promising emerging nanotechnology for designing future nanocomputing systems. However, at device level, the large number of expected defects represents a significant hurdle for reliable computation in QCA-based systems. In this paper, we present an information-theoretic approach to investigate the relationship between defect tolerance and redundancy in QCA devices. By modeling defect-prone QCA devices as unreliable information processing media, we determine the information transfer capacity, as bound on the reliability that QCA devices can achieve.
The proposed method allows to evaluate the effectiveness of redundancy-based defect tolerance in an effective and quantitative manner.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "defect tolerance; information theoretic analysis; QCA; reliability", } @Article{Zhang:2010:LPN, author = "Wei Zhang and Niraj K. Jha and Li Shang", title = "Low-power {$3$D} nano\slash {CMOS} hybrid dynamically reconfigurable architecture", journal = j-JETC, volume = "6", number = "3", pages = "10:1--10:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1777401.1777403", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:31 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In order to continue technology scaling beyond CMOS, diverse nanoarchitectures have been proposed in recent years based on emerging nanodevices, such as nanotubes, nanowires, etc. Among them, some hybrid nano/CMOS reconfigurable architectures enjoy the advantage that they can be fabricated using photolithography. NATURE is one such architecture that we have proposed recently. It comprises CMOS reconfigurable logic and CMOS fabrication-compatible nano RAMs. It uses distributed high-density and fast nano RAMs as on-chip storage for storing multiple reconfiguration copies, enabling fine-grain cycle-by-cycle reconfiguration. It supports a highly efficient computational model, called temporal logic folding, which makes possible more than an order of magnitude improvement in logic density and area-delay product, significant power reduction, and significant design flexibility in performing area-delay trade-offs.\par In this article, we extend NATURE in various dimensions, evaluating various FPGA approaches in the context of today's emerging technologies. 
First, we explore the introduction of embedded coarse-grain modules in the fine-grain NATURE architecture and present a unified dynamically reconfigurable architecture, which can significantly enhance NATURE's computation power for data-dominated applications. Second, we explore a 3D architecture for NATURE in which the nano RAM for reconfiguration storage is on one layer and the rest of the CMOS logic on another layer. This leads to further improvements in logic density and performance. Finally, we explore the possibility of using FinFETs, an emerging double-gate CMOS technology, to implement NATURE. Since power consumption is an important consideration in the deep nanometer regime, especially for FPGAs, we present a back-gate biasing methodology for flexible threshold voltage adjustment in FinFETs to significantly reduce NATURE's power consumption. Simulation results demonstrate the efficacy of the proposed methods.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "3D design; Coarse-grain; FinFET; runtime reconfiguration", } @Article{Zhao:2010:ICP, author = "Yang Zhao and Tao Xu and Krishnendu Chakrabarty", title = "Integrated control-path design and error recovery in the synthesis of digital microfluidic lab-on-chip", journal = j-JETC, volume = "6", number = "3", pages = "11:1--11:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1777401.1777404", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 7 08:33:31 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent advances in digital microfluidics have led to tremendous interest in miniaturized lab-on-chip devices for biochemical analysis. 
Synthesis tools have also emerged for the automated design of lab-on-chip from the specifications of laboratory protocols. However, none of these tools consider control flow or address the problem of recovering from fluidic errors that can occur during on-chip bioassay execution. We present a synthesis method that incorporates control paths and an error-recovery mechanism in the design of a digital microfluidic lab-on-chip. Based on error-propagation estimates, we determine the best locations for fluidic checkpoints during biochip synthesis. A microcontroller coordinates the implementation of the control-flow-based bioassay by intercepting the synthesis results that are mapped to the software programs. Real-life bioassay applications are used as case studies to evaluate the proposed design method. For a representative protein assay, compared to a baseline chip design, the biochip with a control path can reduce the completion time by 30\% when errors occur during the implementation of the bioassay.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", keywords = "biochips; Error recovery; microfluidics; synthesis", } @Article{Bhoj:2010:GDF, author = "Ajay N. Bhoj and Niraj K. 
Jha", title = "Gated-diode {FinFET DRAMs}: Device and circuit design-considerations", journal = j-JETC, volume = "6", number = "4", pages = "12:1--12:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1877745.1877746", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:02 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Saeedi:2010:RCS, author = "Mehdi Saeedi and Morteza Saheb Zamani and Mehdi Sedighi and Zahra Sasanian", title = "Reversible circuit synthesis using a cycle-based approach", journal = j-JETC, volume = "6", number = "4", pages = "13:1--13:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1877745.1877747", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:02 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Thapliyal:2010:DRS, author = "Himanshu Thapliyal and Nagarajan Ranganathan", title = "Design of reversible sequential circuits optimizing quantum cost, delay, and garbage outputs", journal = j-JETC, volume = "6", number = "4", pages = "14:1--14:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1877745.1877748", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:02 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Singh:2010:CPD, author = "Montek Singh and Steven M. Nowick", title = "Call for Papers: Deadline: {March 15, 2011}", journal = j-JETC, volume = "6", number = "4", pages = "15:1--15:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1877745.1877749", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:02 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Das:2011:ISI, author = "Shamik Das and Garrett S. Rose", title = "Introduction to Special Issue: Highlights of {NANOARCH'09}", journal = j-JETC, volume = "7", number = "1", pages = "1:1--1:??", month = jan, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1899390.1899391", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:03 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Dingler:2011:PEI, author = "Aaron Dingler and Michael T. 
Niemier and Xiaobo Sharon Hu and Evan Lent", title = "Performance and Energy Impact of Locally Controlled {NML} Circuits", journal = j-JETC, volume = "7", number = "1", pages = "2:1--2:??", month = jan, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1899390.1899392", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:03 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gaillardon:2011:MNB, author = "P.-E. Gaillardon and F. Clermidy and I. O'Connor and J. Liu and M. Amadou and G. Nicolescu", title = "Matrix Nanodevice-Based Logic Architectures and Associated Functional Mapping Method", journal = j-JETC, volume = "7", number = "1", pages = "3:1--3:??", month = jan, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1899390.1899393", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:03 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Haron:2011:RRN, author = "Nor Zaidi Haron and Said Hamdioui", title = "Redundant Residue Number System Code for Fault-Tolerant Hybrid Memories", journal = j-JETC, volume = "7", number = "1", pages = "4:1--4:??", month = jan, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1899390.1899394", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Mar 28 12:17:03 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in 
Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Shang:2011:INC, author = "Li Shang and Qianfan Xu", title = "Introduction to nanophotonic communication technology integration", journal = j-JETC, volume = "7", number = "2", pages = "5:1--5:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970406.1970407", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:12 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Beausoleil:2011:LSI, author = "Raymond G. Beausoleil", title = "Large-scale integrated photonics for high-performance interconnects", journal = j-JETC, volume = "7", number = "2", pages = "6:1--6:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970406.1970408", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:12 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Biberman:2011:PNC, author = "Aleksandr Biberman and Kyle Preston and Gilbert Hendry and Nicol{\'a}s Sherwood-Droz and Johnnie Chan and Jacob S. 
Levy and Michal Lipson and Keren Bergman", title = "Photonic network-on-chip architectures using multilayer deposited silicon materials for high-performance chip multiprocessors", journal = j-JETC, volume = "7", number = "2", pages = "7:1--7:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970406.1970409", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:12 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2011:IHN, author = "Zheng Li and Moustafa Mohamed and Xi Chen and Hongyu Zhou and Alan Mickelson and Li Shang and Manish Vachharajani", title = "{Iris}: a hybrid nanophotonic network design for high-performance and low-power on-chip communication", journal = j-JETC, volume = "7", number = "2", pages = "8:1--8:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970406.1970410", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:12 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cianchetti:2011:LLH, author = "Mark J. Cianchetti and David H. 
Albonesi", title = "A low-latency, high-throughput on-chip optical router architecture for future chip multiprocessors", journal = j-JETC, volume = "7", number = "2", pages = "9:1--9:??", month = jun, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1970406.1970411", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:12 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhang:2011:FBP, author = "Meng Zhang and Niraj K. Jha", title = "{FinFET}-Based Power Management for Improved {DPA} Resistance with Low Overhead", journal = j-JETC, volume = "7", number = "3", pages = "10:1--10:??", month = aug, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2000502.2000503", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:13 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Differential power analysis (DPA) is a side-channel attack that statistically analyzes the power consumption of a cryptographic system to obtain secret information. This type of attack is well known as a major threat to information security. Effective solutions with low energy and area cost for improved DPA resistance are urgently needed, especially for energy-constrained modern devices that are often in the physical proximity of attackers. This article presents a novel countermeasure against DPA attacks on smart cards and other digital ICs based on FinFETs, an emerging substitute for bulk CMOS at the 22nm technology node and beyond. 
We exploit the adaptive power management characteristic of FinFETs to generate a high level of noise at critical moments in the execution of a cryptosystem to thwart DPA attacks.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Choi:2011:EQI, author = "Byung-Soo Choi and Rodney {Van Meter}", title = "On the Effect of Quantum Interaction Distance on Quantum Addition Circuits", journal = j-JETC, volume = "7", number = "3", pages = "11:1--11:17", month = aug, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2000502.2000504", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:13 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We investigate the theoretical limits of the effect of the quantum interaction distance on the speed of exact quantum addition circuits. For this study, we exploit graph embedding for quantum circuit analysis. We study a logical mapping of qubits and gates of any $ \Omega (\log n)$-depth quantum adder circuit for two $n$-qubit registers onto a practical architecture, which limits interaction distance to the nearest neighbors only and supports only one- and two-qubit logical gates. Unfortunately, on the chosen $k$-dimensional practical architecture, we prove that the depth lower bound of any exact quantum addition circuits is no longer $ \Omega (\log n)$, but $ \Omega (\root k \of n)$.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Goren:2011:DAN, author = "Sezer G{\"o}ren and H. 
Fatih Ugurdag and Okan Palaz", title = "Defect-Aware Nanocrossbar Logic Mapping through Matrix Canonization Using Two-Dimensional Radix Sort", journal = j-JETC, volume = "7", number = "3", pages = "12:1--12:??", month = aug, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2000502.2000505", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:13 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Nanocrossbars (i.e., nanowire crossbars) offer extreme logic densities but come with very high defect rates; stuck-open/closed, broken nanowires. Achieving reasonable yield and utilization requires logic mapping that is defect-aware even at the crosspoint level. Such logic mapping works with a defect map per each manufactured chip. The problem can be expressed as matching of two bipartite graphs; one for the logic to be implemented and other for the nanocrossbar. This article shows that the problem becomes a Bipartite SubGraph Isomorphism (BSGI) problem within sub-nanocrossbars free of stuck-closed faults. Our heuristic KNS-2DS is an iterative rough canonizer with approximately $ O(N^2) $ complexity followed by an $ O(N^3) $ matching algorithm.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Devadoss:2011:PQT, author = "Rajeswari Devadoss and Kolin Paul and M.
Balakrishnan", title = "{p-QCA}: a Tiled Programmable Fabric Architecture Using Molecular Quantum-Dot Cellular Automata", journal = j-JETC, volume = "7", number = "3", pages = "13:1--13:??", month = aug, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2000502.2000506", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Aug 18 12:25:13 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum-dot cellular automata is an interesting computation fabric with many never-seen-before properties. However, no programmable fabric scheme has utilized all these properties effectively. We propose an architecture for a programmable device using molecular QCA which exploits all the specialities of the fabric. The architecture taps the flexibility provided by the clocking system of molecular QCA to build a simple tile-based programmable device with the 3-input Majority gate as the fundamental logic element.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Singh:2011:ISI, author = "Montek Singh and Steven M. 
Nowick", title = "Introduction to Special Issue: Asynchrony in System Design", journal = j-JETC, volume = "7", number = "4", pages = "14:1--14:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043644", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Vacca:2011:ASN, author = "Marco Vacca and Mariagrazia Graziano and Maurizio Zamboni", title = "Asynchronous Solutions for Nanomagnetic Logic Circuits", journal = j-JETC, volume = "7", number = "4", pages = "15:1--15:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043645", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In the years to come new solutions will be required to overcome the limitations of scaled CMOS technology. One approach is to adopt Nano-Magnetic Logic Circuits, highly appealing for their extremely reduced power consumption. Despite the interesting nature of this approach, many problems arise when this technology is considered for real designs. The wire is the most critical of these problems from the circuit implementation point of view. It works as a pipelined interconnection, and its delay in terms of clock cycles depends on its length. 
Serious complications arise at the design phase, both in terms of synthesis and of physical design.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhang:2011:NPD, author = "Xuefu Zhang and Delong Shang and Fei Xia and Alex Yakovlev", title = "A Novel Power Delivery Method for Asynchronous Loads in Energy Harvesting Systems", journal = j-JETC, volume = "7", number = "4", pages = "16:1--16:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043646", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "For systems depending on power harvesting, a fundamental contradiction in the power delivery chain has existed between conventional synchronous computational loads requiring relatively stable Vdd and power harvesters unable to supply it. DC/DC conversion has therefore been an integral part of such systems to resolve this contradiction. On the other hand, asynchronous computational loads, in addition to their potential power-saving capabilities, can be made tolerant to a much wider range of Vdd variance. This may open up opportunities for much more energy efficient methods of power delivery. This article presents in-depth investigations into the behavior and performance of different on-chip power delivery methods driving both asynchronous and synchronous loads directly from a harvester source.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Plana:2011:SDI, author = "Luis A. 
Plana and David Clark and Simon Davidson and Steve Furber and Jim Garside and Eustace Painkras and Jeffrey Pepper and Steve Temple and John Bainbridge", title = "{SpiNNaker}: Design and Implementation of a {GALS} Multicore {System-on-Chip}", journal = j-JETC, volume = "7", number = "4", pages = "17:1--17:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043647", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The design and implementation of globally asynchronous locally synchronous systems-on-chip is a challenging activity. The large size and complexity of the systems require the use of computer-aided design (CAD) tools but, unfortunately, most tools do not work adequately with asynchronous circuits. This article describes the successful design and implementation of SpiNNaker, a GALS multicore system-on-chip. The process was completed using commercial CAD tools from synthesis to layout. A hierarchical methodology was devised to deal with the asynchronous sections of the system, encapsulating and validating timing assumptions at each level. 
The crossbar topology combined with a pipelined asynchronous fabric implementation allows the on-chip network to meet the stringent requirements of the system.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Galceran-Oms:2011:MTU, author = "Marc Galceran-Oms and Alexander Gotmanov and Jordi Cortadella and Mike Kishinevsky", title = "Microarchitectural Transformations Using Elasticity", journal = j-JETC, volume = "7", number = "4", pages = "18:1--18:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043648", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Elasticity is a paradigm that tolerates the variations in computation and communication delays. By applying elastic transformations that allow varying the original timing, circuits can be optimized beyond the conventional rigid transformations that do not modify the external timing. Pipelining is one of the classical techniques to improve the throughput of a circuit. This article reveals how elasticity can be effectively and practically used to derive pipelined circuits by using correct-by-construction transformations that can be fully automated. 
Two designs, one of them industrial, are used to demonstrate how the area-performance trade-off can be explored using elasticity.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sheikh:2011:EEP, author = "Basit Riaz Sheikh and Rajit Manohar", title = "{Energy-Efficient} Pipeline Templates for {High-Performance} Asynchronous Circuits", journal = j-JETC, volume = "7", number = "4", pages = "19:1--19:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043649", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We present two novel energy-efficient pipeline templates for high throughput asynchronous circuits. The proposed templates, called N-P and N-Inverter pipelines, use a single-track handshake protocol. There are multiple stages of logic within each pipeline. The proposed techniques minimize handshake overheads associated with input tokens and intermediate logic nodes within a pipeline template. Each template can pack a significant amount of logic in a single stage, while still maintaining a fast cycle time of only 18 transitions. Noise and timing robustness constraints of our pipelined circuits are quantified across all process corners. 
We present completion detection scheme based on wide NOR gates, which results in significant latency and energy savings especially as the number of outputs increase.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Matherat:2011:RCC, author = "Philippe Matherat and Marc-Thierry Jaekel", title = "Relativistic Causality and Clockless Circuits", journal = j-JETC, volume = "7", number = "4", pages = "20:1--20:??", month = dec, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043643.2043650", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 15 09:46:08 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Time plays a crucial role in the performance of computing systems. The accurate modelling of logical devices, and of their physical implementations, requires an appropriate representation of time and of all properties that depend on this notion. The need for a proper model, particularly acute in the design of clockless delay-insensitive (DI) circuits, leads one to reconsider the classical descriptions of time and of the resulting order and causal relations satisfied by logical operations. This questioning meets the criticisms of classical spacetime formulated by Einstein when founding relativity theory and is answered by relativistic conceptions of time and causality. Applying this approach to clockless circuits and considering the trace formalism, we rewrite Udding's rules, which characterize communications between DI components. We exhibit their intrinsic relation with relativistic causality. 
For that purpose, we introduce relativistic generalizations of traces, called R-traces, which provide a pertinent description of communications and compositions of DI components.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Crocker:2012:RPA, author = "Michael Crocker and Michael Niemier and X. Sharon Hu", title = "A Reconfigurable {PLA} Architecture for Nanomagnet Logic", journal = j-JETC, volume = "8", number = "1", pages = "1:1--1:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093146", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In order to continue the performance and scaling trends that we have come to expect from Moore's Law, many emergent computational models, devices, and technologies are actively being studied to either replace or augment CMOS technology. Nanomagnet Logic (NML) is one such alternative. NML operates at room temperature, it has the potential for low power consumption, and it is CMOS compatible. In this article, we present an NML programmable logic array (PLA) based on a previously proposed reprogrammable quantum-dot cellular automata PLA design. 
We also discuss the fabrication and simulation validation of the circuit structures unique to the NML PLA, present area, energy, and delay estimates for the NML PLA, compare the area of NML PLAs to other reprogrammable nanotechnologies, and analyze how architectural-level redundancy will affect performance and defect tolerance in NML PLAs.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Henry:2012:TNH, author = "Michael B. Henry and Leyla Nazhandali", title = "From Transistors to {NEMS}: Highly Efficient Power-Gating of {CMOS} Circuits", journal = j-JETC, volume = "8", number = "1", pages = "2:1--2:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093147", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A rapidly growing class of battery constrained electronic applications are those with very long sleep periods, such as structural health monitoring systems, biomedical implants, and wireless border security cameras. The traditional method for sleep-mode power reduction, transistor power gating, has drawbacks, including performance loss and residual leakage. This article presents a thorough evaluation of a new nanotechnology-enabled power gating structure, CMOS-compatible NEMS switches, in the presence of aggressive supply voltage scaling. 
Due to the infinite off-resistance of the NEMS switches, the average power consumption of an FFT processor performing 1 FFT per hour drops by around 30 times compared to a transistor-based power gating implementation.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tolbert:2012:MDA, author = "Jeremy R. Tolbert and Pratik Kabali and Simeranjit Brar and Saibal Mukhopadhyay", title = "Modeling and Designing for Accuracy and Energy Efficiency in Wireless Electroencephalography Systems", journal = j-JETC, volume = "8", number = "1", pages = "3:1--3:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093148", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Remote wireless monitoring of physiological signals has emerged as a key enabler for biotelemetry and can significantly improve the delivery of healthcare. Improving the energy efficiency and battery lifetime of the monitoring units without sacrificing the acquired signal quality is a key challenge in large-scale deployment of bioelectronic systems for remote wireless monitoring. In this article, we present a design methodology for accuracy aware, energy efficient wireless monitoring of electroencephalography (EEG) data. The proposed design performs a real-time accuracy energy trade-off by controlling the volume of transmitted data based on the information content in the EEG signal. 
We consider the effect of different system parameters in order to design an optimal system.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Naruse:2012:SDN, author = "Makoto Naruse and Ferdinand Peper and Kouichi Akahane and Naokatsu Yamamoto and Tadashi Kawazoe and Naoya Tate and Motoichi Ohtsu", title = "Skew Dependence of Nanophotonic Devices Based on Optical Near-Field Interactions", journal = j-JETC, volume = "8", number = "1", pages = "4:1--4:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093149", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We examine the timing dependence of nanophotonic devices based on optical excitation transfer via optical near-field interactions at the nanometer scale. We theoretically analyze the dynamic behavior of a two-input nanophotonic switch composed of three quantum dots based on a density matrix formalism while assuming arrival-time differences, or skew, between the inputs. The analysis reveals that the nanophotonic switch is resistant to a skew longer than the input signal duration, and the tolerance to skew is asymmetric with respect to the two inputs. 
The skew dependence is also experimentally examined based on near-field spectroscopy of InGaAs quantum dots, showing good agreement with the theory.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ye:2012:TBH, author = "Yaoyao Ye and Jiang Xu and Xiaowen Wu and Wei Zhang and Weichen Liu and Mahdi Nikdast", title = "A Torus-Based Hierarchical Optical-Electronic {Network-on-Chip} for Multiprocessor {System-on-Chip}", journal = j-JETC, volume = "8", number = "1", pages = "5:1--5:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093150", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Networks-on-chip (NoCs) are emerging as a key on-chip communication architecture for multiprocessor systems-on-chip (MPSoCs). Optical communication technologies are introduced to NoCs in order to empower ultra-high bandwidth with low power consumption. However, in existing optical NoCs, communication locality is poorly supported, and the importance of floorplanning is overlooked. These significantly limit the power efficiency and performance of optical NoCs. In this work, we address these issues and propose a torus-based hierarchical hybrid optical-electronic NoC, called THOE. THOE takes advantage of both electrical and optical routers and interconnects in a hierarchical manner. 
It employs several new techniques including floorplan optimization, an adaptive power control mechanism, low-latency control protocols, and hybrid optical-electrical routers with a low-power optical switching fabric.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Manem:2012:DCM, author = "H. Manem and J. Rajendran and G. S. Rose", title = "Design Considerations for Multilevel {CMOS\slash Nano} Memristive Memory", journal = j-JETC, volume = "8", number = "1", pages = "6:1--6:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2093145.2093151", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Feb 28 16:37:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With technology migration into nano and molecular scales several hybrid CMOS/nano logic and memory architectures have been proposed that aim to achieve high device density with low power consumption. The discovery of the memristor has further enabled the realization of denser nanoscale logic and memory systems by facilitating the implementation of multilevel logic. This work describes the design of such a multilevel nonvolatile memristor memory system, and the design constraints imposed in the realization of such a memory. In particular, the limitations on load, bank size, number of bits achievable per device, placed by the required noise margin for accurately reading and writing the data stored in a device are analyzed.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bhunia:2012:ISI, author = "Swarup Bhunia and Darrin J. 
Young", title = "Introduction to Special Issue on Implantable Electronics", journal = j-JETC, volume = "8", number = "2", pages = "7:1--7:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180879", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ko:2012:EHC, author = "Wen H. Ko", title = "Early History and Challenges of Implantable Electronics", journal = j-JETC, volume = "8", number = "2", pages = "8:1--8:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180880", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Implantable systems for biomedical research and clinical care are now a flourishing field of activities in academia as well as industrial institutions. The broad field includes experimental explorations in electronics, mechanical, chemical, and biological components and systems, and the combination of all these. Today virtually all implants involve both electronic circuits and micro-electro-mechanical-systems (MEMS). This article offers a very brief glance back at the early history of implant electronics in the period from the 1950s to the 1970s, by employing selected examples from the author's research. This short review also discusses the challenges of implantable electronics at present, and suggests some potentially important trends in the future research and development of implantable microsystems. 
It is aimed as an introduction of implantable/attached electronic systems to research engineers that are interested in implantable systems as a section of Biomedical Instrumentations.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Salam:2012:ICL, author = "Muhammad Tariqus Salam and Mohamad Sawan and Dang Khoa Nguyen", title = "Implantable Closed-Loop Epilepsy Prosthesis: Modeling, Implementation and Validation", journal = j-JETC, volume = "8", number = "2", pages = "9:1--9:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180881", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we present an implantable closed-loop epilepsy prosthesis, which is dedicated to automatically detect seizure onsets based on intracerebral electroencephalographic (icEEG) recordings from intracranial electrode contacts and provide an electrical stimulation feedback to the same contacts in order to disrupt these seizures. A novel epileptic seizure detector and a dedicated electrical stimulator were assembled together with common recording electrodes to complete the proposed prosthesis. The seizure detector was implemented in CMOS 0.18-$ \mu $ m by incorporating a new seizure detection algorithm that models time-amplitude and -frequency relationship in icEEG. The detector was validated offline on ten patients with refractory epilepsy and showed excellent performance for early detection of seizures. The electrical stimulator, used for suppressing the developing seizure, is composed of two biphasic channels and was assembled with embedded FPGA in a miniature PCB. 
The stimulator efficiency was evaluated on cadaveric animal brain tissue in an in vitro morphologic electrical model. Spatial characteristics of the voltage distribution in cortex were assessed in an attempt to identify optimal stimulation parameters required to affect the suspected epileptic focus. The experimental results suggest that lower frequency stimulation parameters cause significant amount of shunting of current through the cerebrospinal fluid; however higher frequency stimulation parameters produce effective spatial voltage distribution with lower stimulation charge.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sharad:2012:LPA, author = "Mrigank Sharad and Sumeet K. Gupta and Shriram Raghunathan and Pedro P. Irazoqui and Kaushik Roy", title = "Low-Power Architecture for Epileptic Seizure Detection Based on Reduced Complexity {DWT}", journal = j-JETC, volume = "8", number = "2", pages = "10:1--10:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180882", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we present a low-power, user-programmable architecture for discrete wavelet transform (DWT) based epileptic seizure detection algorithm. A simplified, low-pass filter (LPF)-only-DWT technique is employed in which energy contents of different frequency bands are obtained by subtracting quasi-averaged, consecutive LPF outputs. Training phase is used to identify the range of critical DWT coefficients that are in turn used to set patient-specific system level parameters for minimizing power consumption. The proposed optimizations allow the design to work at significantly lower power in the normal operation mode. 
The system has been tested on neural data obtained from kainate-treated rats. The design was implemented in TSMC-65nm technology and consumes less than 550-nW power at 250-mV supply.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Majerus:2012:WUL, author = "Steve J. A. Majerus and Steven L. Garverick and Michael A. Suster and Paul C. Fletter and Margot S. Damaser", title = "Wireless, Ultra-Low-Power Implantable Sensor for Chronic Bladder Pressure Monitoring", journal = j-JETC, volume = "8", number = "2", pages = "11:1--11:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180883", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The wireless implantable/intracavity micromanometer (WIMM) system was designed to fulfill the unmet need for a chronic bladder pressure sensing device in urological fields such as urodynamics for diagnosis and neuromodulation for bladder control. Neuromodulation in particular would benefit from a wireless bladder pressure sensor which could provide real-time pressure feedback to an implanted stimulator, resulting in greater bladder capacity while using less power. The WIMM uses custom integrated circuitry, a MEMS transducer, and a wireless antenna to transmit pressure telemetry at a rate of 10 Hz. Aggressive power management techniques yield an average current draw of $ 9 \mu $A from a 3.6-Volt micro-battery, which minimizes the implant size. Automatic pressure offset cancellation circuits maximize the sensing dynamic range to account for drifting pressure offset due to environmental factors, and a custom telemetry protocol allows transmission with minimum overhead. 
Wireless operation of the WIMM has demonstrated that the external receiver can receive the telemetry packets, and the low power consumption allows for at least 24 hours of operation with a 4-hour wireless recharge session.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Huang:2012:IRD, author = "Yu-Jie Huang and Hsin-Hung Liao and Pen-Li Huang and Tao Wang and Yao-Joe Yang and Yao-Hong Wang and Shey-Shi Lu", title = "An Implantable Release-on-Demand {CMOS} Drug Delivery {SoC} Using Electrothermal Activation Technique", journal = j-JETC, volume = "8", number = "2", pages = "12:1--12:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180884", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "An implantable system-on-a-chip (SoC) integrating controller/actuation circuitry and 8 individually addressable drug reservoirs is proposed for on-demand drug delivery. It is implemented by standard 0.35-$ \mu $m CMOS technology and post-IC processing. The post-IC processing includes deposition of metallic membranes (200{\AA} Pt/3000{\AA} Ti/200{\AA} Pt) to cap the drug reservoirs, deep dry etching to carve drug reservoirs in silicon as drug containers, and PDMS layer bonding to enlarge the drug storage. Based on electrothermal activation technique, drug releases can be precisely controlled by wireless signals. The wireless controller/actuation circuits including on-off keying (OOK) receiver, microcontroller unit, clock generator, power-on-reset circuit, and switch array are integrated on the same chip, providing patients the ability of remote drug activation and noninvasive therapy modification. 
Implanted by minimally invasive surgery, this SoC can be used for the precise drug dosing of localized treatment, such as the cancer therapy, or the immediate medication to some emergent diseases, such as heart attack. In vitro experimental results show that the reservoir content can be released successfully through the rupture of the membrane which is appointed by received wireless commands.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sun:2012:NMD, author = "Zhenyu Sun and Xiang Chen and Yaojun Zhang and Hai Li and Yiran Chen", title = "Nonvolatile Memories as the Data Storage System for Implantable {ECG} Recorder", journal = j-JETC, volume = "8", number = "2", pages = "13:1--13:??", month = jun, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180878.2180885", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 23 12:02:51 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we propose a data storage system with the emerging nonvolatile memory technologies used for the implantable electrocardiography (ECG) recorder. The proposed storage system can record the digitalized real-time ECG waveforms continuously inside the implantable device and export the stored data to external reader periodically to obtain a long-term backup. Spin transfer torque random access memory (STT-RAM) and spintronic memristor are selected as the storage elements for their nonvolatility, high density, high reliability, low power consumption, good scalability, and CMOS technology compatibility. The new read and write schemes of STT-RAM and spintronic memristors are presented and optimized to fit the specific application scenario. 
The tradeoffs among data accuracy, chip area, and read/write energy for the different technologies are thoroughly analyzed and compared. Our simulation results show the configuration with a data sampling rate (e.g., 128 Hz) and a quantization resolution (e.g., 12 bits) can record 18-hour real-time data within $ \approx 3.6$-mm$^2$ chip area when the data storage is built with single-level cell (SLC) STT-RAMs. Daily energy consumption is $ 5.46$ mJ. Utilizing the multilevel cell (MLC) STT-RAMs or the spintronic memristors as the storage elements can further reduce the chip area and decrease energy dissipation.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mohanty:2012:SSN, author = "Saraju P. Mohanty", title = "Special section on new circuit and architecture-level solutions for multidiscipline systems", journal = j-JETC, volume = "8", number = "3", pages = "14:1--14:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287697", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Srivastava:2012:CLV, author = "Ashok Srivastava and Yao Xu and Yang Liu and Ashwani K. 
Sharma and Clay Mayberry", title = "{CMOS LC} voltage controlled oscillator design using multiwalled and single-walled carbon nanotube wire inductors", journal = j-JETC, volume = "8", number = "3", pages = "15:1--15:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287698", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We have utilized our Multiwalled Carbon NanoTube (MWCNT) and Single-Walled Carbon NanoTube (SWCNT) bundle interconnects model in a widely used $ \pi $ model to study the performances of MWCNT and SWCNT bundle wire inductors and compared these with copper (Cu) inductors. The calculation results show that the Q-factors of Carbon NanoTube (CNT) wire (SWCNT bundle and MWCNT) inductors are higher than that of the Cu wire inductor. This is mainly due to much lower resistance of CNT and negligible skin effect in carbon nanotubes at higher frequencies. The application of CNT wire inductor in LC VCO is also studied and the Cadence/Spectre simulations show that VCOs with CNT bundle wire inductors have significantly improved performance such as the higher oscillation frequency and lower phase noise due to their smaller resistances and higher Q-factors. 
It is also noticed that CMOS LC VCO using a SWCNT bundle wire inductor has better performance when compared with the performance of LC VCO using the MWCNT wire inductor due to its lower resistance and higher Q-factor.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mahalingam:2012:DCS, author = "Venkataraman Mahalingam and Nagarajan Ranganathan and Ransford {Hyman, Jr.}", title = "Dynamic clock stretching for variation compensation in {VLSI} circuit design", journal = j-JETC, volume = "8", number = "3", pages = "16:1--16:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287699", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In the nanometer era, process, voltage, and temperature variations are dominating circuit performance, power, and yield. Over the past few years, statistical optimization methods have been effective in improving yield in the presence of uncertainty due to process variations. However, statistical methods overconsume resources, even in the absence of variations. Hence, to facilitate a better performance-power-yield trade-off, techniques that can dynamically enable variation compensation are becoming necessary. In this article, we propose a dynamic technique that controls the instance of data capture in critical path memory flops, by delaying the clock edge trigger. The methodology employs a dynamic delay detection circuit to identify the uncertainty in delay due to variations and stretches the clock in the destination flip-flops. The delay detection circuit uses a latch and set of combinational gates to dynamically detect and create the slack needed to accommodate the delay due to variations. 
The Clock Stretching Logic (CSL) is added only to paths, which have a high probability of failure in the presence of variations. The proposed methodology improves the timing yield of the circuit without significant overcompensation. The methodology approach was simulated using Synopsys design tools for circuit synthesis and Cadence tools for placement and routing of the design. Extraction of parasitic of timing information was parsed using Perl scripts and simulated using a simulation program generated in C++. Experimental results based on Monte-Carlo simulations on benchmark circuits indicate considerable improvement in timing yield with negligible area overhead.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Roy:2012:CAL, author = "Sudip Roy and Debasis Mitra and Bhargab B. Bhattacharya and Krishnendu Chakrabarty", title = "Congestion-aware layout design for high-throughput digital microfluidic biochips", journal = j-JETC, volume = "8", number = "3", pages = "17:1--17:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287700", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Potential applications of digital microfluidic (DMF) biochips now include several areas of real-life applications like environmental monitoring, water and air pollutant detection, and food processing to name a few. In order to achieve sufficiently high throughput for these applications, several instances of the same bioassay may be required to be executed concurrently on different samples. As a straightforward implementation, several identical biochips can be integrated on a single substrate as a multichip to execute the assay for various samples concurrently. 
Controlling individual electrodes of such a chip by independent pins may not be acceptable since it increases the cost of fabrication. Thus, in order to keep the overall pin-count within an acceptable bound, all the respective electrodes of these individual pieces are connected internally underneath the chip so that they can be controlled with a single external control pin. In this article, we present an orientation strategy for layout of a multichip that reduces routing congestion and consequently facilitates wire routing for the electrode array. The electrode structure of the individual pieces of the multichip may be either direct-addressable or pin-constrained. The method also supports a hierarchical approach to wire routing that ensures scalability. In this scheme, the size of the biochip in terms of the total number of electrodes may be increased by a factor of four by increasing the number of routing layers by only one. In general, for a multichip with $ 4^n $ identical blocks, $ (n + 1) $ layers are sufficient for wire routing.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Komerath:2012:RBP, author = "Narayanan Komerath and Aravinda Kar", title = "Retail beamed power using millimeter waves: Survey", journal = j-JETC, volume = "8", number = "3", pages = "18:1--18:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287701", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Retail delivery of electric power through millimeter waves is relevant in developing areas where the market for communication devices outpaces the power grid infrastructure.
It is also a critical component of an evolutionary path towards terrestrial and space-based renewable power generation. Narrow-band power can be delivered as focused beams to receivers near end-users, from central power plants, rural distribution points, UAVs, tethered aerostats, stratospheric airship platforms, or space satellites. The article surveys the available knowledge base on millimeter wave beamed power delivery. It then considers design requirements for a retail beamed power architecture, in the context of rural India where power delivery is lagging behind the demand growth for connectivity. A survey of technology developments relevant to millimeter wave beaming is conducted, and indicates that massive, mass-produced solid-state arrays capable of achieving good efficiency and cost effectiveness are possible in the near term to enable such retail power beaming architectures.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Palaniswamy:2012:EHI, author = "Ashok Kumar Palaniswamy and Spyros Tragoudas", title = "An efficient heuristic to identify threshold logic functions", journal = j-JETC, volume = "8", number = "3", pages = "19:1--19:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287702", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A fast method to identify the given Boolean function as a threshold function with weight assignment is introduced. It characterizes the function based on the parameters that have been defined in the literature. The proposed method is capable to quickly characterize all functions that have less than eight inputs and has been shown to operate fast for functions with as many as forty inputs. 
Furthermore, comparisons with other existing heuristic methods show huge increase in the number of threshold functions identified, and drastic reduction in time and complexity.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xu:2012:EPV, author = "Hu Xu and Vasilis F. Pavlidis and Giovanni {De Micheli}", title = "Effect of process variations in {$3$D} global clock distribution networks", journal = j-JETC, volume = "8", number = "3", pages = "20:1--20:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287703", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In three-dimensional (3D) integrated circuits, the effect of process variations on clock skew differs from 2D circuits. The combined effect of inter-die and intra-die process variations on the design of 3D clock distribution networks is considered in this article. A statistical clock skew model incorporating both the systematic and random components of process variations is employed to describe this effect. Two regular 3D clock tree topologies are investigated and compared in terms of clock skew variation. The statistical skew model used to describe clock skew variations is verified through Monte-Carlo simulations. The clock skew is shown to change in different ways with the number of planes forming the 3D IC and the clock network architecture. Simulations based on a 45-nm CMOS technology show that the maximum standard deviation of clock skew can vary from 15 ps to 77 ps. Results indicate that simply increasing the number of planes of a 3D IC does not necessarily lead to lower skew variation and higher operating frequencies. 
A multigroup 3D clock tree topology is proposed to effectively mitigate the variability of clock skew. Tradeoffs between the investigated 3D clock distribution networks and the number of planes comprising a 3D circuit are discussed and related design guidelines are offered. The skew variation in 3D clock trees is also compared with the skew variation of clock grids.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kursun:2012:STT, author = "Eren Kursun and Jamil Wakil and Mukta Farooq and Robert Hannon", title = "Spatial and temporal thermal characterization of stacked multicore architectures", journal = j-JETC, volume = "8", number = "3", pages = "21:1--21:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287704", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Three-dimensional integration provides a new way of performance growth for microprocessor architectures. While a recent studies report promising performance improvement numbers, majority of the processor stacking options are thermally-limited. Elevated stack temperatures have significant effect on the overall energy efficiency and reliability of the processor; they also limit the potential peak performance improvement from the 3D implementation. Thermal characteristics of 3D stacks differ from 2D processors in various ways including: the nature of heat dissipation throughout the stack, thermal conductivity of the 3D structures such as micro-C4 layers, and hotspot interactions among layers. The intensity of the corresponding thermal problems is highly dependent on the 3D technology, processor and stack parameters. 
In this study we focus on spatial and temporal thermal characteristics of 3D multicore architectures using high-fidelity technology and processor models. Our experimental results highlight the need for integrating detailed thermal models in the design flow, starting with the early design stages. In addition, the reduced time constants and elevated on-chip temperatures indicate faster response time requirements for dynamic thermal management in processor stacking options.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2012:RAP, author = "Bao Liu and Xuemei Chen and Fiona Teshome", title = "Resilient and adaptive performance logic", journal = j-JETC, volume = "8", number = "3", pages = "22:1--22:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287705", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As VLSI technology continues scaling, increasingly significant parametric variations and increasingly prevalent defects present unprecedented challenges to VLSI design at nanometer scale. Specifically, performance variability has hindered performance scaling, while soft errors become an emerging problem for logic computation at recent technology nodes. In this article, we leverage the existing Totally Self-Checking (TSC)/Strongly Fault-Secure (SFS) logic design techniques, and propose Resilient and Adaptive Performance (RAP) logic for maximum adaptive performance and soft error resilience in nanoscale computing. RAP logic clears all timing errors in the absence of external soft errors, albeit at a higher area/power cost compared with Razor logic. 
Our experimental results further show that dual-rail static (Domino) RAP logic outperforms alternative Delay-Insensitive (DI) code-based static (Domino) RAP logic with less area, higher performance, and lower power consumption for the large test cases, and achieves an average of 2.29(2.41)$ \times $ performance boost, 2.12(1.91)$ \times $ layout area, and 2.38(2.34)$ \times $ power consumption compared with the traditional minimum area static logic based on the Nangate 45-nm open cell library.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chang:2012:PED, author = "Kevin Chang and Sujay Deb and Amlan Ganguly and Xinmin Yu and Suman Prasad Sah and Partha Pratim Pande and Benjamin Belzer and Deukhyoun Heo", title = "Performance evaluation and design trade-offs for wireless network-on-chip architectures", journal = j-JETC, volume = "8", number = "3", pages = "23:1--23:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287706", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Massive levels of integration are making modern multicore chips all pervasive in several domains. High performance, robustness, and energy-efficiency are crucial for the widespread adoption of such platforms. Networks-on-Chip (NoCs) have emerged as communication backbones to enable a high degree of integration in multicore Systems-on-Chip (SoCs). Despite their advantages, an important performance limitation in traditional NoCs arises from planar metal interconnect-based multihop links with high latency and power consumption. 
This limitation can be addressed by drawing inspiration from the evolution of natural complex networks, which offer great performance-cost trade-offs. Analogous with many natural complex systems, future multicore chips are expected to be hierarchical and heterogeneous in nature as well. In this article we undertake a detailed performance evaluation for hierarchical small-world NoC architectures where the long-range communications links are established through the millimeter-wave wireless communication channels. Through architecture-space exploration in conjunction with novel power-efficient on-chip wireless link design, we demonstrate that it is possible to improve performance of conventional NoC architectures significantly without incurring high area overhead.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Choi:2012:DQA, author = "Byung-Soo Choi and Rodney {Van Meter}", title = "A {$ \Theta (\sqrt n) $}-depth quantum adder on the {$2$D NTC} quantum computer architecture", journal = j-JETC, volume = "8", number = "3", pages = "24:1--24:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287707", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this work, we propose an adder for the 2-Dimensional Nearest-Neighbor, Two-Qubit gate, Concurrent (2D NTC) architecture, designed to match the architectural constraints of many quantum computing technologies. The chosen architecture allows the layout of logical qubits in two dimensions with $ \sqrt n $ columns where each column has $ \sqrt n $ qubits and the concurrent execution of one- and two-qubit gates with nearest-neighbor interaction only. The proposed adder works in three phases.
In the first phase, the first column generates the summation output and the other columns do the carry-lookahead operations. In the second phase, these intermediate values are propagated from column to column, preparing for computation of the final carry for each register position. In the last phase, each column, except the first one, generates the summation output using this column-level carry. The depth and the number of qubits of the proposed adder are $ \Theta (\sqrt n) $ and $ O(n) $, respectively. The proposed adder executes faster than the adders designed for the 1D NTC architecture when the length of the input registers $n$ is larger than 51.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Huang:2012:PDT, author = "Jiale Huang and Minhao Zhu and Shengqi Yang and Pallav Gupta and Wei Zhang and Steven M. Rubin and Gilda Garret{\'o}n and Jin He", title = "A physical design tool for carbon nanotube field-effect transistor circuits", journal = j-JETC, volume = "8", number = "3", pages = "25:1--25:??", month = aug, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2287696.2287708", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Aug 20 15:17:55 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we present a graphical Computer-Aided Design (CAD) environment for the design, analysis, and layout of Carbon NanoTube (CNT) Field-Effect Transistor (CNFET) circuits. This work is motivated by the fact that such a tool currently does not exist in the public domain for researchers. Our tool has been integrated within Electric a very powerful, yet free CAD system for custom design of Integrated Circuits (ICs). The tool supports CNFET schematic and layout entry, rule checking, and HSpice/VerilogA netlist generation. 
We provide users with a customizable CNFET technology library with the ability to specify $ \lambda $-based design rules. We showcase the capabilities of our tool by demonstrating the design of a large CNFET standard cell and components library. Meanwhile, HSPICE simulations also have been presented for cell library characterization. We hope that the availability of this tool will invigorate the CAD community to explore novel ideas in CNFET circuit design.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Pande:2012:ISI, author = "Partha Pratim Pande and Amlan Ganguly", title = "Introduction to the special issue on sustainable and green computing systems", journal = j-JETC, volume = "8", number = "4", pages = "26:1--26:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367737", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Banerjee:2012:TNZ, author = "Prithviraj Banerjee and Chandrakant Patel and Cullen Bash and Amip Shah and Martin Arlitt", title = "Towards a net-zero data center", journal = j-JETC, volume = "8", number = "4", pages = "27:1--27:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367738", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A world consisting of billions of service-oriented client devices and thousands of data centers can deliver a diverse range of
services, from social networking to management of our natural resources. However, these services must scale in order to meet the fundamental needs of society. To enable such scaling, the total cost of ownership of the data centers that host the services and comprise the vast majority of service delivery costs will need to be reduced. As energy drives the total cost of ownership of data centers, there is a need for a new paradigm in design and management of data centers that minimizes energy used across their lifetimes, from ``cradle to cradle''. This tutorial article presents a blueprint for a ``net-zero data center'': one that offsets any electricity used from the grid via adequate on-site power generation that gets fed back to the grid at a later time. We discuss how such a data center addresses the total cost of ownership, illustrating that contrary to the oft-held view of sustainability as ``paying more to be green'', sustainable data centers---built on a framework that focuses on integrating supply and demand management from end-to-end---can concurrently lead to lowest cost and lowest environmental impact.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Garg:2012:TDL, author = "Siddharth Garg and Diana Marculescu and Radu Marculescu", title = "Technology-driven limits on runtime power management algorithms for multiprocessor systems-on-chip", journal = j-JETC, volume = "8", number = "4", pages = "28:1--28:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367739", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Runtime power management is a critical technique for reducing the energy footprint of digital electronic devices and enabling
sustainable computing, since it allows electronic devices to dynamically adapt their power and energy consumption to meet performance requirements. In this article, we consider the case of MultiProcessor Systems-on-Chip (MPSoC) implemented using multiple Voltage and Frequency Islands (VFIs) relying on fine-grained Dynamic Voltage and Frequency Scaling (DVFS) to reduce the system power dissipation. In particular, we present a framework to theoretically analyze the impact of three important technology-driven constraints; (i) reliability-driven upper limits on the maximum supply voltage; (ii) inductive noise-driven constraints on the maximum rate of change of voltage/frequency; and (iii) the impact of manufacturing process variations on the performance of DVFS control for multiple VFI MPSoCs. The proposed analysis is general, in the sense that it is not bound to a specific DVFS control algorithm, but instead focuses on theoretically bounding the performance that any DVFS controller can possibly achieve. Our experimental results on real and synthetic benchmarks show that in the presence of reliability- and temperature-driven constraints on the maximum frequency and maximum frequency increment, any DVFS control algorithm will lose up to 87\% performance in terms of the number of steps required to reach a reference steady state. In addition, increasing process variations can lead to up to 60\% of fabricated chips being unable to meet the specified DVFS control specifications, irrespective of the DVFS algorithm used. 
Nonetheless, we note that although conventional DVFS might become less effective with technology scaling, it will continue to play an important role in the context of emerging power management techniques, for example, for massively parallel multiprocessor systems where only a subset of cores can be turned on at any given point of time due to total power constraints.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ghidini:2012:EEM, author = "Giacomo Ghidini and Sajal K. Das", title = "Energy-efficient {Markov} chain-based duty cycling schemes for greener wireless sensor networks", journal = j-JETC, volume = "8", number = "4", pages = "29:1--29:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367740", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "To extend the lifetime of a wireless sensor network, sensor nodes usually duty cycle between dormant and active states. Duty cycling schemes are often evaluated in terms of connection delay, connection duration, and duty cycle. In this article, we show with experiments on Sun SPOT sensors that duty cycling time (energy) efficiency, that is, the ratio of time (energy) employed in ancillary operations when switching from and into deep sleep mode, is an important performance metric too. We propose a novel randomized duty cycling scheme based on Markov chains with the goal of (i) reducing the connection delay, while maintaining a given time (energy) efficiency, or (ii) keeping a constant connection delay, while increasing the time (energy) efficiency. 
Analytical and experimental results demonstrate that the Markov chain-based scheme can improve the performance in terms of connection delay without affecting the time efficiency, or vice versa, as opposed to the trade-off observed in traditional schemes. We extend the proposed duty cycling scheme to a partially randomized scheme, where wireless nodes can switch into active state beyond their schedules when their neighbors are active to anticipate message forwarding. The analytical and experimental results confirm the relationship between connection delay and time efficiency also for this scheme.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sego:2012:IDC, author = "Landon H. Sego and Andr{\'e}s M{\'a}rquez and Andrew Rawson and Tahir Cader and Kevin Fox and William I. {Gustafson, Jr.} and Christopher J. Mundy", title = "Implementing the data center energy productivity metric", journal = j-JETC, volume = "8", number = "4", pages = "30:1--30:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367741", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As data centers proliferate in size and number, the endeavor to improve their energy efficiency and productivity is becoming increasingly important. We discuss the properties of a number of the proposed metrics of energy efficiency and productivity. In particular, we focus on the Data Center Energy Productivity (DCeP) metric, which is the ratio of useful work produced by the data center to the energy consumed performing that work. We describe our approach for using DCeP as the principal outcome of a designed experiment using a highly instrumented, high-performance computing data center. 
We found that DCeP was successful in clearly distinguishing different operational states in the data center, thereby validating its utility as a metric for identifying configurations of hardware and software that would improve (or even maximize) energy productivity. We also discuss some of the challenges and benefits associated with implementing the DCeP metric, and we examine the efficacy of the metric in making comparisons within a data center and among data centers.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Anagnostopoulou:2012:BAM, author = "Vlasia Anagnostopoulou and Susmit Biswas and Heba Saadeldeen and Alan Savage and Ricardo Bianchini and Tao Yang and Diana Franklin and Frederic T. Chong", title = "Barely alive memory servers: Keeping data active in a low-power state", journal = j-JETC, volume = "8", number = "4", pages = "31:1--31:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367742", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Current resource provisioning schemes in Internet services leave servers less than 50\% utilized almost all the time. At this level of utilization, the servers' energy efficiency is substantially lower than at peak utilization. A solution to this problem could be dynamically consolidating workloads into fewer servers and turning others off. However, services typically resist doing so, because of high response times during reactivation in handling traffic spikes. Moreover, services often want the memory and/or storage of all servers to be readily available at all times. 
In this article, we propose a family of barely alive active low-power server states that facilitates both fast reactivation and access to memory while in a low-power state. We compare these states to previously proposed active and idle states. In particular, we investigate the impact of load bursts in each energy-saving scheme. We also evaluate the additional benefits of memory access under low-power states with a study of a search service using a cooperative main-memory cache. Finally, we propose a system that combines a barely-alive state with the off state. We find that the barely alive states can reduce service energy consumption by up to 38\%, compared to an energy-oblivious system. We also find that these energy savings are consistent across a large parameter space.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sheikh:2012:EPA, author = "Hafiz Fahad Sheikh and Hengxing Tan and Ishfaq Ahmad and Sanjay Ranka and Phanisekhar Bv", title = "Energy- and performance-aware scheduling of tasks on parallel and distributed systems", journal = j-JETC, volume = "8", number = "4", pages = "32:1--32:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367743", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Enabled by high-speed networking in commercial, scientific, and government settings, the realm of high performance is burgeoning with greater amounts of computational and storage resources. Large-scale systems such as computational grids consume a significant amount of energy due to their massive sizes. The energy and cooling costs of such systems are often comparable to the procurement costs over a year period. 
In this survey, we will discuss allocation and scheduling algorithms, systems, and software for reducing power and energy dissipation of workflows on the target platforms of single processors, multicore processors, and distributed systems. Furthermore, recent research achievements will be investigated that deal with power and energy efficiency via different power management techniques and application scheduling algorithms. The article provides a comprehensive presentation of the architectural, software, and algorithmic issues for energy-aware scheduling of workflows on single, multicore, and parallel architectures. It also includes a systematic taxonomy of the algorithms developed in the literature based on the overall optimization goals and characteristics of applications.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kant:2012:EDC, author = "Krishna Kant and Muthukumar Murugan and David H. C. Du", title = "Enhancing data center sustainability through energy-adaptive computing", journal = j-JETC, volume = "8", number = "4", pages = "33:1--33:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367744", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The sustainability concerns of Information Technology (IT) go well beyond energy-efficient computing and require techniques for minimizing environmental impact of IT infrastructure over its entire life-cycle. Traditionally, IT infrastructure is overdesigned at all levels from chips to entire data centers and ecosystem; the paradigm explored in this article is to replace overdesign with rightsizing coupled with smarter control, henceforth referred to as Energy-Adaptive Computing or EAC. 
The article lays out the challenges of EAC in various environments in terms of the adaptation of the workload and the infrastructure to cope with energy and cooling deficiencies. The article then focuses on implementing EAC in a data center environment, and addresses the problem of simultaneous energy demand and energy supply regulation at multiple levels, from servers to the entire data center. The proposed control scheme adapts the assignments of tasks to servers in a way that can cope with the varying energy limitations. The article also presents some experimental results to show how the scheme can continue to meet Quality of Service (QoS) requirements of tasks under energy limitations.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Abbasi:2012:DGD, author = "Zahra Abbasi and Tridib Mukherjee and Georgios Varsamopoulos and Sandeep K. S. Gupta", title = "{DAHM}: a green and dynamic {Web} application hosting manager across geographically distributed data centers", journal = j-JETC, volume = "8", number = "4", pages = "34:1--34:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2367736.2367745", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 28 17:25:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Dynamic Application Hosting Management (DAHM) is proposed for geographically distributed data centers, which decides on the number of active servers and on the workload share of each data center. DAHM achieves cost-efficient application hosting by taking into account: (i) the spatio-temporal variation of energy cost, (ii) the data center computing and cooling energy efficiency, (iii) the live migration cost, and (iv) any SLA violations due to migration overhead or network delay.
DAHM is modeled as fixed-charge min-cost flow and mixed integer programming for stateless and stateful applications, respectively, and it is shown NP-hard. We also develop heuristic algorithms and prove, when applications are stateless and servers have an identical power consumption model, that the approximation ratio on the minimum total cost is bounded by the number of data centers. Further, the heuristics are evaluated in a simulation study using realistic parameter data; compared to a performance-oriented application assignment, that is, hosting at the data center with the least delay, the potential cost savings of DAHM reaches 33\%. The savings come from reducing the total number of active servers as well as leveraging the cost efficiency of data centers. Through the simulation study, the article further explores how relaxing the delay requirement for a small fraction of users can increase the cost savings of DAHM.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Srinivasan:2013:NAF, author = "S. Srinivasan and V. Kamakoti and A. Bhattacharya", title = "A Novel Algorithm for Fast Synthesis of {DNA} Probes on Microarrays", journal = j-JETC, volume = "9", number = "1", pages = "1:1--1:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422095", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "DNA microarrays are used extensively for biochemical analysis that includes genomics and drug discovery. This increased usage demands large microarrays, thus complicating their computer aided design (CAD) and manufacturing methodologies. 
One such time-consuming design problem is to minimize the border length of masks used during the manufacture of microarrays. From the manufacturing point of view the border length of masks is one of the crucial parameters determining the reliability of the microarray. This article presents a novel algorithm for synthesis (placement and embedding) of microarrays, which consumes significantly less time than the best algorithm reported in the literature, while maintaining the quality (border length of masks) of the result. The proposed technique uses only a part of each probe to decide on the placement and the remaining parts for deciding on the embedding sequence. This is in contrast to the earlier methods that considered the entire probe for both placement and embedding. The second novelty of the proposed technique is the preclassification (prior to placement and embedding) of probes based on their prefixes. This decreases the complexity of the problem of deciding the next probe to be placed from that involving computation of Hamming distance between all probes (as used in earlier approaches) to the one involving searching of nonempty cells on a constant size grid array. 
The proposed algorithm is $ 43 \times $ faster than the best reported in the literature for the case of synthesizing a microarray with 250,000 probes and further exhibits linear behavior in terms of computation time for larger microarrays.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Maftei:2013:MBS, author = "Elena Maftei and Paul Pop and Jan Madsen", title = "Module-Based Synthesis of Digital Microfluidic Biochips with Droplet-Aware Operation Execution", journal = j-JETC, volume = "9", number = "1", pages = "2:1--2:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422096", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidic biochips represent an alternative to conventional biochemical analyzers. A digital biochip manipulates liquids not as continuous flow, but as discrete droplets on a two-dimensional array of electrodes. Several electrodes are dynamically grouped to form a virtual device, on which operations are executed by moving the droplets. So far, researchers have ignored the locations of droplets inside devices, considering that all the electrodes forming the device are occupied throughout the operation execution. In this article, we consider a droplet-aware execution of microfluidic operations, which means that we know the exact position of droplets inside the modules at each time-step. We propose a Tabu Search-based metaheuristic for the synthesis of digital biochips with droplet-aware operation execution. 
Experimental results show that our approach can significantly reduce the application completion time, allowing us to use smaller area biochips and thus reduce costs.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Peper:2013:BCF, author = "Ferdinand Peper and Jia Lee and Josep Carmona and Jordi Cortadella and Kenichi Morita", title = "{Brownian} Circuits: Fundamentals", journal = j-JETC, volume = "9", number = "1", pages = "3:1--3:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422097", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Random fluctuations will be a major factor interfering with the operation of nanometer scale electronic devices. This article presents circuit architectures that can exploit such fluctuations, if signals have a particle-like (discrete, token-based) character. We define an abstract circuit primitive that, though lacking functionality when used with fluctuation-free signals, becomes universal when fluctuations are allowed. Key to the power of a signal's fluctuations is the ability to explore the state space of a circuit. This ability is used to resolve deadlock situations, which could otherwise only be averted by increased design complexity. 
The results in this article suggest that in the design of future computers, signal fluctuations, rather than being an impediment to be avoided at any cost, may be an important ingredient to achieve efficient operation.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ghavami:2013:DAR, author = "Behnam Ghavami and Mohsen Raji and Hossein Pedram and Mehdi B. Tahoori", title = "Design and Analysis of a Robust Carbon Nanotube-Based Asynchronous Primitive Circuit", journal = j-JETC, volume = "9", number = "1", pages = "4:1--4:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422098", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Carbon Nanotube Field Effect Transistors (CNFETs) show great promise as extensions to silicon CMOS. However, CNFET-based circuits will face great fabrication challenges that will translate into important parameter variations and decreased reliability. Hence, asynchronous logic, which is intrinsically more robust to variability, seems an ideal and perhaps unavoidable choice for digital circuits in CNFET technology. This article presents the results on the design and analysis of a CNFET-based implementation of an asynchronous circuit primitive: the Muller C-element. Using a CNFET SPICE model, we evaluate the robustness of CNFET-based C-element in the presence of CNT fabrication-related nonidealities. We investigate a quantitative evaluation of how timing variability impacts the functionality of a C-element and then, extract the necessary delay constraints of the C-element circuit from the signal transition graph specification. 
Considering the large degrees of spatial correlation observed between the CNFETs fabricated on directionally grown CNTs, a layout technique is exploited to overcome the robustness challenges of a CNFET-based C-element. Extensive Monte Carlo simulations on the proposed technique have demonstrated the effectiveness of the proposed CNFET-based C-element by improving approximately 50X in its robustness in expense of 65\% area, 47\% delay, and 56\% power consumption overheads. Experimental results indicate that implementation of some CNFET-based Quasi Delay Insensitive (QDI) benchmark circuits using the proposed C-element results in significant robustness improvement with negligible power and throughput overheads. As a promising step toward CNFET-based giga-scale integrated circuits, this article shows that the asynchronous logic is an effective approach to design robust integrated circuits in CNFET technology with inherent extreme physical variations.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2013:SAR, author = "Yung-Chih Chen and Soumya Eachempati and Chun-Yao Wang and Suman Datta and Yuan Xie and Vijaykrishnan Narayanan", title = "A Synthesis Algorithm for Reconfigurable Single-Electron Transistor Arrays", journal = j-JETC, volume = "9", number = "1", pages = "5:1--5:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422099", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reducing power consumption has become one of the primary challenges in chip design, and therefore significant efforts are being devoted to find holistic solutions on power reduction from the device level up to the system level. 
Among a plethora of low power devices that are being explored, single-electron transistors (SETs) at room temperature are particularly attractive. Although prior work has proposed a binary decision diagram-based reconfigurable logic architecture using SETs, it lacks an automatic synthesis algorithm for the architecture. Consequently, in this work, we develop a product-term-based approach that synthesizes a logic circuit by mapping all its product terms into the SET architecture. The experimental results show the effectiveness and efficiency of the proposed approach on a set of MCNC benchmarks.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tang:2013:TCT, author = "Aoxiang Tang and Niraj K. Jha", title = "Thermal Characterization of Test Techniques for {FinFET} and {$3$D} Integrated Circuits", journal = j-JETC, volume = "9", number = "1", pages = "6:1--6:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422100", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Power consumption has become a very important consideration during integrated circuit (IC) design and test. During test, it can far exceed the values reached during normal operation and, thus, lead to temperatures above the allowed threshold. Without appropriate temperature reduction, permanent damage may be caused to the IC or invalid test results may be obtained. FinFET is a double-gate field-effect transistor (DG-FET) that was introduced commercially in 2012. Due to the vertical nature of FinFETs and, hence, weaker ability to dissipate heat, this problem is likely to get worse for FinFET circuits. Another technology rapidly gaining popularity is 3D IC integration. 
Unfortunately, the compact nature of a multidie 3D IC is likely to aggravate the temperature-during-test problem even further. Hence, before temperature-aware test methodologies can be developed, it is important to thermally analyze both FinFET and 3D circuits under test. In this article, we present a methodology for thermal characterization of various test techniques, such as scan and built-in self-test (BIST), for FinFET and 3D ICs. FinFET thermal characterization makes use of a FinFET standard cell library that is characterized with the help of the University of Florida double-gate (UFDG) SPICE model. Thermal profiles for circuits under test are produced by ISAC2 from University of Colorado for FinFET circuits and HotSpot from University of Virginia for 3D ICs. Experimental results indicate that high temperatures result under BIST and much less often under scan, and that both power consumption and test application time should be reduced to lower the temperature of circuits under test, just reducing the power consumption is not enough.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2013:HRD, author = "Shuo Wang and Jianwei Dai and Lei Wang", title = "Hybrid Redundancy for Defect Tolerance in Molecular Crossbar Memory", journal = j-JETC, volume = "9", number = "1", pages = "7:1--7:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422101", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Nano/molecular technologies have emerged as the potential fabrics for building future integrated systems. 
However, due to the imperfect fabrication process, these extremely scaled devices are vulnerable to a large number of defects and transient faults. Memory systems, which are the primary application targeted by these technologies, are particularly exposed to this problem due to the ultra-high integration density and elevated error sensitivity. In this article, we propose a defect-tolerant technique, referred to as hybrid redundancy allocation, for the design of molecular crossbar memory systems. By using soft redundancy (runtime exploitation of memory spatial/temporal locality) in combination with hardware redundancy (spare memory cells), the proposed technique can achieve better error management at a low cost as compared with conventional techniques. Simulation results demonstrate the significant improvement in defect tolerance, efficiency, and scalability of the proposed technique.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Narayanan:2013:VNF, author = "Pritish Narayanan and Michael Leuchtenburg and Jorge Kina and Prachi Joshi and Pavan Panchapakeshan and Chi On Chui and C. Andras Moritz", title = "Variability in Nanoscale Fabrics: Bottom-up Integrated Analysis and Mitigation", journal = j-JETC, volume = "9", number = "1", pages = "8:1--8:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422102", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Emerging nanodevice-based architectures will be impacted by parameter variation in conjunction with high defect rates. Variations in key physical parameters are caused by manufacturing imprecision as well as fundamental atomic scale randomness. 
In this article, the impact of parameter variation on nanoscale computing fabrics is extensively studied through a novel integrated methodology across device, circuit and architectural levels. This integrated approach enables to study in detail the impact of physical parameter variation across all fabric layers. A final contribution of the article includes novel techniques to address this impact. The variability framework, while generic, is explored extensively on the Nanoscale Application Specific Integrated Circuits (NASICs) nanowire fabric. For variation of $ \sigma = 10 $ in key physical parameters, the on current is found to vary by up to 3.5X. Circuit-level delay shows up to 118\% deviation from nominal. Monte Carlo simulations using an architectural simulator found 67\% nanoprocessor chips to operate below nominal frequencies due to variation. New built-in variation mitigation and fault-tolerance schemes, leveraging redundancy, asymmetric delay paths and biased voting schemes, were developed and evaluated to mitigate these effects. They are shown to improve performance by up to 7.5X on a nanoscale processor design with variation, and improve performance in designs relying on redundancy for defect tolerance, without variation assumed. Techniques show up to 3.8X improvement in effective-yield performance products even at a high 12\% defect rate. The suite of techniques provides a design space across key system-level metrics such as performance, yield and area.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liang:2013:EWB, author = "Jiale Liang and Stanley Yeh and S. Simon Wong and H.-S. 
Philip Wong", title = "Effect of Wordline\slash Bitline Scaling on the Performance, Energy Consumption, and Reliability of Cross-Point Memory Array", journal = j-JETC, volume = "9", number = "1", pages = "9:1--9:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2422094.2422103", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Feb 20 16:42:57 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The impact of wordline/bitline metal wire scaling on the write/read performance, energy consumption, speed, and reliability of the cross-point memory array is quantitatively studied for technology nodes down to single-digit nm. The impending resistivity increase in the Cu wires is found to cause significant decrease of both write and read window margins at the regime when electron surface scattering and grain boundary scattering are substantial. At deeply-scaled device dimensions, the wire energy dissipation and wire latency become comparable to or even exceed the intrinsic values of memory cells. The large current density flowing through the wordlines/bitlines raises additional reliability concerns for the cross-point memory array. All these issues are exacerbated at smaller memory resistance values and larger memory array sizes. They thereby impose strict constraints on the memory device design and preclude the realization of large-scale cross-point memory array with minimum feature sizes beyond the 10 nm node. A rethink in the design methodology of cross-point memory to incorporate and mitigate the scaling effects of wordline/bitline is necessary. 
Possible solutions include the use of memory wires with better conductivity and scalability, memory arrays with smaller partition sizes, and memory elements with larger resistance values and resistance ratios.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Paul:2013:ISI, author = "Bipul C. Paul and Arijit Raychowdhury", title = "Introduction to the special issue on memory technologies", journal = j-JETC, volume = "9", number = "2", pages = "10:1--10:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yang:2013:MDC, author = "J. Joshua Yang and R. Stanley Williams", title = "Memristive devices in computing system: Promises and challenges", journal = j-JETC, volume = "9", number = "2", pages = "11:1--11:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Memristive devices with a simple structure are not only very small but also very versatile, which makes them an ideal candidate used for the next generation computing system in the post-Si era. The working mechanism of the devices and a family of nanodevices built based on this working mechanism are introduced first followed by some proposed applications of these novel devices. 
The promises and challenges of these devices are then discussed, together with the significant progresses made recently in dealing with these challenges.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Jackson:2013:NES, author = "Bryan L. Jackson and Bipin Rajendran and Gregory S. Corrado and Matthew Breitwisch and Geoffrey W. Burr and Roger Cheek and Kailash Gopalakrishnan and Simone Raoux and Charles T. Rettner and Alvaro Padilla and Alex G. Schrott and Rohit S. Shenoy and B{\"u}lent N. Kurdi and Chung H. Lam and Dharmendra S. Modha", title = "Nanoscale electronic synapses using phase change devices", journal = j-JETC, volume = "9", number = "2", pages = "12:1--12:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The memory capacity, computational power, communication bandwidth, energy consumption, and physical size of the brain all tend to scale with the number of synapses, which outnumber neurons by a factor of 10,000. Although progress in cortical simulations using modern digital computers has been rapid, the essential disparity between the classical von Neumann computer architecture and the computational fabric of the nervous system makes large-scale simulations expensive, power hungry, and time consuming. Over the last three decades, CMOS-based neuromorphic implementations of ``electronic cortex'' have emerged as an energy efficient alternative for modeling neuronal behavior. However, the key ingredient for electronic implementation of any self-learning system---programmable, plastic Hebbian synapses scalable to biological densities---has remained elusive.
We demonstrate the viability of implementing such electronic synapses using nanoscale phase change devices. We introduce novel programming schemes for modulation of device conductance to closely mimic the phenomenon of Spike Timing Dependent Plasticity (STDP) observed biologically, and verify through simulations that such plastic phase change devices should support simple correlative learning in networks of spiking neurons. Our devices, when arranged in a crossbar array architecture, could enable the development of synaptronic systems that approach the density ($ \approx 10^{11} $ synapses per sq cm) and energy efficiency (consuming $ \approx 1 $ pJ per synaptic programming event) of the human brain.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Apalkov:2013:STT, author = "Dmytro Apalkov and Alexey Khvalkovskiy and Steven Watts and Vladimir Nikitin and Xueti Tang and Daniel Lottis and Kiseok Moon and Xiao Luo and Eugene Chen and Adrian Ong and Alexander Driskill-Smith and Mohamad Krounbi", title = "Spin-transfer torque magnetic random access memory {(STT-MRAM)}", journal = j-JETC, volume = "9", number = "2", pages = "13:1--13:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin-transfer torque magnetic random access memory (STT-MRAM) is a novel, magnetic memory technology that leverages the base platform established by an existing 100+nm node memory product called MRAM to enable a scalable nonvolatile memory solution for advanced process nodes. 
STT-MRAM features fast read and write times, small cell sizes of 6F$^2$ and potentially even smaller, and compatibility with existing DRAM and SRAM architecture with relatively small associated cost added. STT-MRAM is essentially a magnetic multilayer resistive element cell that is fabricated as an additional metal layer on top of conventional CMOS access transistors. In this review we give an overview of the existing STT-MRAM technologies currently in research and development across the world, as well as some specific discussion of results obtained at Grandis and with our foundry partners. We will show that in-plane STT-MRAM technology, particularly the DMTJ design, is a mature technology that meets all conventional requirements for an STT-MRAM cell to be a nonvolatile solution matching DRAM and/or SRAM drive circuitry. Exciting recent developments in perpendicular STT-MRAM also indicate that this type of STT-MRAM technology may reach maturity faster than expected, allowing even smaller cell size and product introduction at smaller nodes.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mojumder:2013:DPS, author = "Niladri N. Mojumder and Xuanyao Fong and Charles Augustine and Sumeet K. Gupta and Sri Harsha Choday and Kaushik Roy", title = "Dual pillar spin-transfer torque {MRAMs} for low power applications", journal = j-JETC, volume = "9", number = "2", pages = "14:1--14:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Electron-spin based data storage for on-chip memories has the potential for ultra-high density, low power consumption, very high endurance, and reasonably low read/write latency. 
In this article, we discuss the design challenges associated with spin-transfer torque (STT) MRAM in its state-of-the-art configuration. We propose an alternative bit cell configuration and three new genres of magnetic tunnel junction (MTJ) structures to improve STT-MRAM bit cell stabilities, write endurance, and reduce write energy consumption. The proposed multi-port, multi-pillar MTJ structures offer the unique possibility of electrical and spatial isolation of memory read and write. In order to realize ultralow power under process variations, we propose device, bit-cell and architecture level design techniques. Such design alternatives at multiple levels of design abstraction have been found to achieve substantially enhanced robustness, density, reliability and low power as compared to their charge-based counterparts for future embedded applications.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chatterjee:2013:EAS, author = "Subho Chatterjee and Sayeef Salahuddin and Satish Kumar and Saibal Mukhopadhyay", title = "Electrothermal analysis of spin-transfer-torque random access memory arrays", journal = j-JETC, volume = "9", number = "2", pages = "15:1--15:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin Transfer Torque RAM (STTRAM) is a promising candidate for fast, scalable, high-density, nonvolatile memory in nanometer technology. However, relatively high write current density and small volume of the memory device indicate the possibility of significant self-heating in the STTRAM structure. This article performs a critical analysis of the self-heating induced temperature variations in STTRAM.
We perform a 3D finite volume method based study to characterize self-heating effect in a single cell. The analysis is extended for STTRAM arrays by developing a computationally efficient RC compact model based thermal analyzer. The analysis shows that self-heating can result in considerable increase in both steady-state value and transient change in temperature of individual cells. The effect is less pronounced at the array level and depends on the activity level, that is, number of active cells within an array size. The analysis further illustrates that self-heating negatively impacts electrical reliability metrics namely, read margin and detection accuracy; degrades cell performance; and modulates energy dissipation.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2013:CCB, author = "Yiran Chen and Weng-Fai Wong and Hai Li and Cheng-Kok Koh and Yaojun Zhang and Wujie Wen", title = "On-chip caches built on multilevel spin-transfer torque {RAM} cells and its optimizations", journal = j-JETC, volume = "9", number = "2", pages = "16:1--16:??", month = may, year = "2013", CODEN = "????", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 1 11:19:09 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "It has been predicted that a processor's caches could occupy as much as 90\% of chip area a few technology nodes from the current ones. In this article, we investigate the use of multilevel spin-transfer torque RAM (STT-RAM) cells in the design of processor caches. We start with examining the access (read and write) scheme for multilevel cell (MLC) STT-RAM from a circuit design perspective, detailing the read and write circuits.
Compared to traditional SRAM caches, a multilevel cell (MLC) STT-RAM cache design is denser, fast, and requires less energy. However, a number of critical architecture-level issues remain to be solved before MLC STT-RAM technology can be deployed in processor caches. We shall offer solutions to the issue of bit encoding as well as tackle the write endurance problem. In particular, the latter has been neglected in previous works on STT-RAM caches. We propose a set remapping scheme that can potentially prolong the lifetime of a MLC STT-RAM cache by 80$ \times $ on average. Furthermore, a method for recovering the performance that may be lost in some applications due to set remapping is proposed. The impacts of process variations of the MLC STT-RAM cell on the robustness of the memory hierarchy is also discussed, together with various enhancement techniques, namely, ECC and design redundancy.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Thapliyal:2013:DER, author = "Himanshu Thapliyal and Nagarajan Ranganathan", title = "Design of efficient reversible logic-based binary and {BCD} adder circuits", journal = j-JETC, volume = "9", number = "3", pages = "17:1--17:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491682", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic is gaining significance in the context of emerging technologies such as quantum computing since reversible circuits do not lose information during computation and there is one-to-one mapping between the inputs and outputs. In this work, we present a class of new designs for reversible binary and BCD adder circuits. 
The proposed designs are primarily optimized for the number of ancilla inputs and the number of garbage outputs and are designed for possible best values for the quantum cost and delay. In reversible circuits, in addition to the primary inputs, some constant input bits are used to realize different logic functions which are referred to as ancilla inputs and are overheads that need to be reduced. Further, the garbage outputs which do not contribute to any useful computations but are needed to maintain reversibility are also overheads that need to be reduced in reversible designs. First, we propose two new designs for the reversible ripple carry adder: (i) one with no input carry $ c_0 $ and no ancilla input bits, and (ii) one with input carry $ c_0 $ and no ancilla input bits. The proposed reversible ripple carry adder designs with no ancilla input bits have less quantum cost and logic depth (delay) compared to their existing counterparts in the literature. In these designs, the quantum cost and delay are reduced by deriving designs based on the reversible Peres gate and the TR gate. Next, four new designs for the reversible BCD adder are presented based on the following two approaches: (i) the addition is performed in binary mode and correction is applied to convert to BCD when required through detection and correction, and (ii) the addition is performed in binary mode and the result is always converted using a binary to BCD converter.
The proposed reversible binary and BCD adders can be applied in a wide variety of digital signal processing applications and constitute important design components of reversible computing.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lee:2013:CIP, author = "Woo Hyung Lee and Pinaki Mazumder", title = "Color image processing with multi-peak resonant tunneling diodes", journal = j-JETC, volume = "9", number = "3", pages = "18:1--18:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2503128", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The article introduces a novel approach to color image processing that utilizes multi-peak resonant tunneling diodes for encoding color information in quantized states of the diodes. The Multi-Peak Resonant Tunneling Diodes (MPRTDs) are organized as a two-dimensional array of vertical pillars which are locally connected by programmable passive and active elements with a view to realizing a wide variety of color image processing functions such as quantization, color extraction, image smoothing, edge detection, and line detection. In order to process color information in the input images, two different methods for color representation schemes have been used: one using color mapping and the other using direct RGB representation. 
Finally, the article uses HSPICE simulation methods for the netlist of the proposed RTD-based nanoarchitecture in order to verify a candidate of image functions by using the afore-mentioned representation methods.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bobba:2013:CTP, author = "Shashikanth Bobba and Ashutosh Chakraborty and Olivier Thomas and Perrine Batude and Giovanni de Micheli", title = "Cell transformations and physical design techniques for {$3$D} monolithic integrated circuits", journal = j-JETC, volume = "9", number = "3", pages = "19:1--19:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491675", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "3D Monolithic Integration (3DMI), also termed as sequential integration, is a potential technology for future gigascale circuits. In 3DMI technology the 3D contacts, connecting different active layers, are in the order of few 100nm. Given the advantage of such small contacts, 3DMI enables fine-grain (gate-level) partitioning of circuits. In this work we present three cell transformation techniques for standard cell-based ICs with 3DMI technology. As a major contribution of this work, we propose a design flow comprising of a cell transformation technique, cell-on-cell stacking, and a physical design technique ({CELONCEL$_{PD}$}) aimed at placing cells transformed with cell-on-cell stacking. We analyze and compare various cell transformation techniques for 3DMI technology without disrupting the regularity of the IC design flow.
Our experiments demonstrate the effectiveness of CELONCEL design technique, yielding us an area reduction of 37.5\%, 16.2\% average reduction in wirelength, and 6.2\% average improvement in overall delay, compared with a 2D case when benchmarked across various designs in 45nm technology node.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tang:2013:DSE, author = "Aoxiang Tang and Niraj K. Jha", title = "Design space exploration of {FinFET} cache", journal = j-JETC, volume = "9", number = "3", pages = "20:1--20:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491678", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Integration of cache on-chip has significantly improved the performance of modern processors. The relentless demand for ever-increasing performance has led to the need to increase the cache capacity and number of cache levels. However, the performance improvement is accompanied by an increase in chip's power dissipation, requiring the use of more expensive cooling technologies to ensure chip reliability and long product life. The emergence of FinFETs as the technology of choice for high-performance computing poses new challenges to processor designers. With the introduction of new features in FinFETs, for example, independently controllable back gates, researchers have proposed several innovative memory cells that can reduce leakage power significantly, making the integration of a larger cache more practical. 
In this article, we comprehensively evaluate and compare the performance, power consumption (both dynamic and leakage), area, and temperature of different FinFET SRAM caches by exploring common configurations with varying cache size, block size, associativity, and number of banks. We evaluate caches based on four well-known FinFET SRAM cells: Pass-Gate FeedBack (PGFB), Row-based Back-Gate Biasing (RBGB), 8T, and 4T. We show how the caches can be simulated at self-consistent temperatures (at which leakage and temperature are in equilibrium). Drowsy and decay caches are two well-known leakage reduction techniques. We implement them in the context of FinFET caches to investigate their impact. We show that the RBGB cell-based cache is far superior in leakage and Power-Delay Product (PDP) to those based on the other three cells, sometimes by an order of magnitude. This superiority is maintained even when drowsy or decay leakage reduction techniques are applied to caches based on the other three cells, but not to the one based on the RBGB cell. This significantly diminishes the importance of drowsy or decay cache techniques, at least when the RBGB cell is used.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zamani:2013:IFV, author = "Masoud Zamani and Hanieh Mirzaei and Mehdi B. 
Tahoori", title = "{ILP} formulations for variation\slash defect-tolerant logic mapping on crossbar nano-architectures", journal = j-JETC, volume = "9", number = "3", pages = "21:1--21:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491680", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Several emerging nano-technologies, including crossbar nano-architectures, have recently been studied as possible replacement or supplement to CMOS technology in the future. However, extreme process variation and high failure rates, mainly due to atomic device sizes, are major challenges for crossbar nano-architectures. This article presents variation- and defect-tolerant logic mapping on crossbar nano-architectures. Since variation/defect-aware mapping is an NP-hard problem, we introduce a set of Integer Linear Programming (ILP) formulations to effectively solve the problem in a reasonable time. The proposed ILP formulations can be used for both diode-based and FET-based crossbars. Experimental results on benchmark circuits show that our approach can reduce the critical-path delay 39\% compared to the Simulated Annealing (SA) method. It can also successfully achieve 97\% defect-free mapping with 40\% defect density. It can tolerate process variations to meet timing constraints in 95\% of the cases, compared to only 77\% achieved by SA.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sun:2013:EVC, author = "Guangyu Sun and Eren Kursun and Jude A. 
Rivers and Yuan Xie", title = "Exploring the vulnerability of {CMPs} to soft errors with {$3$D} stacked nonvolatile memory", journal = j-JETC, volume = "9", number = "3", pages = "22:1--22:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491679", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Improving the vulnerability to soft errors is one of the important design goals for future architecture design of Chip-MultiProcessors (CMPs). In this study, we explore the soft error characteristics of CMPs with 3D stacked NonVolatile Memory (NVM), in particular, the Spin-Transfer Torque Random Access Memory (STT-RAM), whose cells are immune to radiation-induced soft errors and do not have endurance problems. We use 3D stacking as an enabler for modular integration of STT-RAM memories with minimum disruption in the baseline processor design flow, while providing further interconnection and capacity advantages. We take an in-depth look at alternative replacement schemes to explore the soft error resilience benefits and design trade-offs of 3D stacked STT-RAM and capture the multivariable optimization challenges microprocessor architectures face. We propose a vulnerability metric, with respect to the instruction and data in the core pipeline and through the cache hierarchy, to present a comprehensive system evaluation with respect to reliability, performance, and power consumption for our CMP architectures. 
Our experimental results show that, for the average workload, replacing memories with an STT-RAM alternative significantly mitigates soft errors on-chip, improves the performance by 14.15\%, and reduces power consumption by 13.44\%.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yang:2013:NAC, author = "Shengqi Yang and Wenping Wang and Mark Hagan and Wei Zhang and Pallav Gupta and Yu Cao", title = "{NBTI}-aware circuit node criticality computation", journal = j-JETC, volume = "9", number = "3", pages = "23:1--23:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491681", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "For sub-65nm technology nodes, Negative Bias Temperature Instability (NBTI) has become a primary limiting factor of circuit lifetime. During the past few years, researchers have spent considerable effort on accurate modeling and characterization of circuit delay degradation caused by NBTI at different design levels. The search for techniques and methodologies which can aid in effectively minimizing the NBTI effect on circuit delay is still underway. In this work, we present the usage of node criticality computation to drive NBTI-aware timing analysis and optimization. Circuits that have undergone this optimization flow show strong resistance to NBTI delay degradation. For the first time, this work proposes a node criticality computation algorithm under an NBTI-aware timing analysis and optimization framework. Our work provides answers to the following yet unaddressed questions: (a) what is the definition of node criticality in a circuit under the NBTI effect? 
(b) how do we identify the critical nodes that, once protected, will be immune to NBTI timing degradation? and (c) what are the NBTI effect attenuation approaches? Experimental results indicate that by protecting the critical nodes found by our proposed methodology, circuit delay degradation can be reduced by up to 50\%. Combined with peak temperature reduction, the delay degradation can be further improved.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wettin:2013:CNE, author = "Paul Wettin and Anuroop Vidapalapati and Amlan Ganguly and Partha Pratim Pande", title = "Complex network-enabled robust wireless network-on-chip architectures", journal = j-JETC, volume = "9", number = "3", pages = "24:1--24:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491676", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The Network-on-Chip (NoC) paradigm has emerged as a scalable interconnection infrastructure for modern multicore chips. However, with growing levels of integration, the traditional NoCs suffer from high latency and energy dissipation in on-chip data transfer due to conventional multihop metal/dielectric-based interconnects. Three-dimensional integration, on-chip photonics, RF, and wireless links have been proposed as radical low-power and low-latency alternatives to the conventional planar wire-based designs. Wireless NoCs with Carbon NanoTube (CNT) antennas are shown to outperform traditional wire-based NoCs significantly in achievable data rate and energy dissipation. However, such emerging and transformative technologies will be prone to high levels of failures due to various issues related to manufacturing challenges and integration.
On the other hand, several naturally occurring complex networks such as colonies of microbes and the World Wide Web are known to be inherently robust against high rates of failures and harsh environments. This article advocates adoption of such complex network-based architectures to minimize the effect of wireless link failures on the performance of the NoC. Through cycle-accurate simulations it is shown that the wireless NoC architectures inspired by natural complex networks perform better than their conventional wired counterparts even in the presence of high degrees of link failures. We demonstrate the robustness of the proposed wireless NoC architecture by incorporating both uniform and application-specific traffic patterns.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhang:2013:DTU, author = "Xuehui Zhang and Andrew Ferraiuolo and Mohammad Tehranipoor", title = "Detection of {Trojans} using a combined ring oscillator network and off-chip transient power analysis", journal = j-JETC, volume = "9", number = "3", pages = "25:1--25:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2491677", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Oct 1 18:20:25 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Verifying the trustworthiness of Integrated Circuits (ICs) is of utmost importance, as hardware Trojans may destroy ICs bound for critical applications. A novel methodology combining on-chip structure with external current measurements is proposed to verify whether or not an IC is Trojan free. This method considers Trojans' impact on neighboring cells and on the entire IC's power consumption, and effectively localizes the measurement of dynamic power. 
To achieve this, we develop a new on-chip ring oscillator network structure distributed across the entire chip and place each ring oscillator's components in different rows of a standard-cell design. By developing novel statistical data analysis, the effect of process variations on the ICs' transient power will be separated from the effect of Trojans. Simulation results using 90nm technology and experimental results on Xilinx Spartan-6 FPGAs demonstrate the efficiency of our proposed method.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Guiducci:2013:ISI, author = "Carlotta Guiducci", title = "Introduction to Special Issue on Bioinformatics", journal = j-JETC, volume = "9", number = "4", pages = "26:1--26:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536744.2536745", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 27 17:50:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Piovesan:2013:ERP, author = "Damiano Piovesan and Giuseppe Profiti and Pier Luigi Martelli and Piero Fariselli and Rita Casadio", title = "Extended and Robust Protein Sequence Annotation over Conservative Nonhierarchical Clusters: The Case Study of the {ABC} Transporters", journal = j-JETC, volume = "9", number = "4", pages = "27:1--27:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2504729", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 27 17:50:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Genome annotation is one of the most 
important issues in the genomic era. The exponential growth rate of newly sequenced genomes and proteomes urges the development of fast and reliable annotation methods, suited to exploit all the information available in curated databases of protein sequences and structures. To this aim we developed BAR+, the Bologna Annotation Resource. The basic notion is that sequences with high identity value to a counterpart can inherit the same function/s and structure, if available. As a case study we describe how the ATP-binding domain of the ABC transporters can be found and modeled in over 30,000 new sequences not annotated before. We also mapped into BAR+ all the ABC transporters listed in the Transporter Classification DataBase and found that within our environment annotation could be extended to another 256,866 sequences.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Abate:2013:ILH, author = "Francesco Abate and Andrea Acquaviva and Elisa Ficarra and Enrico Macii", title = "Integration of Literature with Heterogeneous Information for Genes Correlation Scoring", journal = j-JETC, volume = "9", number = "4", pages = "28:1--28:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2504728", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 27 17:50:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Determining the correlation between biomedical terms is a powerful instrument to help scientist research activity, both to understand experimental results and to design new ones. In particular, a great potential comes from the integration of the many heterogeneous information sources currently available on the Web. In this article we focus on the correlation between genes and biological processes. 
In this context, we present a methodology for integrating information from biomedical literature with other heterogeneous types of structured information. In particular, the information sources integrated in this work are PubMed abstracts, pathway databases, and NCI thesaurus definitions. The integration is performed at the semantic analysis level using a customized approach we developed to modulate the impact of the different sources on the correlation score. We report the results of a study concerning the impact of the information integration on the correlation score and of the user-level parameters we introduced to modulate the impact of pathway data or NCI definitions with respect to biomedical literature information, depending on the context of the search. To evaluate the methodology, we performed correlation measures on six biological processes and nine genes by comparing the results with and without the integration of pathways and NCI definitions.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Graziano:2013:HVB, author = "Mariagrazia Graziano and Stefano Frache and Maurizio Zamboni", title = "A Hardware Viewpoint on Biosequence Analysis: What's Next?", journal = j-JETC, volume = "9", number = "4", pages = "29:1--29:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2504774", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 27 17:50:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Biosequence alignment recently received an increasing support from both commodity and dedicated hardware platforms. Processing capabilities are constantly rising, but still not satisfying the limitless requirements of this application. 
We give an insight on the contribution to this need that can possibly be expected from emerging technology devices and architectures, focusing as an example on nanofabrics based on silicon nanowires. By varying a few parameters we explore the solution space, and demonstrate with proper figures of merit how this family of beyond CMOS structures could be considered as the effective disruptive technology for biosequence analysis applications.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Venken:2013:SBM, author = "Lyn Venken and Kathleen Marchal and Jos Vanderleyden", title = "Synthetic Biology and Microdevices: a Powerful Combination", journal = j-JETC, volume = "9", number = "4", pages = "30:1--30:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2504775", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 27 17:50:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent developments demonstrate that the combination of microbiology with micro- and nanoelectronics is a successful approach to develop new miniaturized sensing devices and other technologies. In the last decade, there has been a shift from the optimization of the abiotic components, for example, the chip, to the improvement of the processing capabilities of cells through genetic engineering. The synthetic biology approach will not only give rise to systems with new functionalities, but will also improve the robustness and speed of their response towards applied signals. To this end, the development of new genetic circuits has to be guided by computational design methods that enable to tune and optimize the circuit response. 
As the successful design of genetic circuits is highly dependent on the quality and reliability of its composing elements, intense characterization of standard biological parts will be crucial for an efficient rational design process in the development of new genetic circuits. Microengineered devices can thereby offer a new analytical approach for the study of complex biological parts and systems. By summarizing the recent techniques in creating new synthetic circuits and in integrating biology with microdevices, this review aims at emphasizing the power of combining synthetic biology with microfluidics and microelectronics.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Editors:2014:ISI, author = "Editors", title = "Introduction to special issue on reliability and device degradation in emerging technologies", journal = j-JETC, volume = "10", number = "1", pages = "1:1--1:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2543749.2543750", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kufluoglu:2014:RMN, author = "Haldun K{\"u}fl{\"u}oglu and Cathy Chancellor and Min Chen and Claude Cirba and Vijay Reddy", title = "Recovery modeling of negative bias temperature instability {(NBTI)} for {SPICE}-compatible circuit aging simulators", journal = j-JETC, volume = "10", number = "1", pages = "2:1--2:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2517648", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = 
"Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A feasible computational framework that enables improved predictability of NBTI degradation within commercially available tools is discussed. The NBTI model is used for real-time circuit operation where recovery is present. The complementary nature of implementation is readily incorporated into existing model extraction and verification tools. The method provides significantly enhanced accuracy in simulations when compared to circuit data, yet retains practicality and flexibility.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Arasu:2014:RIL, author = "Senthil Arasu and Mehrdad Nourani and Vijay Reddy and John M. {Carulli Jr.} and Gautam Kapila and Min Chen", title = "Reliability improvement of logic and clock paths in power-efficient designs", journal = j-JETC, volume = "10", number = "1", pages = "3:1--3:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2543749.2543751", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Performance degradation due to transistor aging is a significant impediment to high-performance IC design due to increasing concerns of reliability mechanisms such as negative-bias-temperature-instability (NBTI). The concern only grows with technology scaling as the effects of positive bias temperature instability (PBTI) is becoming prominent in future technologies and compounding with the effects of NBTI. Although aging of transistor is inevitable and the magnitude of degradation due to aging varies depending upon the context. 
Specifically, in power-efficient systems designs, the logic and clock paths are susceptible to static stress resulting in peak degradation due to BTI occurrence when clock is gated. In this article, we present the reliability impact of making systems power efficient and propose a design-for-reliability methodology that can be used in conjunction with low-power design techniques to alleviate the stress conditions caused by rendering circuits in idle state. The technique- BTI-Refresh, is shown to be applicable to both logic and clock paths alike and focuses on preventing prolonged static stress using periodic refreshes to achieve alternating stress. The mechanism is shown to integrate seamlessly into the design at gate-level without requiring any architectural or RT-level changes. Using ISCAS benchmarks and Kogge-Stone-Adder circuits, it is shown to reduce the aging effect in logic path delay due to static stress by up to 50\% with negligible area and power overhead. BTI-Refresh is extended to clock-paths to prevent pulse-width degradation due to static aging and with minimal clock-skew.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sun:2014:WAC, author = "Jin Sun and Roman Lysecky and Karthik Shankar and Avinash Kodi and Ahmed Louri and Janet Roveda", title = "Workload assignment considering {NBTI} degradation in multicore systems", journal = j-JETC, volume = "10", number = "1", pages = "4:1--4:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539124", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With continuously shrinking technology, reliability issues such as Negative Bias Temperature Instability (NBTI) has resulted in considerable 
degradation of device performance, and eventually the short mean-time-to-failure (MTTF) of the whole multicore system. This article proposes a new workload balancing scheme based on device-level fractional NBTI model to balance the workload among active cores while relaxing stressed ones. Starting with NBTI-induced threshold voltage degradation, we define a concept of Capacity Rate (CR) as an indication of one core's ability to accept workload. Capacity rate captures core's performance variability in terms of delay and power metrics under the impact of NBTI aging. The proposed workload balancing framework employs the capacity rates as workload constraints, applies a Dynamic Zoning (DZ) algorithm to group cores into zones to process task flows, and then uses Dynamic Task Scheduling (DTS) to allocate tasks in each zone with balanced workload and minimum communication cost. Experimental results on a 64-core system show that by allowing a small part of the cores to relax over a short time period, the proposed methodology improves multicore system yield (percentage of core failures) by 20\%, while extending MTTF by 30\% with insignificant degradation in performance (less than 3\%).", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chabi:2014:RLA, author = "Djaafar Chabi and Damien Querlioz and Weisheng Zhao and Jacques-Olivier Klein", title = "Robust learning approach for neuro-inspired nanoscale crossbar architecture", journal = j-JETC, volume = "10", number = "1", pages = "5:1--5:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539123", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Scaling beyond CMOS require a new combination of computing paradigm 
and new devices. In this context, memristor are often considered as best candidate to implement efficiently synapses in hardware neural networks. In this article, we analyze the impact of memristor parameter variability. We build an analytical model of the global reliability at the crossbar level. It is based on a supervised learning method with multilayer and redundancy extensions. Comparisons with Monte Carlo simulations of small neural network validate our analytical model. It can be used to extrapolate directly the reliability of large-scale neural system. Our extrapolations show that high defect rate and important parameter variability can be handled efficiently with a moderate amount of redundancy.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Frache:2014:NAM, author = "Stefano Frache and Mariagrazia Graziano and Maurizio Zamboni", title = "Nanoarray architectures multilevel simulation", journal = j-JETC, volume = "10", number = "1", pages = "6:1--6:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2541882", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Density and regularity are deemed as the major advantages of nanoarray architectures based on nanowires. Literature demonstrated that proper reliability analyses must be performed and solutions have to be devised to improve nanoarrays yield. Their complexity and high-fault probability claim for specific design automation tools able to explore circuit solutions, performance and fault-tolerant approaches. We envision a simulator conceived to carry on characterizations in terms of logic behavior, defect-induced output error rate assessment, switching activity, power and timing performance.
Though already existing for traditional technology, a simulator based on specific technological and topological tiled nanoarray descriptions, and conceived to join both device and architecture levels, has never been attempted at the degree of accuracy we present. Our contribution is twofold. First, marking a difference with respect to the state of the art, we developed an algorithm based on an event-driven engine which works at switch level and is not simply built on top of cost functions evaluations. The straightforward advantage is the possibility to follow the evolution of dynamic control sequences throughout all the inner components of the nanoarray, and, as a consequence, to obtain circuit level characterization as a projection of the real internal parameters. Second, we added to our simulator the capability to inject faults with specific statistical distributions associated to the nanoarray topology. Here we extract output error rates and yield for one of the possible nanoarray structures proposed in literature, the NASIC. Results specificity and accuracy demonstrate the simulator trustworthiness, its effectiveness for extensive nanoarrays characterization and its suitability as a foundation for both higher architectural and lower device simulation levels. 
The aim of this work, then, is to provide insights into the intertwined relation between actual technology and circuit design for these emerging fabrics, and, as a consequence, to clarify how defects and variability affect circuits and systems performance.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Avritzer:2014:ISI, author = "Alberto Avritzer and Tadashi Dohi", title = "Introduction to special issue on {WoSAR 2011}", journal = j-JETC, volume = "10", number = "1", pages = "7:1--7:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2543749.2543752", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cotroneo:2014:SSA, author = "Domenico Cotroneo and Roberto Natella and Roberto Pietrantuono and Stefano Russo", title = "A survey of software aging and rejuvenation studies", journal = j-JETC, volume = "10", number = "1", pages = "8:1--8:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539117", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Software aging is a phenomenon plaguing many long-running complex software systems, which exhibit performance degradation or an increasing failure rate. Several strategies based on the proactive rejuvenation of the software state have been proposed to counteract software aging and prevent failures. 
This survey article provides an overview of studies on Software Aging and Rejuvenation (SAR) that have appeared in major journals and conference proceedings, with respect to the statistical approaches that have been used to forecast software aging phenomena and to plan rejuvenation, the kind of systems and aging effects that have been studied, and the techniques that have been proposed to rejuvenate complex software systems. The analysis is useful to identify key results from SAR research, and it is leveraged in this article to highlight trends and open issues.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhao:2014:SRS, author = "Jing Zhao and Yuliang Jin and Kishor S. Trivedi and Rivalino {Matias Jr.} and Yanbin Wang", title = "Software rejuvenation scheduling using accelerated life testing", journal = j-JETC, volume = "10", number = "1", pages = "9:1--9:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539118", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A number of studies have reported the phenomenon of ``Software aging'', caused by resource exhaustion and characterized by progressive software performance degradation. In this article, we carry out an experimental study of software aging and rejuvenation for an on-line bookstore application, following the standard configuration of TPC-W benchmark. While real website is used for the bookstore, the clients are emulated. In order to reduce the time to application failures caused by memory leaks, we use the accelerated life testing (ALT) approach. 
We then select the Weibull time to failure distribution at normal level, to be used in a semi-Markov process, to compute the optimal software rejuvenation trigger interval. Since the validation of optimal rejuvenation trigger interval with emulated browsers will take an inordinate long time, we develop a simulation model to validate the ALT experimental results, and also estimate the steady-state availability to cross-validate the results of the semi-Markov availability model.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Machida:2014:JCT, author = "Fumio Machida and Victor F. Nicola and Kishor S. Trivedi", title = "Job completion time on a virtualized server with software rejuvenation", journal = j-JETC, volume = "10", number = "1", pages = "10:1--10:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539121", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article analyzes the completion time of a job running on a virtualized server subject to software aging and rejuvenation in a virtual machine monitor (VMM). A job running on the server may be interrupted by virtual machine (VM) failure, VMM failure or VMM rejuvenation. The job interruption is categorized as either preemptive-repeat (prt), in which case the interrupted job needs to restart from the beginning, or preemptive-resume (prs), in which case the job resumes execution from the point of interruption. Using a semi-Markov process (SMP) to model the server behavior, the steady-state server availability is computed and the theory developed in Kulkarni et al. [1987] is used to obtain the Laplace--Stieltjes transform (LST) of the job completion time.
In the numerical experiments, we introduce four types of aging behavior of VMM. The effectiveness of VMM rejuvenation on job completion time is discussed in association with the type of interruption it causes and the VMM aging type. With our parameter settings, VMM rejuvenation with prs job interruption improves the performance of job execution regardless of the aging type, with performance degradation is taken into account.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Araujo:2014:SAE, author = "Jean Araujo and Rubens Matos and Vandi Alves and Paulo Maciel and F. Vieira de Souza and Rivalino {Matias Jr.} and Kishor S. Trivedi", title = "Software aging in the {Eucalyptus} cloud computing infrastructure: Characterization and rejuvenation", journal = j-JETC, volume = "10", number = "1", pages = "11:1--11:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2539122", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jan 14 19:15:04 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The need for high reliability, availability and performance has significantly increased in modern applications, that handle rapidly growing demands while providing uninterruptible services. Cloud computing systems fundamentally provide access to large pools of data and computational resources. Eucalyptus is a software framework largely used to implement private clouds and hybrid-style Infrastructure as a Service. It implements the Amazon Web Service (AWS) API, allowing interoperability with other AWS-based services. This article investigates the software aging effects in the Eucalyptus framework, considering workloads composed of intensive requests for remote storage attachment and virtual machine instantiations. 
We found problems that may be harmful to system dependability and performance, specifically regarding to RAM memory and swap space exhaustion, besides highly excessive CPU utilization by the virtual machines. We also present an approach that applies time series analysis to schedule rejuvenation, so as to reduce the downtime by predicting the proper moment to perform the rejuvenation. We experimentally evaluate our approach using an Eucalyptus test bed. The results show that our approach achieves higher availability, when compared to a threshold-triggered rejuvenation method based on continuous monitoring of resources utilization.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2014:CRP, author = "Jifeng Chen and Shuo Wang and Mohammad Tehranipoor", title = "Critical-reliability path identification and delay analysis", journal = j-JETC, volume = "10", number = "2", pages = "12:1--12:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564926", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Circuit reliability analysis at the presilicon stage has become vital for sub-45nm technology designs in particular, due to aging effects, such as Negative Bias Temperature Instability (NBTI) and Hot Carrier Injection (HCI). To avoid potential reliability hazards in the postsilicon stage, current large-scale designs for commercial implementation overpessimistically analyze circuit aging under assumed worst-case workload in order not to violate the corner cases even for low possibilities, thus introducing unnecessary margin in the design timing analysis. 
The major issue is lack of an effective aging analysis method applicable to large designs with low CPU runtime, which is mainly due to: (1) conventional reliability tools are extremely time-consuming for circuit-level timing analysis and thus are not practical for large designs; (2) mathematical models developed to expedite the process are not accurate due to the high complexity of aging effects. In this article, a comprehensive analysis is presented to highlight the importance of each aging parameter. Then, a novel methodology is developed based on current commercial reliability tools to guarantee its high accuracy on circuit-level aging analysis. Existing proven low-level mathematical models are further enhanced to extensively speed up a higher level analysis by taking advantage of the explicit intermediate conditions stored in a pregenerated lookup table. Our results indicate $ \geq 244 \times $ improved computational efficiency, $ \leq 5 \% $ relative error, and $ \leq 0.7 \% $ absolute error compared with commercial reliability analysis tools (e.g., HSPICE MOSRA).", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gladshtein:2014:DBP, author = "Michael Gladshtein", title = "Delay-based processing-in-wire for design of {QCA} serial decimal arithmetic units", journal = j-JETC, volume = "10", number = "2", pages = "13:1--13:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564927", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum-dot cellular automata (QCA) technology is now considered to be one of the prospective technologies for a 
nanocomputer creation. The physical properties of QCA and its expanding range of computer applications make it expedient to use the novel paradigm of nanocomputer architecture: serial decimal storage-transfer-processing. The delay-based encoding of decimal digits allows the use of a delay element as a main element of QCA serial arithmetic units. The simple implementation of the delay element by a short length of QCA wire results in reduction of complexity and of the area required for a QCA circuit. The theoretical basics of delay-based processing-in-wire and design examples of QCA serial decimal arithmetic units are presented.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lin:2014:RRM, author = "Chia-Chun Lin and Niraj K. Jha", title = "{RMDDS}: {Reed--Muller} decision diagram synthesis of reversible logic circuits", journal = j-JETC, volume = "10", number = "2", pages = "14:1--14:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564923", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we propose a flexible and efficient reversible logic synthesizer. It exploits the complementary advantages of two methods: Reed--Muller Reversible Logic Synthesis (RMRLS) and Decision Diagram Synthesis (DDS), and is thus called Reed--Muller Decision Diagram Synthesis (RMDDS). RMRLS does not scale to a large number of qubits (i.e., quantum bits). DDS tools, even though efficient, add a large number of ancillary qubits and typically incur much higher quantum cost than necessary. RMDDS overcomes these obstacles. It is flexible in the sense that users can either optimize the number of qubits or the quantum cost in the circuit implementation.
It is also efficient because the circuits can be synthesized within user-defined CPU times. This combination of flexibility and efficiency has been missing from synthesizers presented earlier. When used to synthesize reversible functions, RMDDS reduces the number of qubits by up to 79.2\% (average of 54.6\%) when the synthesis objective is to minimize the number of qubits and the quantum cost by up to 71.5\% (average of 35.7\%) when the synthesis objective is to minimize quantum cost, relative to DDS methods. For irreversible functions (which are automatically embedded in reversible functions), the corresponding best (average) reductions in the number of qubits is 42.1\% (22.5\%) when minimizing the number of qubits, and in quantum cost, it is 63.0\% (25.9\%) when minimizing quantum cost.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2014:CSN, author = "Weichen Liu and Xuan Wang and Jiang Xu and Wei Zhang and Yaoyao Ye and Xiaowen Wu and Mahdi Nikdast and Zhehui Wang", title = "On-chip sensor networks for soft-error tolerant real-time multiprocessor systems-on-chip", journal = j-JETC, volume = "10", number = "2", pages = "15:1--15:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564928", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As transistor density continues to increase with the advent of nanotechnology, reliability issues raised by the more frequent appearance of soft errors are becoming critical for future embedded multiprocessor systems design. 
State-of-the-art techniques for soft error protections targeting multiprocessor systems result in either high chip cost and area overhead or high performance degradation and energy consumption, and do not fulfill the increasing requirements for high performance and dependability. In this article we present a systematic approach, that is, the Sensor Networks-on-Chip (SENoC), to collaboratively and efficiently manage on-chip applications and overcome reliability threats to Multiprocessor Systems-on-Chip (MPSoC). A hardware-software collaborative approach is proposed to solve soft error problems: a hardware-based on-chip sensor network is built for soft error detection, and a software-based recovery mechanism is applied for soft error correction. A two-step scheduling scheme is presented for reliable application and chip management, combining an off-line static optimization stage for application performance maximization and an online lightweight dynamic adjustment stage to handle runtime variations and exceptions. This strategy introduces only trivial overhead on hardware design and much lower overhead on software control and execution, and hence performance degradation and energy consumption are greatly reduced.
We build a cycle-accurate simulator using SystemC, and verify the effectiveness of our technique by comparing performance with related techniques on several real-world applications.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kim:2014:ICU, author = "Jaeyoon Kim and Sandip Tiwari", title = "Inexact computing using probabilistic circuits: Ultra low-power digital processing", journal = j-JETC, volume = "10", number = "2", pages = "16:1--16:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564925", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Numerous computing applications can tolerate low error rates. In such applications, inexact approaches provide the ability to achieve significantly lower power. This work demonstrates the power-error trade-offs that can be achieved. Using probabilistic modeling in sub-50-nm silicon transistor technology, the relationship between statistical uncertainties and errors is elucidated for different configurations and topologies and the trade-offs quantified. Gate-level implementation of the probabilistic CMOS logic is validated by circuit simulations of a commercial 45-nm SOI CMOS process technology. Using a practical ALU architecture where voltages can be scaled from most significant to least significant bit blocks as an example, the potential benefits of this technique are shown.
A calculation error of $ 10^{-6} $, an error rate quite tolerable for many computational tasks, is shown to be possible with a total power reduction of more than 40\%.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Pierce:2014:NTN, author = "Luke Pierce and Spyros Tragoudas", title = "Nanopipelined threshold network synthesis", journal = j-JETC, volume = "10", number = "2", pages = "17:1--17:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564924", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Threshold logic gates allow for complex multiinput functions to be implemented using a single gate thereby reducing the power and area of a circuit. Clocked threshold gates are nanopipelined to increase network throughput. It is shown that synthesis methods that do not consider the synchronization of the nanopipeline can produce an enormous amount of buffers. 
The proposed algorithm synthesizes a Boolean network into a nanopipelined threshold logic network by minimizing not only the number of combinational clusters but also the associated buffer insertion overhead.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xiang:2014:TDT, author = "Dong Xiang and Kele Shen", title = "A thermal-driven test application scheme for pre-bond and post-bond scan testing of three-dimensional {ICs}", journal = j-JETC, volume = "10", number = "2", pages = "18:1--18:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2564922", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Feb 28 17:06:25 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The three-dimensional (3-D) technology offers a new solution to the increasing density of integrated circuits (ICs). In this work, we propose novel scan architectures for 3-D IC pre-bond and post-bond testing by considering the interconnection overhead of through-silicon-vias (TSVs). Since hotspots in 3-D ICs often cause performance and reliability issues, we also develop different test ordering schemes for prebond and postbond testing to avoid applying test vectors that could worsen the temperature distribution. Experimental results show that the peak temperature can be lowered by 20\% with the 3-D scan tree architecture. 
When combined with the test ordering scheme, the 3-D scan tree can further reduce peak temperature by over 30\%.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kamal:2014:IPV, author = "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and Massoud Pedram", title = "Impact of Process Variations on Speedup and Maximum Achievable Frequency of Extensible Processors", journal = j-JETC, volume = "10", number = "3", pages = "19:1--19:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567665", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we investigate the impact of process variations on the speedup and maximum frequency of the extended ISA processor. First, without considering process variations, a custom functional unit (CFU) is designed based on nominal timing parameters, then the timing variations of critical paths of the extensible processor, including the baseline processor and the CFU, are investigated by considering both systematic and random variations. Next, the maximum frequency of the extensible processor and the speed enhancement factor of the extended ISA for different benchmarks are investigated. Results show that timing variation could reduce the speedup of the extensible processor. However, this reduction is highly dependent on the baseline processor and the CFU structures. Additionally, the impact of process variations in the worst-case design approach is studied. Results show that the speedup of the extensible processor is reduced more than in the case when custom instructions (CIs) are selected without considering process variations. 
To study the impact of each variation type, speedup variations due to random and systematic variations are investigated separately. The study reveals that random variation has a similar effect on the CFU and the baseline processor, while the impact of systematic variation on the baseline processor is greater than the CFU.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chung:2014:DET, author = "Haera Chung and Christof Teuscher and Partha Pande", title = "Design and Evaluation of Technology-Agnostic Heterogeneous Networks-on-Chip", journal = j-JETC, volume = "10", number = "3", pages = "20:1--20:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567666", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Traditional metal-wire-based networks-on-chip (NoC) suffer from high latency and power dissipation as the system size scales up in the number of cores. This limitation stems from the inherent multihop communication nature of larger NoCs. It has previously been shown that the performance of NoCs can be significantly improved by introducing long-range, low power, and high-bandwidth single-hop links between distant cores. While previous work has focused on specific NoC architectures and configurations, it remains an open question whether heterogeneous link types are beneficial in a broad range of NoC architectures. In this article, we show that a generic NoC architecture with heterogeneous link types allows for NoCs with higher bandwidth at a lower cost compared to homogeneous networks. We further show that such NoCs scale up significantly better in terms of performance and cost. 
We demonstrate these broadly-applicable results by using a technology-agnostic complex network approach that targets NoC architectures with various emerging link types.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Palaniswamy:2014:ITL, author = "Ashok Kumar Palaniswamy and Spyros Tragoudas", title = "Improved Threshold Logic Synthesis Using Implicant-Implicit Algorithms", journal = j-JETC, volume = "10", number = "3", pages = "21:1--21:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2597175", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Existing threshold logic synthesis methods decompose larger input functions into smaller input functions and perform synthesis for them. It is shown that significantly larger input functions can be synthesized by implementing the existing methods in an implicant-implicit manner. Experimental results on the ISCAS 85 benchmarks show that this impacts the synthesis cost, which drops significantly. More specifically, as the size of the functions that can be handled by the synthesis algorithm increases, the number of threshold logic gates required to implement very large input functions decreases. 
In addition, the total weight decreases and the performance is improved.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2014:CTS, author = "Fu-Wei Chen and Tingting Hwang", title = "Clock-Tree Synthesis with Methodology of Reuse in {$3$D-IC}", journal = j-JETC, volume = "10", number = "3", pages = "22:1--22:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567668", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "IP reuse methodology has been used extensively in SoC (system-on-chip) design. In this reuse methodology, while design and implementation costs are saved, manufacturing cost is not. To further reduce the cost, this reuse concept has been proposed at mask and die level in three-dimensional integrated circuits (3D-IC). In order to achieve manufacturing reuse, in this article, we propose a new methodology for designing a global clock tree in 3D-IC. The objective is to extend an existing clock tree in 2D IC to 3D IC, taking into consideration the wirelength, clock skew, and the number of TSVs. Compared with NNG- and 3D-MMM-based methods, our proposed method reduces the wirelength of the new die and the skew of the global 3D clock tree on average, 5.85\% and 2.3\%, and 76.92\% and 48.7\%, respectively. 
In more than two die design, the average improvements of the wirelength and clock skew of our method as compared with the 3D-MMM-based method are 4.23\% and 46.84\%, respectively.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2014:CHP, author = "Wulong Liu and Yu Wang and Yuchun Ma and Yuan Xie and Huazhong Yang", title = "On-Chip Hybrid Power Supply System for Wireless Sensor Nodes", journal = j-JETC, volume = "10", number = "3", pages = "23:1--23:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2492683", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With the miniaturization of electronic devices, small-size but high-capacity power supply systems appear to be more and more important. A hybrid power source, which consists of a fuel cell (FC) and a rechargeable battery, has the advantages of long lifetime and good load-following capabilities. In this article, we propose the schematic of a hybrid power supply system that can be integrated on a chip compatible with present CMOS processes. For the on-chip, fuel-cell-based hybrid power system in wireless sensor node design, we propose a two steps optimization: (1) dynamic power management (DPM), and (2) adaptive fuel cell optimal power point tracking (AOPPT). Simulation results demonstrate that the on-chip FC-Bat hybrid power system can be used for wireless sensor nodes under different usage scenarios. Our proposed DPM method can achieve 12.9\% more energy savings than the method without DPM. Meanwhile, implementing our AOPPT approach can save about 17\% energy compared with the fixed architecture for the fuel cell system. 
For an on-chip power system with 1cm$^2$ area consumption, the wafer-level battery can power a typical sensor node for only about five months, while our on-chip hybrid power system will supply the same sensor node for two years steadily.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Grissom:2014:IAC, author = "Daniel Grissom and Christopher Curtis and Philip Brisk", title = "Interpreting Assays with Control Flow on Digital Microfluidic Biochips", journal = j-JETC, volume = "10", number = "3", pages = "24:1--24:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567669", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "BioCoder is a C++ library developed at Microsoft Research, India, for the unambiguous specification of biochemical assays. This article describes language extensions to BioCoder along with a compiler and runtime system that translate and execute assays specified using BioCoder on a software simulator. The simulator mimics the behavior of laboratories-on-a-chip (LoCs) based on a droplet actuation technology called electrowetting on dielectric (EWoD). To date, prior compilers targeting similar EWoD devices are limited to assays specified as directed acyclic graphs (DAGs) and cannot handle arbitrary control flow or feedback from the LoC. 
The framework presented herein addresses these challenges through dynamic interpretation, thereby enlarging the space of assays that can be compiled onto EWoD devices.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yuan:2014:FEA, author = "Bo Yuan and Bin Li", title = "A Fast Extraction Algorithm for Defect-Free Subcrossbar in Nanoelectronic Crossbar", journal = j-JETC, volume = "10", number = "3", pages = "25:1--25:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2517137", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Due to the super scale, high defect density, and per-chip designing paradigm of emerging nanoelectronics, the runtime of the algorithms for defect-tolerant design is of vital importance from the perspective of practicability. In this article, an efficient and effective heuristic defect-free subcrossbar extraction algorithm is proposed which improves performance by mixing the heuristics from two state-of-the-art algorithms and then is speeded up significantly by considerably reducing the number of major loops. Compared with the current most effective algorithm that improves the solution quality (i.e., size of the defect-free subcrossbar obtained) at the cost of high time complexity $ O(n^3) $, the time complexity of the proposed heuristic algorithm is proved to be $ O(n^2) $.
Using a large set of instances of various scales and defect densities, the simulation results show that the proposed algorithm can offer similar high-quality solutions as the current most effective algorithm while consuming much shorter runtimes (reduced to about 1/3 to 1/5) than the current most effective algorithm.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chaudhuri:2014:VDS, author = "Sourindra M. Chaudhuri and Niraj K. Jha", title = "{$3$D} vs. {$2$D} Device Simulation of {FinFET} Logic Gates under {PVT} Variations", journal = j-JETC, volume = "10", number = "3", pages = "26:1--26:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567670", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 5 14:50:39 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recently, multigate transistors have been gaining attention as an alternative to conventional MOSFETs. Superior gate control over the channel, smaller subthreshold leakage, and reduced susceptibility to process variations are some of the key features that give multigate structures a competitive edge over MOSFETs. Among various multigate structures, silicon-on-insulator (SOI) FinFETs are promising, owing to their ease of fabrication. However, characterization of SOI FinFET devices/gates needs immediate attention in order for them to gain greater popularity in this decade. Ideally, 3D device simulation should be done for accurate circuit analysis. However, this is impractical due to the huge CPU time required. As a possible alternative, simulating a 2D crosssection of the device yields 10$ \times $ to 100$ \times $ reduction in CPU time. 
However, this introduces significant error in the range of 7\% to 20\% when evaluating the on/off current ($ I_{\rm ON} / I_{\rm OFF} $) for a single device and leakage current or propagation delay ($ I_{\rm LEAK} / t_D $) for logic gates. In this work, we first present a methodology to obtain optimized 3D device simulation models for SOI FinFETs. Based on these 3D models, we develop adjusted 2D models to capture 3D simulation accuracy with 2D simulation efficiency. We report results for the 22nm SOI FinFET technology node. We adjust gate underlap ($ L_{\rm UN} $) in the 2D cross section of the n/pFinFET devices in order to mimic 3D device behavior. When the adjusted 2D models are employed in mixed-mode simulation of FinFET logic gates, the error in the evaluation of $ I_{\rm LEAK} / t_D $ is very small. To the best of our knowledge, this is the first such attempt. We show that 2D device models remain valid even under process, voltage, and temperature (PVT) variations. We target process variations in gate length ($ L_G $), fin thickness ($ T_{\rm SI} $), gate oxide thickness ($ T_{\rm OX} $), and gate workfunction ($ \Phi_G $), which are the parameters that have been shown to have the most impact on leakage and delay.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lin:2014:POF, author = "Jiun-Li Lin and Po-Hsun Wu and Tsung-Yi Ho", title = "Placement optimization of flexible {TFT} circuits with mechanical strain and temperature consideration", journal = j-JETC, volume = "11", number = "1", pages = "1:1--1:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629497", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Mobility is the primary device parameter affecting circuit
performance in flexible thin-film transistor (TFT) technologies, and is particularly sensitive to the change of mechanical strain and temperature. However, existing algorithms only consider the impact of mechanical strain in cell placement of flexible TFT circuits. Without taking temperature into consideration, mobility may be dramatically decreased which leads to circuit performance degradation. This article presents the first work to minimize the mobility variation caused by the change of both mechanical strain and temperature. Experimental results show that the proposed algorithms can effectively and efficiently reduce the increasing critical path delay.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Roy:2014:TAG, author = "Sudip Roy and Bhargab B. Bhattacharya and Sarmishtha Ghoshal and Krishnendu Chakrabarty", title = "Theory and analysis of generalized mixing and dilution of biochemical fluids using digital microfluidic biochips", journal = j-JETC, volume = "11", number = "1", pages = "2:1--2:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629578", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Digital microfluidic (DMF) biochips are recently being advocated for fast on-chip implementation of biochemical laboratory assays or protocols, and several algorithms for diluting and mixing of reagents have been reported. However, all methods for such automatic sample preparation suffer from a drawback that they assume the availability of input fluids in pure form, that is, each with an extreme concentration factor (CF) of 100\%. In many real-life scenarios, the stock solutions consist of samples/reagents with multiple CFs.
No algorithm is yet known for preparing a target mixture of fluids with a given ratio when its constituents are supplied with random concentrations. An intriguing question is whether or not a given target ratio is feasible to produce from such a general input condition. In this article, we first study the feasibility properties for the generalized mixing problem under the (1:1) mix-split model with an allowable error in the target CFs not exceeding $ 1 / 2^d $, where the integer $d$ is user specified and denotes the desired accuracy level of CF. Next, an algorithm is proposed which produces the desired target ratio of $N$ reagents in $ O(N d) $ mix-split steps, where $N$ ($ \geq 3 $) denotes the number of constituent fluids in the mixture. The feasibility analysis also leads to the characterization of the total space of input stock solutions from which a given target mixture can be derived, and conversely, the space of all target ratios, which are derivable from a given set of input reagents with arbitrary CFs. Finally, we present a generalized algorithm for diluting a sample S in minimum (1:1) mix-split steps when two or more arbitrary concentrations of S (diluted with the same buffer) are supplied as inputs. These results settle several open questions in droplet-based algorithmic microfluidics and offer efficient solutions for a wider class of on-chip sample preparation problems.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2014:ULL, author = "Xianmin Chen and Niraj K.
Jha", title = "Ultra-low-leakage chip multiprocessor design with hybrid {FinFET} logic styles", journal = j-JETC, volume = "11", number = "1", pages = "3:1--3:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629576", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "FinFET has begun replacing CMOS at the 22nm technology node because of its enhanced ability to mitigate short-channel effects. Although leakage power of FinFET logic gates is lower than their CMOS counterparts, it still contributes to a large part of total power consumption. In this article, we show how ultra-low-leakage FinFET chip multiprocessors (CMPs) can be designed using a hybrid logic style. This hybrid style exploits the ultra-low-leakage feature of asymmetric-workfunction shorted-gate (ASG) FinFETs and the high-performance feature of shorted-gate (SG) FinFETs. We explore the impact of the hybrid style at both the module and CMP levels. To do this, we have developed FinFET logic libraries targeted at SG and ASG logic gates, suitably characterized for various parameters of interest. We have also modified existing tools and created a framework to evaluate the hybrid designs of SRAMs, caches, and CMPs. 
Using the design with SG FinFETs as the baseline for comparison, our experimental results show that the hybrid style can reduce leakage power of execution units to as low as 10.6\% of the baseline without hurting performance, that of SRAMs to between 21.5\% and 4.8\% of the baseline with 0\%-8.3\% delay overhead, and that of CMPs to 10.0\% of the baseline with negligible performance degradation.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lin:2014:NTL, author = "Ing-Chao Lin and Shun-Ming Syu and Tsung-Yi Ho", title = "{NBTI} tolerance and leakage reduction using gate sizing", journal = j-JETC, volume = "11", number = "1", pages = "4:1--4:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629657", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Leakage power is a major design constraint in deep submicron technology and below. Meanwhile, transistor degradation due to Negative Bias Temperature Instability (NBTI) has emerged as one of the main reliability concerns in nanoscale technology. Gate sizing is a widely used technique to reduce circuit leakage, and this approach has recently attracted much attention with regard to improving circuits to tolerate NBTI. However, these studies only consider timing and area constraints, and many other important issues, such as slew and max-load, are missing. In this article, we present an efficient gate sizing framework that can reduce leakage and improve circuit reliability under timing constraints. Our algorithms consider slack, slew and max-load constraints. 
The benchmarks are those from ISPD 2012, which feature industrial design properties, including discrete cell sizes, nonconvex cell timing models, slew dependencies and constraints, as well as large design sizes. The experimental results obtained from ISPD 2012 benchmark circuits demonstrate that our approach can meet all the constraints and tolerated NBTI degradation with a power savings of 6.54\% as compared with the traditional method.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xie:2014:TCP, author = "Jing Xie and Yang Du and Yuan Xie", title = "Testable cross-power domain interface {(CPDI)} circuit design in monolithic {$3$D} technology", journal = j-JETC, volume = "11", number = "1", pages = "5:1--5:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629516", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Optimizing energy consumption for electronic systems has been an important design consideration. Multipower domain design is widely used for low-power and high-performance applications. Data transfer between power domains needs a cross-power domain interface (CPDI). The existing level-conversion flip-flop (LCFF) structures all need dual power rails, which lead to large area and performance overhead. In this article, we propose a scanable CPDI circuit, utilizing monolithic 3D technology. This interface functions as a flip-flop and provides reliable data conversion from one power domain to another. It has a built-in scan feature, which makes it a testable design. Our design separates power rails in each tier, substantially reducing physical design complexity and area penalty. 
The design is implemented in a 20nm, 28nm, and 45nm low-power technology. It shows a 20\%--35\% smaller insertion delay compared to normal designs. This proposed design also shows scalability and better energy consumption than previous LCFF circuits.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kumawat:2014:PMA, author = "Renu Kumawat and Vineet Sahula and Manoj S. Gaur", title = "Probabilistic modeling and analysis of molecular memory", journal = j-JETC, volume = "11", number = "1", pages = "6:1--6:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629533", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article investigates the aspects of designing a nanocell based molecular memory. An empirical model for molecular device is developed, based on circuit behavior of nitro-substituted Oligo (Phenylene Ethynylene) molecule (OPE). This device model is subsequently used to design nanocell based 1-bit memory and verified using HSPICE. The approach is extended to train the nanocell for multibit storage capability using external voltage signals. It is observed that to successfully train a 2-bit molecular memory, the number of control signals should be approx. one-fourth of total number of nanoparticles. A computational framework is proposed to compute the probability of retrieving the stored data bits correctly, at the output terminal of the nanocell buffer. This nanocell configuration is simulated by systematically varying number of nanoparticles and molecular switches. It is observed that the probability of the existence of at least one path from input to output approaches close to unity with presence of 20 or more nanoparticles in a nanocell.
During memory model validation, 1000 samples of 1-bit memory (consisting of 20 nanoparticles) were generated and verified for read and write operations. The model verification results obtained for this memory cell closely match those obtained using analytical solution of probabilistic graph model.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lin:2014:QQM, author = "Chia-Chun Lin and Amlan Chakrabarti and Niraj K. Jha", title = "{QLib}: Quantum module library", journal = j-JETC, volume = "11", number = "1", pages = "7:1--7:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629430", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 6 16:15:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum algorithms are known for their ability to solve some problems much faster than classical algorithms. They are executed on quantum circuits, which consist of a cascade of quantum gates. However, synthesis of quantum circuits is not straightforward because of the complexity of quantum algorithms. Generally, quantum algorithms contain two parts: classical and quantum. Thus, synthesizing circuits for the two parts separately reduces overall synthesis complexity. In addition, many quantum algorithms use similar subroutines that can be implemented with similar circuit modules. Because of their frequent use, it is important to use automated scripts to generate such modules efficiently. These modules can then be subjected to further synthesis optimizations. This article proposes QLib, a quantum module library, which contains scripts to generate quantum modules of different sizes and specifications for well-known quantum algorithms. 
Thus, QLib can also serve as a suite of benchmarks for quantum logic and physical synthesis.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wille:2014:ISI, author = "Robert Wille and Rolf Drechsler and Mehdi B. Tahoori", title = "Introduction to the {Special Issue on Reversible Computation}", journal = j-JETC, volume = "11", number = "2", pages = "8:1--8:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2663349", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{DeVos:2014:MCC, author = "Alexis {De Vos} and Stijn {De Baerdemacker}", title = "Matrix Calculus for Classical and Quantum Circuits", journal = j-JETC, volume = "11", number = "2", pages = "9:1--9:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2669370", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum computation on $w$ qubits is represented by the infinite unitary group $ {\rm U}(2^w) $; classical reversible computation on $w$ bits is represented by the finite symmetric group $ {\rm S}_{2^w} $. In order to establish the relationship between classical reversible computing and quantum computing, we introduce two Lie subgroups $ {\rm XU}(n)$ and $ {\rm ZU}(n)$ of the unitary group $ {\rm U}(n)$.
The former consists of all unitary $ n \times n$ matrices with all line sums equal to $1$; the latter consists of all unitary diagonal $ n \times n$ matrices with first entry equal to $1$. Such a group structure also reveals the relationship between matrix calculus and diagrammatic $ z x$-calculus of quantum circuits.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hanninen:2014:QII, author = "Ismo K. H{\"a}nninen and Craig S. Lent and Gregory L. Snider", title = "Quantifying Irreversible Information Loss in Digital Circuits", journal = j-JETC, volume = "11", number = "2", pages = "10:1--10:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629523", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Heat generation limits the performance of state-of-the-art integrated circuits, originating from the wasteful static CMOS operating principle. Near-term solutions like adiabatic charging for energy recovery and limiting friction-type heat sources provide considerable improvement. However, these methods do not address the ultimate thermodynamic necessity to expel energy related to information loss in the computing process. In emerging beyond-CMOS technologies, this bit erasure heat alone can overwhelm the cooling capacity and set the limits of the computing performance. Therefore, logical information loss is becoming an important factor for digital circuit design, and tools have to be developed for analysis and optimization. This article presents a framework for estimating the amount of information loss in complex logic circuits, demonstrating the method by modeling the irreversible bit erasures in a standard binary adder structure. 
Binary addition is one of the most often used and highly optimized digital designs, and we estimate the erasure bounds for components on various levels of design abstraction, showing that the actual logic gate implementations have orders of magnitude higher loss than the addition operation itself would require. The method and the results can be used to optimize circuits for a higher degree of logical reversibility and energy conservation.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{DeVos:2014:DGF, author = "Alexis {De Vos} and St{\'e}phane Burignat and Robert Gl{\"u}ck and Torben {\AE}gidius Mogensen and Holger Bock Axelsen and Michael Kirkedal Thomsen and Eva Rotenberg and Tetsuo Yokoyama", title = "Designing Garbage-Free Reversible Implementations of the Integer Cosine Transform", journal = j-JETC, volume = "11", number = "2", pages = "11:1--11:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629532", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Discrete linear transformations are important tools in information processing. Many such transforms are injective and therefore prime candidates for a physically reversible implementation into hardware. We present here reversible integer cosine transformations on $n$ input integers. The resulting reversible circuit is able to perform both the forward transform and the inverse transform. 
The detailed structure of such a reversible design strongly depends on the odd prime factors of the determinant of the transform: whether those are of the form $ 2^k \pm 1 $ or of the form $ 2^k \pm 2^l \pm 1 $ or neither of these forms.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mogensen:2014:GFR, author = "Torben {\AE}gidius Mogensen", title = "Garbage-Free Reversible Multipliers for Arbitrary Constants", journal = j-JETC, volume = "11", number = "2", pages = "12:1--12:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629515", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We present a method based on Mealy machines for constructing reversible circuitry for multiplying integers by arbitrary integer constants. The circuits generate no garbage and use no ancillae. The circuits are quite compact for small constants and are, in the worst case, bounded by $ O(n^2) $ multi-control Toffoli gates per bit-slice, where $n$ is the number of bits in the constant. These gates will have $ O(n)$ inputs, so the total number of pass-transistors needed to implement the circuit is $ O(n^3) $ transistors per bit slice, and the quantum cost (which is exponential in the number of inputs to a Toffoli gate) is $ O(2^n)$. For some interesting cases, the cost can be reduced to $ O(n)$ gates per bit-slice, reducing the cost to $ O(n^2)$ transistors per bit slice. The quantum cost is still $ O(2^n)$, as the remaining gates have $ O(n)$ inputs. 
We also look at an alternative construction that, at the cost of adding $ O(n)$ ancillae, reduces the cost for arbitrary constants to $ O(n)$ gates, $ O(n^2)$ transistors, though still with $ O(2^n)$ quantum cost.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Nguyen:2014:RED, author = "Trung Duc Nguyen and Rodney {Van Meter}", title = "A Resource-Efficient Design for a Reversible Floating Point Adder in Quantum Computing", journal = j-JETC, volume = "11", number = "2", pages = "13:1--13:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629525", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic has applications in low-power computing and quantum computing. However, there are few existing designs for reversible floating-point adders and none suitable for quantum computation. In this article, we propose a resource-efficient reversible floating-point adder, suitable for binary quantum computation, improving the design of Nachtigal et al. [2011]. Our work focuses on improving the reversible designs of the alignment unit and the normalization unit, which are the most expensive parts. By changing a few elements of the existing algorithm, including the circuit designs of the RLZC (reversible leading zero counter) and converter, we have reduced the cost by about 68\%. We also propose quantum designs adapted to use gates from fault-tolerant libraries. The KQ for our fault-tolerant design is almost 60 times as expensive as for a 32-bit fixed-point addition. 
We note that the floating-point representation makes in-place, truly reversible arithmetic impossible, requiring us to retain both inputs, which limits the sustainability of its use for quantum computation.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Shafaei:2014:CSR, author = "Alireza Shafaei and Mehdi Saeedi and Massoud Pedram", title = "Cofactor Sharing for Reversible Logic Synthesis", journal = j-JETC, volume = "11", number = "2", pages = "14:1--14:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629524", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Improving circuit realization of known quantum algorithms by CAD techniques has benefits for quantum experimentalists. In this article, the problem of synthesizing a given function on a set of ancillae is addressed. The proposed approach benefits from extensive sharing of cofactors among cubes that appear on function outputs. Accordingly, it can be considered a multilevel logic optimization technique for reversible circuits. In particular, the suggested approach can efficiently implement any $n$-input, $m$-output lookup table (LUT) by a reversible circuit. This problem has interesting applications in the Shor's number-factoring algorithm and in quantum walk on sparse graphs.
Simulation results reveal that the proposed cofactor-sharing synthesis algorithm has a significant impact on reducing the size of modular exponentiation circuits for Shor's quantum factoring algorithm, oracle circuits in quantum walk on sparse graphs, and the well-known MCNC benchmarks.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Datta:2014:IRC, author = "Kamalika Datta and Gaurav Rathi and Indranil Sengupta and Hafizur Rahaman", title = "An Improved Reversible Circuit Synthesis Approach using Clustering of {ESOP} Cubes", journal = j-JETC, volume = "11", number = "2", pages = "15:1--15:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629543", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The problem of reversible logic synthesis has drawn the attention of many researchers over the last two decades with growing emphasis on low-power design. Among the various synthesis approaches that have been reported, the ones based on compact circuit representations like Binary Decision Diagrams (BDD) and Exclusive-or Sum-Of-Products (ESOP) are interesting in the sense that they can handle large circuits with more than 100 inputs. The drawback of these approaches, however, is that the generated netlists are sub-optimal, and there is lot of scope for optimizing them. One of the best methods in this regard is an approach, where the ESOP cubes are grouped into sublists based on sharing among more than one outputs. 
In the work reported in this article, in contrast, an approach based on clustering the ESOP cubes based on their similarity with respect to input variables is presented, along with a technique to map each of the clusters into reversible gate netlists. This approach results in a significant reduction in quantum cost of the final netlist, but requires one additional garbage line. Experimental results on a number of reversible circuit benchmarks have been presented in support of the claim and also demonstrate that the method is very fast.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tida:2014:NTS, author = "Umamaheswara Rao Tida and Cheng Zhuo and Yiyu Shi", title = "Novel Through-Silicon-Via Inductor-Based On-Chip {DC--DC} Converter Designs in {$3$D} {ICs}", journal = j-JETC, volume = "11", number = "2", pages = "16:1--16:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2637481", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "There has been a tremendous research effort in recent years to move DC-DC converters on chip for enhanced performance. However, a major limiting factor to implementing on-chip inductive DC-DC converters is the large area overhead induced by spiral inductors. Thus, we propose using through-silicon-vias (TSVs), a critical enabling technique in three-dimensional (3D) integrated systems, to implement on-chip inductors for DC-DC converters. While existing literature show that TSV inductors are inferior compared with conventional spiral inductors due to substrate loss for RF applications, in this article, we demonstrate that it is not the case for DC-DC converters, which operate at relatively low frequencies. 
Experimental results show that by replacing conventional spiral inductors with TSV inductors, with almost the same efficiency and output voltage, up to $ 4.3 \times $ and $ 3.2 \times $ inductor area reduction can be achieved for the single-phase buck converter and the interleaved buck converter with magnetic coupling, respectively.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Murray:2014:PEC, author = "Jacob Murray and Ryan Kim and Paul Wettin and Partha Pratim Pande and Behrooz Shirazi", title = "Performance Evaluation of Congestion-Aware Routing with {DVFS} on a Millimeter-Wave Small-World Wireless {NoC}", journal = j-JETC, volume = "11", number = "2", pages = "17:1--17:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2644816", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The mm-wave small-world wireless NoC (mSWNoC) has emerged as an enabling interconnection infrastructure for designing high-bandwidth and energy-efficient multicore chips. In this mSWNoC architecture, long-range communication predominately takes place through the wireless shortcuts operating in the range of 10--100GHz, whereas short-range data exchange occurs through conventional metal wires. This results in performance advantages (lower latency and energy dissipation), mainly stemming from using the wireless links as long-range shortcuts between far-apart cores. The performance gain introduced by the wireless channels can be enhanced further if the wireline links of the mSWNoC are optimized according to the traffic patterns arising out of the application workloads. 
While there is significant energy savings, and hence temperature reduction, in the network due to the mSWNoC architecture, a load-imbalanced network is still susceptible to local temperature hotspots. In this work, we demonstrate that by incorporating congestion-avoidance routing with network-level dynamic voltage and frequency scaling (DVFS) in an mSWNoC, the power and thermal profiles can be improved without a significant impact on the overall network performance. In this work, we demonstrate how novel interconnect architectures enabled by the on-chip wireless links coupled with power management strategies can improve the energy and thermal characteristics of an mSWNoC significantly without introducing any performance degradation with respect to the conventional mesh-based NoC.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mohanty:2014:SOS, author = "Pragyan (Sheela) Mohanty and Spyros Tragoudas", title = "Scalable Offline Searches in {DNA} Sequences", journal = j-JETC, volume = "11", number = "2", pages = "18:1--18:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660774", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", abstract = "Searching for a particular pattern in a very large DNA database is a fundamental and essential component in computational biology. In the biological world, pattern matching is required for finding repeats in a particular DNA sequence, finding motif, aligning sequences, and other similar tasks. Due to an immense amount and continuous increase of biological data, the searching process requires very fast algorithms. 
A function-based tool set for fast offline pattern searches in large DNA sequences is proposed. The method benefits from the use of Boolean functions, their compact storage using canonical data structure, and the existence of built-in operators for these data structures. Experiments on DNA sequences from the NCBI database show that the proposed approach is scalable. The time complexity depends on the size of the data structure used for storing the function that represents the DNA sequence. It is shown that the presented approach exhibits sublinear time complexity to the DNA sequence size.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chaudhuri:2014:ALD, author = "Sourindra M. Chaudhuri and Prateek Mishra and Niraj K. Jha", title = "Accurate Leakage\slash Delay Estimation for {FinFET} Standard Cells under {PVT} Variations using the Response Surface Methodology", journal = j-JETC, volume = "11", number = "2", pages = "19:1--19:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2665066", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Nov 5 18:01:28 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Among different multi-gate transistors, FinFETs and Trigate FETs have set themselves apart as the most promising candidates for the upcoming 22nm technology node and beyond owing to their superior device performance, lower leakage power consumption, and cost-effective fabrication process. Innovative circuit design and optimization techniques will be required to harness the power of multi-gate transistors, which in turn will depend on accurate leakage and timing characterization of these devices under spatial and environmental variations. 
Hence, in order to aid circuit designers, we present accurate analytical models using central composite rotatable design (CCRD) based on response surface methodology (RSM) to estimate the leakage current and delay of FinFET standard cells under the effect of variations in gate length ($ L_G$), fin thickness ($ T_{SI}$), gate-oxide thickness ($ T_{OX}$), gate-workfunction ($ \Phi_G$), supply voltage ($ V_{DD}$), and temperature ($T$). To the best of our knowledge, this is the first such attempt to develop analytical models for leakage/delay estimation of FinFET logic gates. To derive these models, we employ TCAD device simulations of adjusted 2D device cross sections that have been shown to track TCAD device simulations of 3D device behavior within a 1--3\% error range. This drastically reduces the CPU time of our modeling technique (by several orders of magnitude) without much loss in accuracy. We present analytical leakage and delay models for different sizes and logic styles (e.g., shorted-gate (SG) and independent-gate (IG) FinFETs at the 22nm technology node). Both leakage and delay estimates derived from the analytical models are in close agreement with quasi-Monte Carlo (QMC) simulation results (QMC simulations track the accuracy of Monte Carlo simulations, but are several orders of magnitude faster) obtained for different adjusted-2D logic gates with a root mean square error (RMSE) in the 0.23\%--5.87\% range.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Myers:2014:ISI, author = "Chris J. 
Myers and Herbert Sauro and Anil Wipat", title = "Introduction to the Special Issue on Computational Synthetic Biology", journal = j-JETC, volume = "11", number = "3", pages = "20:1--20:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2668126", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The goal of this special issue is to introduce the field of computational synthetic biology to engineers and computer scientists. The first article gives an introduction to the key biological principles and experimental techniques that support synthetic biology, and it draws analogies with the computing field. This issue also includes five original research articles in computational synthetic biology. The first research article discusses how standards can be used to modularize the design process for genetic circuits. The next two articles introduce new abstraction techniques to improve the efficiency of analysis of genetic circuit models. The last two articles introduce new design techniques that help decouple design from construction. We hope this sampling from the field will help to motivate others to join this exciting and rich area of research.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Deans:2014:PNS, author = "Tara L. 
Deans", title = "Parallel Networks: Synthetic Biology and Artificial Intelligence", journal = j-JETC, volume = "11", number = "3", pages = "21:1--21:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2667229", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Synthetic biology has emerged as an important technology for engineering cells to behave in controllable and predictable ways. The promise of this modern technology is dependent on our understanding of cellular complexity to allow us to engineer cells with novel function. In this regard, the fields of computer science and synthetic biology are critical for accelerating both our understanding of biological systems, and our ability to quantitatively engineer cells. Thus, advances in biology and biotechnology are arising at the intersection of computer science and synthetic biology approaches. 
This review seeks to introduce the field of synthetic biology to the computer science community, and to ignite a curiosity and interest in fostering a unique synergy for possible collaborations between synthetic biologists and computer scientists.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Misirli:2014:CMM, author = "Goksel Misirli and Jennifer Hallinan and Anil Wipat", title = "Composable Modular Models for Synthetic Biology", journal = j-JETC, volume = "11", number = "3", pages = "22:1--22:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2631921", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Modelling and computational simulation are crucial for the large-scale engineering of biological circuits since they allow the system under design to be simulated prior to implementation in vivo. To support automated, model-driven design it is desirable that in silico models are modular, composable and use standard formats. The synthetic biology design process typically involves the composition of genetic circuits from individual parts. At the most basic level, these parts are representations of genetic features such as promoters, ribosome binding sites (RBSs), and coding sequences (CDSs). However, it is also desirable to model the biological molecules and behaviour that arise when these parts are combined in vivo. Modular models of parts can be composed and their associated systems simulated, facilitating the process of model-centred design. The availability of databases of modular models is essential to support software tools used in the model-driven design process. 
In this article, we present an approach to support the development of composable, modular models for synthetic biology, termed Standard Virtual Parts. We then describe a programmatically accessible and publicly available database of these models to allow their use by computational design tools.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Madsen:2014:SMC, author = "Curtis Madsen and Zhen Zhang and Nicholas Roehner and Chris Winstead and Chris Myers", title = "Stochastic Model Checking of Genetic Circuits", journal = j-JETC, volume = "11", number = "3", pages = "23:1--23:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2644817", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Synthetic genetic circuits have a number of exciting potential applications such as cleaning up toxic waste, hunting and killing tumor cells, and producing drugs and bio-fuels more efficiently. When designing and analyzing genetic circuits, researchers are often interested in the probability of observing certain behaviors. Discerning these probabilities typically involves simulating the circuit to produce some time series data and computing statistics over the resulting data. However, for very rare behaviors of complex genetic circuits, it becomes computationally intractable to obtain good results as the number of required simulation runs grows exponentially. It is, therefore, necessary to apply numerical methods to determine these probabilities directly. 
This article describes how stochastic model checking, a method for determining the likelihood that certain events occur in a system, can be applied to models of genetic circuits by translating them into continuous-time Markov chains (CTMCs) and analyzing them using Markov chain analysis to check continuous stochastic logic (CSL) properties. The utility of this approach is demonstrated with several case studies illustrating how this method can be used to perform design space exploration of two genetic oscillators and two genetic state-holding elements. Our results show that this method results in a substantial speedup as compared with conventional simulation-based approaches.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Fellermann:2014:FMD, author = "Harold Fellermann and Maik Hadorn and Rudolf M. F{\"u}chslin and Natalio Krasnogor", title = "Formalizing Modularization and Data Hiding in Synthetic Biology", journal = j-JETC, volume = "11", number = "3", pages = "24:1--24:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2667231", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Biological systems employ compartmentalization and other co-localization strategies in order to orchestrate a multitude of biochemical processes by simultaneously enabling ``data hiding'' and modularization. This article presents recent research that embraces compartmentalization and co-location as an organizational programmatic principle in synthetic biological and biomimetic systems. 
In these systems, artificial vesicles and synthetic minimal cells are envisioned as nanoscale reactors for programmable biochemical synthesis and as chassis for molecular information processing. We present P systems, brane calculi, and the recently developed chemtainer calculus as formal frameworks providing data hiding and modularization and thus enabling the representation of highly complicated hierarchically organized compartmentalized reaction systems. We demonstrate how compartmentalization can greatly reduce the complexity required to implement computational functionality, and how addressable compartments permit the scaling-up of programmable chemical synthesis.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Oberortner:2014:RBD, author = "Ernst Oberortner and Swapnil Bhatia and Erik Lindgren and Douglas Densmore", title = "A Rule-Based Design Specification Language for Synthetic Biology", journal = j-JETC, volume = "11", number = "3", pages = "25:1--25:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641571", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Synthetic Biology is an engineering discipline where parts of DNA sequences are composed into novel, complex systems that execute a desired biological function. Functioning and well-behaving biological systems adhere to a certain set of biological ``rules''. Data exchange standards and Bio-Design Automation (BDA) tools support the organization of part libraries and the exploration of rule-compliant compositions. In this work, we formally define a design specification language, enabling the integration of biological rules into the Synthetic Biology engineering process. 
The supported rules are divided into five categories: Counting, Pairing, Positioning, Orientation, and Interactions. We formally define the semantics of each rule, characterize the language's expressive power, and perform a case study in that we iteratively design a genetic Priority Encoder circuit following two alternative paradigms---rule-based and template-driven. Ultimately, we touch a method to approximate the complexity and time to computationally enumerate all rule-compliant designs. Our specification language may or may not be expressive enough to capture all designs that a Synthetic Biologist might want to describe, or the complexity one might find through experiments. However, computational support for the acquisition, specification, management, and application of biological rules is inevitable to understand the functioning of biology.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Huang:2014:FMD, author = "Haiyao Huang and Douglas Densmore", title = "{Fluigi}: Microfluidic Device Synthesis for Synthetic Biology", journal = j-JETC, volume = "11", number = "3", pages = "26:1--26:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660773", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "One goal of synthetic biology is to design and build genetic circuits in living cells for a range of applications. Our incomplete knowledge of the effects of metabolic load and biological ``crosstalk'' on the host cell make it difficult to construct multilevel genetic logic circuits in a single cell, limiting the scalability of engineered biological systems. 
Microfluidic technologies provide reliable and scalable construction of synthetic biological systems by allowing compartmentalization of cells encoding simple genetic circuits and the spatiotemporal control of communication among these cells. This control is achieved via valves on the microfluidics chip which restrict fluid flow when activated. We describe a Computer Aided Design (CAD) framework called ``Fluigi'' for optimizing the layout of genetic circuits on a microfluidic chip, generating the control sequence of the associated signaling fluid valves, and simulating the behavior of the configured biological circuits. We demonstrate the capabilities of Fluigi on a set of Boolean algebraic benchmark circuits found in both synthetic biology and electrical engineering and a set of assay-based benchmark circuits. The integration of microfluidics and synthetic biology has the capability to increase the scale of engineered biological systems for applications in DNA assembly, biosensors, and screening assays for novel orthogonal genetic parts.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hadjam:2014:RED, author = "Fatima Zohra Hadjam and Claudio Moraga", title = "{RIMEP2}: Evolutionary Design of Reversible Digital Circuits", journal = j-JETC, volume = "11", number = "3", pages = "27:1--27:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629534", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "RIMEP (Reversible Improved Multi Expression Programming), is a system that has been developed for designing reversible digital circuits. This article discloses a new version of RIMEP called ``RIMEP2''. 
The goal was to evolve reversible circuits in a ``fanout free'' search space. The major changes that RIMEP has undergone, are made in the structure of the chromosome and in the fitness calculation. Although the changes seem to be minor, the impact is effective. The execution time has been considerably decreased and optimal competitive solutions were found for a set of 30 selected benchmarks, where a quantum cost reduction up to 96.13\% was reached with an average of 42.17\%.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Houshmand:2014:DDH, author = "Mahboobeh Houshmand and Morteza Saheb Zamani and Mehdi Sedighi and Mona Arabzadeh", title = "Decomposition of Diagonal {Hermitian} Quantum Gates Using Multiple-Controlled {Pauli} {Z} Gates", journal = j-JETC, volume = "11", number = "3", pages = "28:1--28:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629526", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum logic decomposition refers to decomposing a given quantum gate to a set of physically implementable gates. An approach has been presented to decompose arbitrary diagonal quantum gates to a set of multiplexed-rotation gates around z axis. In this article, a special class of diagonal quantum gates, namely diagonal Hermitian quantum gates, is considered and a new perspective to the decomposition problem with respect to decomposing these gates is presented. It is first shown that these gates can be decomposed to a set that solely consists of multiple-controlled Z gates. Then a binary representation for the diagonal Hermitian gates is introduced. 
It is shown that the binary representations of multiple-controlled Z gates form a basis for the vector space that is produced by the binary representations of all diagonal Hermitian quantum gates. Moreover, the problem of decomposing a given diagonal Hermitian gate is mapped to the problem of writing its binary representation in the specific basis mentioned previously. Moreover, CZ gate is suggested to be the two-qubit gate in the decomposition library, instead of previously used CNOT gate. Experimental results show that the proposed approach can lead to circuits with lower costs in comparison with the previous ones.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2014:SAB, author = "Zhiqiang Li and Hanwu Chen and Xiaoyu Song and Marek Perkowski", title = "A Synthesis Algorithm for $4$-Bit Reversible Logic Circuits with Minimum Quantum Cost", journal = j-JETC, volume = "11", number = "3", pages = "29:1--29:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629542", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents an algorithm which can quickly find the exact minimum solution to almost all of 4-bit reversible functions. We assume minimization of quantum cost (MQC). This algorithm is designed in the most memory-efficient way, or it will quickly run out of memory. Therefore, we construct the shortest coding of permutations, the topological compression and flexible data structures for the memory savings. First, hash tables are used for all 8-gate 4-bit circuits with the minimization of gate count (MGC) by using the GT library (with NOT, CNOT, Toffoli and Toffoli-4 gates). 
Second, we merge and split the hash tables, thus generating a single longer hash table for high-performance. Third, we synthesize these circuits with MQC by using the GTP library (with GT, Peres, and Inverted Peres gates) based on the hash table. Finally, according to the comparison of the QC of circuits, the algorithm can quickly converge for any 4-bit reversible circuit with MQC. By synthesizing all benchmark functions, in comparison with Szyprowski and Kerntopf [2011], the running time and QC are reduced up to 99.95\% and 18.2\%, respectively.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sen:2014:RRC, author = "Bibhash Sen and Manojit Dutta and Samik Some and Biplab K. Sikdar", title = "Realizing Reversible Computing in {QCA} Framework Resulting in Efficient Design of Testable {ALU}", journal = j-JETC, volume = "11", number = "3", pages = "30:1--30:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629538", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic is emerging as a prospective logic design style for implementing ultra-low-power VLSI circuits. It promises low-power consuming circuits by nullifying the energy dissipation in irreversible logic. On the other hand, as a potential alternative to CMOS technology, Quantum-dot Cellular Automata (QCA) promises energy efficient digital design with high device density and high computing speed. The integration of reversible logic in QCA circuit is expected to be effective in addressing the issue of energy dissipation at nano scale regime. This work targets the design of reversible ALU (arithmetic logic unit) in QCA framework and proposes a new ``Reversible QCA'' (RQCA). 
The primary design focus is on optimizing the number of reversible gates, quantum cost and the garbage outputs that are the most important hindrances in realizing reversible logic. Besides optimization, the fault coverage capability of RQCA under missing/additional cell deposition defects is analysed. The scope of reversible logic is further outstretched by introducing a novel DFT (design for testability) architecture around the reversible ALU that reduces testing overhead. The performance of proposed ALU is evaluated, subjected to different faults, and is established to be more effective than the existing ALU.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Rahman:2014:AQT, author = "Md. Mazder Rahman and Gerhard W. Dueck and Joseph D. Horton", title = "An Algorithm for Quantum Template Matching", journal = j-JETC, volume = "11", number = "3", pages = "31:1--31:??", month = dec, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629537", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 7 15:40:14 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum circuits are often generated by decomposing gates from networks with classical reversible gates. Only in rare cases, the results are minimal. Post-optimization methods, such as template matching, are employed to reduce the quantum costs of circuits. Quantum templates are derived from identity circuits. All minimal realizations, within certain limitations, can be embedded into templates. Due to this property, templates matching has the potential to reduce quantum costs of circuits. However, one of the difficulties in finding templates matches is due to the mobility of the gates within the circuit. 
Thus far, template matching procedures have employed heuristics to reduce the search space. This article presents an in-depth study of exact template matching with a set of algorithms. A graph structure with the corresponding circuits facilitates the discovery of potential sequences of templates to be matched, and how exact minimization of circuits can be accomplished. The significance of the proposed method is verified in benchmarks optimization.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hammerstrom:2015:ISI, author = "Dan Hammerstrom and Vijaykrishnan Narayanan", title = "Introduction to Special Issue on Neuromorphic Computing", journal = j-JETC, volume = "11", number = "4", pages = "32:1--32:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2728709", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Rodriguez:2015:TSS, author = "Laurent Rodriguez and Beno{\^\i}t Miramond and Bertrand Granado", title = "Toward a Sparse Self-Organizing Map for Neuromorphic Architectures", journal = j-JETC, volume = "11", number = "4", pages = "33:1--33:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638559", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Neurobiological systems have often been a source of inspiration for computational science and engineering, but in the past their impact has also been 
limited by the understanding of biological models. Today, new technologies lead to an equilibrium situation where powerful and complex computers bring new biological knowledge of the brain behavior. At this point, we possess sufficient understanding to both imagine new brain-inspired computing paradigms and to sustain a classical paradigm which reaches its end programming and intellectual limitations. In this context, we propose to reconsider the computation problem first in the specific domain of mobile robotics. Our main proposal consists in considering computation as part of a global adaptive system, composed of sensors, actuators, a source of energy and a controlling unit. During the adaptation process, the proposed brain-inspired computing structure does not only execute the tasks of the application but also reacts to the external stimulation and acts on the emergent behavior of the system. This approach is inspired by cortical plasticity in mammalian brains and suggests developing the computation architecture along the system's experience. This article proposes modeling this plasticity as a problem of estimating a probability density function. This function would correspond to the nature and the richness of the environment perceived through multiple modalities. We define and develop a novel neural model solving the problem in a distributed and sparse manner. And we integrate this neural map into a bio-inspired hardware substrate that brings the plasticity property into parallel many-core architectures. The approach is then called Hardware Plasticity. The results show that the self-organization properties of our model solve the problem of multimodal sensory data clusterization. 
The properties of the proposed model allow envisaging the deployment of this adaptation layer into hardware architectures embedded into the robot's body in order to build intelligent controllers.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chabi:2015:CUS, author = "Djaafar Chabi and Weisheng Zhao and Damien Querlioz and Jacques-Olivier Klein", title = "On-Chip Universal Supervised Learning Methods for Neuro-Inspired Block of Memristive Nanodevices", journal = j-JETC, volume = "11", number = "4", pages = "34:1--34:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629503", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Scaling down beyond CMOS transistors requires the combination of new computing paradigms and novel devices. In this context, neuromorphic architecture is developed to achieve robust and ultra-low power computing systems. Memristive nanodevices are often associated with this architecture to implement efficiently synapses for ultra-high density. In this article, we investigate the design of a neuro-inspired logic block (NLB) dedicated to on-chip function learning and propose learning strategy. It is composed of an array of memristive nanodevices as synapses associated to neuronal circuits. Supervised learning methods are proposed for different type of memristive nanodevices and simulations are performed to demonstrate the ability to learn logic functions with memristive nanodevices. 
Benefiting from a compact implementation of neuron circuits and the optimization of learning process, this architecture requires small number of nanodevices and moderate power consumption.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Coussy:2015:FBN, author = "Philippe Coussy and Cyrille Chavet and Hugues Nono Wouafo and Laura Conde-Canencia", title = "Fully Binary Neural Network Model and Optimized Hardware Architectures for Associative Memories", journal = j-JETC, volume = "11", number = "4", pages = "35:1--35:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629510", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Brain processes information through a complex hierarchical associative memory organization that is distributed across a complex neural network. The GBNN associative memory model has recently been proposed as a new class of recurrent clustered neural network that presents higher efficiency than the classical models. In this article, we propose computational simplifications and architectural optimizations of the original GBNN. This work leads to significant complexity and area reduction without affecting either memorizing or retrieving performance. The obtained results open new perspectives in the design of neuromorphic hardware to support large-scale general-purpose neural algorithms.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Krichmar:2015:LSS, author = "Jeffrey L.
Krichmar and Philippe Coussy and Nikil Dutt", title = "Large-Scale Spiking Neural Networks using Neuromorphic Hardware Compatible Models", journal = j-JETC, volume = "11", number = "4", pages = "36:1--36:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629509", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Neuromorphic engineering is a fast growing field with great potential in both understanding the function of the brain, and constructing practical artifacts that build upon this understanding. For these novel chips and hardware to be useful, hardware compatible applications and simulation tools are needed. We argue that the neural circuit approach, in which networks of neuronal elements model brain circuitry are constructed, allows the development of practical applications and the exploration of brain function. At this level of abstraction, networks of 10$^5$ neurons or larger can be efficiently simulated, but still preserve the neuronal and synaptic dynamics that appear to be important for brain function. Because the neural circuit level supports spiking neural networks and the prevalent Addressable Event Representation (AER) communication scheme, it fits well with many existing neuromorphic hardware and simulation tools. To show how this approach can be applied, we present case studies of spiking neural networks in vision and recognition tasks based on one instantiation of a simulation environment. However, there are now many hardware options, simulation environments, and applications in this emerging field. 
These approaches and other considerations are discussed.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{ChappetDeVangel:2015:RSD, author = "Beno{\^\i}t {Chappet De Vangel} and Cesar Torres-Huitzil and Bernard Girau", title = "Randomly Spiking Dynamic Neural Fields", journal = j-JETC, volume = "11", number = "4", pages = "37:1--37:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629517", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Bio-inspired neural computation attracts a lot of attention as a possible solution for the future challenges in designing computational resources. Dynamic neural fields (DNF) provide cortically inspired models of neural populations to which computation can be applied for a wide variety of tasks, such as perception and sensorimotor control. DNFs are often derived from continuous neural field theory (CNFT). In spite of the parallel structure and regularity of CNFT models, few studies of hardware implementations have been carried out targeting embedded real-time processing. In this article, a hardware-friendly model adapted from the CNFT is introduced, namely the RSDNF model (randomly spiking dynamic neural fields). Thanks to their simplified 2D structure, RSDNFs achieve scalable parallel implementations on digital hardware while maintaining the behavioral properties of CNFT models. Spike-based computations within neurons in the field are introduced to reduce interneuron connection bandwidth. Additionally, local stochastic spike propagation ensures inhibition and excitation broadcast without a fully connected network.
The behavioral soundness and robustness of the model in the presence of noise and distracters is fully validated through software and hardware. A field programmable gate array (FPGA) implementation shows how the RSDNF model ensures a level of density and scalability out of reach for previous hardware implementations of dynamic neural field models.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kim:2015:RDN, author = "Yongtae Kim and Yong Zhang and Peng Li", title = "A Reconfigurable Digital Neuromorphic Processor with Memristive Synaptic Crossbar for Cognitive Computing", journal = j-JETC, volume = "11", number = "4", pages = "38:1--38:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700234", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents a brain-inspired reconfigurable digital neuromorphic processor (DNP) architecture for large-scale spiking neural networks. The proposed architecture integrates an arbitrary number of N digital leaky integrate-and-fire (LIF) silicon neurons to mimic their biological counterparts and on-chip learning circuits to realize spike-timing-dependent plasticity (STDP) learning rules. We leverage memristor nanodevices to build an N $ \times $ N crossbar array to store not only multibit synaptic weight values but also network configuration data with significantly reduced area overhead. Additionally, the crossbar array is designed to be accessible both column- and row-wise to expedite the synaptic weight update process for learning. The proposed digital pulse width modulator (PWM) produces binary pulses with various durations for reading and writing the multilevel memristive crossbar. 
The proposed column based analog-to-digital conversion (ADC) scheme efficiently accumulates the presynaptic weights of each neuron and reduces silicon area overhead by using a shared arithmetic unit to process the LIF operations of all N neurons. With 256 silicon neurons, learning circuits and 64K synapses, the power dissipation and area of our DNP are 6.45 mW and 1.86 mm$^2$, respectively, when implemented in a 90-nm CMOS technology. The functionality of the proposed DNP architecture is demonstrated by realizing an unsupervised-learning based character recognition system.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Daneshtalab:2015:SIE, author = "Masoud Daneshtalab and Farhad Mehdipour and Zhiyi Yu and Hannu Tenhunen", title = "Special Issue on Emerging Many-Core Systems for Exascale Computing", journal = j-JETC, volume = "11", number = "4", pages = "39:1--39:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2717312", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Jafri:2015:AID, author = "Syed M. A. H. 
Jafri and Ozan Ozbag and Nasim Farahini and Kolin Paul and Ahmed Hemani and Juha Plosila and Hannu Tenhunen", title = "Architecture and Implementation of Dynamic Parallelism, Voltage and Frequency Scaling {(PVFS)} on {CGRAs}", journal = j-JETC, volume = "11", number = "4", pages = "40:1--40:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700250", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In the era of platforms hosting multiple applications with arbitrary performance requirements, providing a worst-case platform-wide voltage/frequency operating point is neither optimal nor desirable. As a solution to this problem, designs commonly employ dynamic voltage and frequency scaling (DVFS). DVFS promises significant energy and power reductions by providing each application with the operating point (and hence the performance) tailored to its needs. To further enhance the optimization potential, recent works interleave dynamic parallelism with conventional DVFS. The induced parallelism results in performance gains that allow an application to lower its operating point even further (thereby saving energy and power consumption). However, the existing works employ costly dedicated hardware (for synchronization) and rely solely on greedy algorithms to make parallelism decisions. To efficiently integrate parallelism with DVFS, compared to state-of-the-art, we exploit the reconfiguration (to reduce DVFS synchronization overheads) and enhance the intelligence of the greedy algorithm (to make optimal parallelism decisions). Specifically, our solution relies on dynamically reconfigurable isolation cells and an autonomous parallelism, voltage, and frequency selection algorithm. 
The dynamically reconfigurable isolation cells reduce the area overheads of DVFS circuitry by configuring the existing resources to provide synchronization. The autonomous parallelism, voltage, and frequency selection algorithm ensures high power efficiency by combining parallelism with DVFS. It selects that parallelism, voltage, and frequency trio which consumes minimum power to meet the deadlines on available resources. Synthesis and simulation results using various applications/algorithms (WLAN, MPEG4, FFT, FIR, matrix multiplication) show that our solution promises significant reduction in area and power consumption (23\% and 51\%) compared to state-of-the-art.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Olorode:2015:IPS, author = "Oluleye Olorode and Mehrdad Nourani", title = "Improving Performance in Sub-Block Caches with Optimized Replacement Policies", journal = j-JETC, volume = "11", number = "4", pages = "41:1--41:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2668127", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent advances in computer processor design have led to the introduction of sub-blocking to cache architectures. Sub-block caches reduce the tag area and power overhead in caches without reducing the effective cache size by using fewer tags to index the full data RAM array. In spite of achieving reduced area and power overhead, sub-block caches suffer performance degradation due to cache thrashing. This occurs when a wider cache line (super-block), made up of multiple valid cache lines (sub-blocks), is replaced or evicted when only a sub-block is to be allocated into the wider super-block.
To address this problem, we propose cache replacement policies as they relate specifically to sub-block caches. We propose new replacement policies that are tuned for sub-block caches by adding more intelligence based on the valid state of individual sub-blocks of a super-block. We also investigate the effect of using a few level-0 registers to bypass a few level-1 cache pipe stages on sub-block cache performance. To evaluate the performance improvement offered by our proposed replacement policies and the use of level-0 registers, we developed a sub-block cache simulator based on the Simplescalar toolset for single-core evaluations and the Sniper Simulator for multicore evaluations. We show that, with minimal architectural updates to existing conventional cache replacement policies, we are able to improve level-1 cache hit rates by up to 4.17\% using our proposed policies alone on SPEC2006 benchmarks and up to 14\% in shared level-2 caches using multicore benchmark suites: PARSEC and SPLASH2.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2015:ICI, author = "Zhongqi Li and Nilanjan Goswami and Tao Li", title = "{iConn}: a Communication Infrastructure for Heterogeneous Computing Architectures", journal = j-JETC, volume = "11", number = "4", pages = "42:1--42:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700238", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recently, the graphics processing unit (GPU) has made significant progress as a general-purpose parallel processor. The CPU and GPU cooperate together to solve data-parallel and control-intensive real-world applications in an optimized fashion. 
For example, emerging heterogeneous computing architectures such as Intel Sandy Bridge and AMD Fusion integrate the functionality of the CPU and GPU in a single die. However, the single-die CPU-GPU heterogeneous computing architecture faces the challenge of tight budget of die area. The conventional homogeneous interconnect fails to provide satisfactory performance by fully exploiting the given area budget in the heterogeneous processing era. In this article, we aim to implement an interconnect network within an area budget for a CPU-GPU heterogeneous computing architecture. We propose iConn, a 2D mesh-style on-chip heterogeneous communication infrastructure. In iConn, a set of GPU logical units such as the stream processors, the texture units, and the rendering output units form a computing unit (CU). Differing from conventional homogeneous router design, iConn adopts nonuniform on-chip routers in order to meet the unique communication demands from each single CPU and CU. The routers can also dynamically allocate their buffers across all virtual channels (VCs) to meet the latency requirements of CPUs and CUs. Moreover, the memory controller scheduling algorithm is modified from traditional load-over-store scheduling in order to prioritize the traffic. Our simulation results show that iConn improves the performance of CPUs by 23.0\% and CUs by 9.4\%.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Khayambashi:2015:ARA, author = "Misagh Khayambashi and Pooria M. 
Yaghini and Ashkan Eghbal and Nader Bagherzadeh", title = "Analytical Reliability Analysis of {$3$D} {NoC} under {TSV} Failure", journal = j-JETC, volume = "11", number = "4", pages = "43:1--43:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700236", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Apr 28 05:59:37 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The network-on-chip (NoC) technology allows for integration of a manycore design on a single chip for higher efficiency and scalability. Three-dimensional (3D) NoCs offer several advantages over two-dimensional (2D) NoCs. Through-silicon via (TSV) technology is one of the candidates for implementation of 3D NoCs. TSV reliability analysis is still challenging for 3D NoC designers because of their unique electrical, thermal, and physical characteristics. After providing an overview of common TSV issues, this article aims to define a reliability criterion for NoC and provide a framework for quantifying this reliability as it relates to TSV issues. TSV issues are modeled as a time-invariant failure probability. Also, a reliability criterion for TSV-based NoC is defined. The relationship between NoC reliability and TSV failure is quantified. For the first time, the reliability criterion is reduced to a tractable closed-form expression that requires a single Monte Carlo simulation. Importantly, the Monte Carlo simulation depends only on network geometry. To demonstrate our proposed method, the reliability criterion of a simple 8$ \times $8$ \times $8 NoC supported by an 8$ \times $8$ \times $7 network of TSVs is calculated.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Pang:2015:MLN, author = "Jun Pang and Christopher Dwyer and Alvin R. 
Lebeck", title = "{mNoC}: Large Nanophotonic Network-on-Chip Crossbars with Molecular Scale Devices", journal = j-JETC, volume = "12", number = "1", pages = "1:1--1:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700241", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Moore's law and the continuity of device scaling have led to an increasing number of cores/nodes on a chip, creating a need for new mechanisms to achieve high-performance and power-efficient Network-on-Chip (NoC). Nanophotonics based NoCs provide for higher bandwidth and more power efficient designs than electronic networks. Present approaches often use an external laser source, ring resonators, and waveguides. However, they still suffer from important limitations: large static power consumption, and limited network scalability. In this article, we explore the use of emerging molecular scale devices to construct nanophotonic networks: Molecular-scale Network-on-Chip (mNoC). We leverage on-chip emitters such as quantum dot LEDs, which provide electrical to optical signal modulation, and chromophores, which provide optical signal filtering for receivers. These devices replace the ring resonators and the external laser source used in contemporary nanophotonic NoCs. They reduce energy consumption or enable scaling to larger crossbars for a reduced energy budget. We present a Single Writer Multiple Reader (SWMR) bus based crossbar mNoC. Our evaluation shows that an mNoC can achieve more than 88\% reduction in energy for a $ 64 \times 64 $ crossbar compared to similar ring resonator based designs. 
Additionally, an mNoC can scale to a $ 256 \times 256 $ crossbar with an average 10\% performance improvement and 54\% energy reduction.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hossain:2015:MGN, author = "Nahid M. Hossain and Masud H. Chowdhury", title = "Multilayer Graphene Nanoribbon and Carbon Nanotube Based Floating Gate Transistor for Nonvolatile Flash Memory", journal = j-JETC, volume = "12", number = "1", pages = "2:1--2:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2701428", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Floating gate transistor is the fundamental building block of nonvolatile flash memory, which is one of the most widely used memory gadgets in modern micro and nano electronic applications. Recently there has been a surge of interest to introduce a new generation of memory devices using graphene nanotechnology. In this article, we present a new floating gate transistor (FGT) design based on multilayer graphene nanoribbon (MLGNR) and carbon nanotube (CNT). In the proposed FGT, a MLGNR structure would be used as the channel of the field effect transistor (FET) and a layer of CNTs would be used as the floating gate. We have performed an analysis of the programming and erasing mechanism in the floating gate and its dependence on the applied control gate voltages. Based on our analysis we have observed that proposed graphene based floating gate transistor could be operated at a low voltage compared to conventional silicon based floating gate devices. 
We have presented a detailed analysis of the operation and the programming and erasing processes of the proposed FGT; the dependency of the programming and erasing current density on different parameters; and the impact of scaling the thicknesses of the control and tunneling oxides. To perform these analyses we have developed equivalent models for device capacitances.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ghofrani:2015:LPV, author = "Amirali Ghofrani and Miguel-Angel Lastras-Monta{\~n}o and Siddharth Gaba and Melika Payvand and Wei Lu and Luke Theogarajan and Kwang-Ting Cheng", title = "A Low-Power Variation-Aware Adaptive Write Scheme for Access-Transistor-Free Memristive Memory", journal = j-JETC, volume = "12", number = "1", pages = "3:1--3:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2717313", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent advances in access-transistor-free memristive crossbars have demonstrated the potential of memristor arrays as high-density and ultra-low-power memory. However, with considerable variations in the write-time characteristics of individual memristors, conventional fixed-pulse write schemes cannot guarantee reliable completion of the write operations and waste significant amount of energy. We propose an adaptive write scheme that adaptively adjusts the write pulses to address such variations in memristive arrays, resulting in $ 7 \times $--$ 11 \times $ average energy saving in our case studies. Our scheme embeds an online monitor to detect the completion of a write operation and takes into account the parasitic effect of line-shared devices in access-transistor-free crossbars.
This feature also helps shorten the test time of memory march algorithms by eliminating the need of a verifying read right after a write, which is commonly employed in the test sequences of march algorithms.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Venkatesan:2015:EEA, author = "Rangharajan Venkatesan and Mrigank Sharad and Kaushik Roy and Anand Raghunathan", title = "Energy-Efficient All-Spin Cache Hierarchy Using Shift-Based Writes and Multilevel Storage", journal = j-JETC, volume = "12", number = "1", pages = "4:1--4:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2723165", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spintronic memories are considered to be promising candidates for future on-chip memories due to their high density, nonvolatility, and near-zero leakage. However, they also face challenges such as high write energy and latency and limited read speed due to single-ended sensing. Further, the conflicting requirements of read and write operations lead to stringent design constraints that severely compromise their benefits. Recently, domain wall memory was proposed as a spintronic memory that has a potential for very high density by storing multiple bits in the domains of a ferromagnetic nanowire. While reliable operation of DWM memory with multiple domains faces many challenges, single-bit cells that utilize domain wall motion for writes have been experimentally demonstrated [Fukami et al. 2009]. This bit-cell, which we refer to as Domain Wall Memory with Shift-based Write (DWM-SW), achieves improved write efficiency and features decoupled read-write paths, enabling independent optimizations of read and write operations.
However, these benefits are achieved at the cost of sacrificing the original goal of improved density. In this work, we explore multilevel storage as a new direction to enhance the density benefits of DWM-SW. At the device level, we propose a new device--multilevel DWM with shift-based write (ML-DWM-SW)--that is capable of storing 2 bits in a single device. At the circuit level, we propose a ML-DWM-SW based bit-cell design and layout. The ML-DWM-SW bit-cell incurs no additional area overhead compared to the DWM-SW bit-cell despite storing an additional bit, thereby achieving roughly twice the density. However, it requires a two-step write operation and has data-dependent read and write energies, which pose unique challenges. To address these issues, we propose suitable architectural optimizations: (i) intra-word interleaving and (ii) bit encoding. We design ``all-spin'' cache architectures using the proposed ML-DWM-SW bit-cell for both general purpose processors as well as general purpose graphics processing units (GPGPUs). We perform an iso-capacity replacement of SRAM with spintronic memories and study the energy and area benefits at iso-performance conditions. For general purpose processors, the ML-DWM-SW cache achieves 10X reduction in energy and 4.4X reduction in cache area compared to an SRAM cache and 2X and 1.7X reduction in energy and area, respectively, compared to an STT-MRAM cache. 
For GPGPUs, the ML-DWM-SW cache achieves 5.3X reduction in energy and 3.6X area reduction compared to SRAM and 3.5X energy reduction and 1.9X area reduction compared to STT-MRAM.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Park:2015:MME, author = "Kyu Ho Park and Woomin Hwang and Hyunchul Seok and Chulmin Kim and Dong-jae Shin and Dong Jin Kim and Min Kyu Maeng and Seong Min Kim", title = "{MN-MATE}: Elastic Resource Management of Manycores and a Hybrid Memory Hierarchy for a Cloud Node", journal = j-JETC, volume = "12", number = "1", pages = "5:1--5:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2701429", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent advent of manycore system increases needs for larger but faster memory hierarchy. Emerging next generation memories such as on-chip DRAM and nonvolatile memory (NVRAM) are promising candidates for replacement of DRAM-only main memory. Combined with the manycore trends, it gives an opportunity to rethink conventional resource management system with a memory hierarchy for a single cloud node. In an attempt to mitigate the energy and memory problems, we propose MN-MATE, an elastic resource management architecture for a single cloud node with manycores, on-chip DRAM, and large size of off-chip DRAM and NVRAM. In MN-MATE, the hypervisor places consolidated VMs and balances memory among them. Based on the monitored information about the allocated memory, a guest OS co-schedules tasks accessing different types of memory with complementary access intensity. Polymorphic management of DRAM hierarchy accelerates average memory access speed inside each guest OS. 
A guest OS reduces energy consumption with small performance loss based on the NVRAM-aware data placement policy and the hybrid page cache. A new lightweight kernel is developed to reduce the overhead from the guest OS for scientific applications. Experiment results show that our techniques in MN-MATE platform improve system performance and reduce energy consumption.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2015:WAS, author = "Jue Wang and Yuan Xie", title = "A Write-Aware {STTRAM}-Based Register File Architecture for {GPGPU}", journal = j-JETC, volume = "12", number = "1", pages = "6:1--6:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700230", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The massively parallel processing capacity of GPGPUs requires a large register file (RF), and its size keeps increasing to support more concurrent threads from generation to generation. Using traditional SRAM-based RFs, there are concerns in both area cost and energy consumption, and soon they will become unrealistic. In this work, we analyze the feasibility of using STTRAM-based RF designs, which have benefits in terms of smaller silicon area and zero standby leakage power. However, STTRAM long write latency and high write energy bring new challenges. Therefore, we propose a write-aware STTRAM-based RF architecture (WarRF), which contains two techniques: Split Bank Write modifies the arbitrator design to increase the parallelism of read and write accesses in the same bank; Write Pool reduces the number of repeated write accesses to RFs. 
Our experiment shows that the performance of STTRAM-based RF is improved by 13\% and up to 23\% after adopting WarRF. In addition, the energy consumption is reduced by 38\% on average compared to SRAM-based RFs.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Romani:2015:SSC, author = "Aldo Romani and Matteo Filippi and Michele Dini and Marco Tartagni", title = "A Sub-$ \mu $ A Stand-By Current Synchronous Electric Charge Extractor for Piezoelectric Energy Harvesting", journal = j-JETC, volume = "12", number = "1", pages = "7:1--7:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700244", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In the field of energy harvesting there is a growing interest in power management circuits with intrinsic sub-$ \mu $ A current consumptions, in order to operate efficiently with very low levels of available power. In this context, integrated circuits proved to be a viable solution with high associated nonrecurring costs and design risks. As an alternative, this article presents a fully autonomous and battery-less circuit solution for piezoelectric energy harvesting based on discrete components in a low-cost PCB technology, which achieves a comparable performance in a $ 32 \times 43 $ mm$^2$ footprint. The power management circuit implements synchronous electric charge extraction (SECE) with a passive bootstrap circuit from fully discharged states. Circuit characterization showed that the circuit consumes less than 1 $ \mu $A with a 3V output and may achieve energy conversion efficiencies of up to 85\%. 
In addition, the circuit is specifically designed for operating with input and output voltages up to 20V, which grants a significant flexibility in the choice of transducers and energy storage capacitors.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Jayakumar:2015:QHS, author = "Hrishikesh Jayakumar and Arnab Raha and Woo Suk Lee and Vijay Raghunathan", title = "{QuickRecall}: a {HW\slash SW} Approach for Computing across Power Cycles in Transiently Powered Computers", journal = j-JETC, volume = "12", number = "1", pages = "8:1--8:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700249", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Transiently Powered Computers (TPCs) are a new class of batteryless embedded systems that depend solely on energy harvested from external sources for performing computations. Enabling long-running computations on TPCs is a major challenge due to the highly intermittent nature of the power supply (often bursts of {$<$} 100ms), resulting in frequent system reboots. Prior work seeks to address this issue by frequently checkpointing system state in flash memory, preserving it across power cycles. However, this involves a substantial overhead due to the high erase/write times of flash memory. This article proposes the use of Ferroelectric RAM (FRAM), an emerging nonvolatile memory technology that combines the benefits of SRAM and flash, to seamlessly enable long-running computations in TPCs. 
We propose a lightweight, in-situ checkpointing technique for TPCs using FRAM that consumes only 30 nJ while decreasing the time taken for saving and restoring a checkpoint to only 21.06 $ \mu $ s, which is over two orders of magnitude lower than the corresponding overhead using flash. We have implemented and evaluated our technique, QuickRecall, using the TI MSP430FR5739 FRAM-enabled microcontroller. Experimental results show that our highly-efficient checkpointing translates to significant speedup ($ 1.25 \times $--$ 8.4 \times $) in program execution time and reduction ($ \approx 3 \times $) in application-level energy consumption.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chien:2015:FTO, author = "Chia-Hung Chien and Rodney {Van Meter} and Sy-Yen Kuo", title = "Fault-Tolerant Operations for Universal Blind Quantum Computation", journal = j-JETC, volume = "12", number = "1", pages = "9:1--9:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700248", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Blind quantum computation is an appealing use of quantum information technology because it can conceal both the client's data and the algorithm itself from the server. However, problems need to be solved in the practical use of blind quantum computation and fault-tolerance is a major challenge. Broadbent et al. proposed running error correction over blind quantum computation, and Morimae and Fujii proposed using fault-tolerant entangled qubits as the resource for blind quantum computation. Both approaches impose severe demands on the teleportation channel, the former requiring unrealistic data rates and the latter near-perfect fidelity.
To extend the application range of blind quantum computation, we suggest that Alice send input qubits encoded with error correction code instead of single input qubits. Two fault-tolerant protocols are presented and we showed the trade-off of the computational overhead using the ten-bit quantum carry-lookahead adder as an example. Though these two fault-tolerant protocols require the client to have more quantum computing ability than using approaches from prior work, they provide better fault-tolerance when the client and the server are connected by realistic quantum repeater networks.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cheng:2015:SSC, author = "Ching-Hwa Cheng", title = "{SCKVdd}: a Scalable Clock-Controlled Self-Stabilized Voltage Technique for Low Power {CMOS} Digital Circuits", journal = j-JETC, volume = "12", number = "1", pages = "10:1--10:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2790754", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 4 07:26:23 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "It has been proposed that small amounts of energy dissipate when transfer through a rising Vdd. In typical power gate circuits, the PMOS transistors (P$_{SW}$ ) reduce the leakage of power by shutting off outer Vdd to the idle blocks. We expand this technique by utilizing active P$_{SW}$, which are turned on and off by a clock signal. The proposed SCKVdd technique combines the power source gated mechanism and clock signal to generate stable progressive rising voltage to suppress peak and average currents effectively. The SCKVdd technique is a scalable, clock-controlled, self-stabilized voltage technique. 
This technique is easily implemented in generic digital circuits to reduce power dissipation. A normal CMOS circuit shows a dynamic power consumption increase proportional to the clock frequency. SCKVdd results in a lower-than-usual frequency dependency, and is suitable for high speed clock circuits. SCKVdd can be integrated with frequency, voltage scaling and an activated P$_{SW}$ number to implement an efficient power-performance trade-off mechanism. In experiments that investigated constant Vdd for MPEG VLD chips, power dissipation savings were in the range of 42\% to 54\% with only a small delay penalty.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Todri-Sanial:2015:GES, author = "Aida Todri-Sanial and Sanjukta Bhanja", title = "Guest Editorial: Special Issue on Advances in Design of Ultra-Low Power Circuits and Systems in Emerging Technologies", journal = j-JETC, volume = "12", number = "2", pages = "11:1--11:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2756554", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gaillardon:2015:SLP, author = "Pierre-Emmanuel Gaillardon and Edith Beigne and Suzanne Lesecq and Giovanni {De Micheli}", title = "A Survey on Low-Power Techniques with Emerging Technologies: From Devices to Systems", journal = j-JETC, volume = "12", number = "2", pages = "12:1--12:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2714566", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", 
bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Nowadays, power consumption is one of the main limitations of electronic systems. In this context, novel and emerging devices provide new opportunities to extend the trend toward low-power design. In this survey article, we present a transversal survey on energy-efficient techniques ranging from devices to architectures. The actual trends of device research, with fully depleted planar devices, tri-gate geometries, and gate-all-around structures, allows us to reach an increasingly higher level of performance while reducing the associated power. In addition, beyond the simple device property enhancements, emerging devices also lead to innovations at the circuit and architectural levels. In particular, devices whose properties can be tuned through additional terminals enable a fine and dynamic control of device threshold. They also enable designers to realize logic gates and to implement power-related techniques in a compact way unreachable to standard technologies. 
These innovations reduce power consumption at the gate level and unlock new means of actuation in architectural solutions like adaptive voltage and frequency scaling.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sitik:2015:FBL, author = "Can Sitik and Emre Salman and Leo Filippini and Sung Jun Yoon and Baris Taskin", title = "{FinFET}-Based Low-Swing Clocking", journal = j-JETC, volume = "12", number = "2", pages = "13:1--13:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2701617", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A low-swing clocking methodology is introduced to achieve low-power operation at 20nm FinFET technology. Low-swing clock trees are used in existing methodologies in order to decrease the dynamic power consumption in a trade-off for 3 issues: (1) the effect of leakage power consumption, which is becoming more dominant when the process scales sub-32nm; (2) the increase in insertion delay, resulting in a high clock skew; and (3) the difficulty in driving the existing DFF sinks with a low-swing clock signal without a timing violation. In this article, a FinFET-based low-swing clocking methodology is introduced to preserve the dynamic power savings of low-swing clocking while minimizing these three negative effects, facilitated through an efficient use of FinFET technology. 
At scaled performance constraints, the proposed methodology at 20nm FinFET leads to 42\% total power savings (clock network+DFF) compared to a FinFET-based full-swing counterpart at the same frequency (3 GHz), thanks to the dynamic power savings of low-swing clocking and 3\% power savings compared to a CMOS-based low-swing implementation running at the half frequency (1.5 GHz), thanks to the leakage power savings of FinFET technology.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhang:2015:DCP, author = "Tiansheng Zhang and Jie Meng and Ayse K. Coskun", title = "Dynamic Cache Pooling in {$3$D} Multicore Processors", journal = j-JETC, volume = "12", number = "2", pages = "14:1--14:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700247", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Resource pooling, where multiple architectural components are shared among cores, is a promising technique for improving system energy efficiency and reducing total chip area. 3D stacked multicore processors enable efficient pooling of cache resources owing to the short interconnect latency between vertically stacked layers. This article first introduces a 3D multicore architecture that provides poolable cache resources. We then propose a runtime management policy to improve energy efficiency in 3D systems by utilizing the flexible heterogeneity of cache resources. Our policy dynamically allocates jobs to cores on the 3D system while partitioning cache resources based on cache hungriness of the jobs. We investigate the impact of the proposed cache resource pooling architecture and management policy in 3D systems, both with and without on-chip DRAM. 
We evaluate the performance, energy efficiency, and thermal behavior for a wide range of workloads running on 3D systems. Experimental results demonstrate that the proposed architecture and policy reduce system energy-delay product (EDP) and energy-delay-area product (EDAP) by 18.8\% and 36.1\% on average, respectively, in comparison to 3D processors with static cache sizes.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Khasanvis:2015:LPH, author = "Santosh Khasanvis and K. M. Masum Habib and Mostafizur Rahman and Roger Lake and Csaba Andras Moritz", title = "Low-Power Heterogeneous Graphene Nanoribbon-{CMOS} Multistate Volatile Memory Circuit", journal = j-JETC, volume = "12", number = "2", pages = "15:1--15:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700233", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Graphene is an emerging nanomaterial believed to be a potential candidate for post-Si nanoelectronics due to its exotic properties. Recently, a new graphene nanoribbon crossbar (xGNR) device was proposed which exhibits negative differential resistance (NDR). In this article, a multistate memory design is presented that can store multiple bits in a single cell enabled by this xGNR device, called graphene nanoribbon tunneling random access memory (GNTRAM). An approach to increase the number of bits per cell is explored alternative to physical scaling to overcome CMOS SRAM limitations. A comprehensive design for quaternary GNTRAM is presented as a baseline, implemented with a heterogeneous integration between graphene and CMOS. Sources of leakage and approaches to mitigate them are investigated. 
This design is extensively benchmarked against 16nm CMOS SRAMs and 3T DRAM. The proposed quaternary cell shows up to 2.27$ \times $ density benefit versus 16nm CMOS SRAMs and 1.8$ \times $ versus 3T DRAM. It has comparable read performance and is power efficient up to 1.32$ \times $ during active period and 818$ \times $ during standby against high-performance SRAMs. Multistate GNTRAM has the potential to realize high-density low-power nanoscale embedded memories. Further improvements may be possible by using graphene more extensively, as graphene transistors become available in the future.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kang:2015:SEU, author = "Wang Kang and Yue Zhang and Zhaohao Wang and Jacques-Olivier Klein and Claude Chappert and Dafin{\'e} Ravelosona and Gefei Wang and Youguang Zhang and Weisheng Zhao", title = "Spintronics: Emerging Ultra-Low-Power Circuits and Systems beyond {MOS} Technology", journal = j-JETC, volume = "12", number = "2", pages = "16:1--16:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2663351", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Conventional MOS integrated circuits and systems suffer severe power and scalability challenges as technology nodes scale into ultra-deep-micron technology nodes (e.g., below 40nm). Both static and dynamic power dissipations are increasing, caused mainly by the intrinsic leakage currents and large data traffic. Alternative approaches beyond charge-only-based electronics, and in particular, spin-based devices, show promising potential to overcome these issues by adding the spin freedom of electrons to electronic circuits.
Spintronics provides data non-volatility, fast data access, and low-power operation, and has now become a hot topic in both academia and industry for achieving ultra-low-power circuits and systems. The ITRS report on emerging research devices identified the magnetic tunnel junction (MTJ) nanopillar (one of the Spintronics nanodevices) as one of the most promising technologies to be part of future micro-electronic circuits. In this review we will give an overview of the status and prospects of spin-based devices and circuits that are currently under intense investigation and development across the world, and address particularly their merits and challenges for practical applications. We will also show that, with a rapid development of Spintronics, some novel computing architectures and paradigms beyond classic Von-Neumann architecture have recently been emerging for next-generation ultra-low-power circuits and systems.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Azghadi:2015:PST, author = "Mostafa Rahimi Azghadi and Saber Moradi and Daniel B. Fasnacht and Mehmet Sirin Ozdas and Giacomo Indiveri", title = "Programmable Spike-Timing-Dependent Plasticity Learning Circuits in Neuromorphic {VLSI} Architectures", journal = j-JETC, volume = "12", number = "2", pages = "17:1--17:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2658998", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Hardware implementations of spiking neural networks offer promising solutions for computational tasks that require compact and low-power computing technologies. 
As these solutions depend on both the specific network architecture and the type of learning algorithm used, it is important to develop spiking neural network devices that offer the possibility to reconfigure their network topology and to implement different types of learning mechanisms. Here we present a neuromorphic multi-neuron VLSI device with on-chip programmable event-based hybrid analog/digital circuits; the event-based nature of the input/output signals allows the use of address-event representation infrastructures for configuring arbitrary network architectures, while the programmable synaptic efficacy circuits allow the implementation of different types of spike-based learning mechanisms. The main contributions of this article are to demonstrate how the programmable neuromorphic system proposed can be configured to implement specific spike-based synaptic plasticity rules and to depict how it can be utilised in a cognitive task. Specifically, we explore the implementation of different spike-timing plasticity learning rules online in a hybrid system comprising a workstation and when the neuromorphic VLSI device is interfaced to it, and we demonstrate how, after training, the VLSI device can perform as a standalone component (i.e., without requiring a computer), binary classification of correlated patterns.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Graziano:2015:PVE, author = "Mariagrazia Graziano and Azzurra Pulimeno and Ruiyu Wang and Xiang Wei and Massimo Ruo Roch and Gianluca Piccinini", title = "Process Variability and Electrostatic Analysis of Molecular {QCA}", journal = j-JETC, volume = "12", number = "2", pages = "18:1--18:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2738041", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = 
"Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Molecular quantum-dot cellular automata (mQCA) is an emerging paradigm for nanoscale computation. Its revolutionary features are the expected operating frequencies (THz), the high device densities, the noncryogenic working temperature, and, above all, the limited power densities. The main drawback of this technology is a consequence of one of its very main advantages, that is, the extremely small size of a single molecule. Device prototyping and the fabrication of a simple circuit are limited by lack of control in the technological process [Pulimeno et al. 2013a]. Moreover, high defectivity might strongly impact the correct behavior of mQCA devices. Another challenging point is the lack of a solid method for analyzing and simulating mQCA behavior and performance, either in ideal or defective conditions. Our contribution in this article is threefold: (i) We identify a methodology based on both ab-initio simulations and post-processing of data for analyzing an mQCA system adopting an electronic point of view (we baptized this method as ``MoSQuiTo''); (ii) we assess the performance of an mQCA device (in this case, a bis- ferrocene molecule) working in nonideal conditions, using as a reference the information on fabrication-critical issues and on the possible defects that we are obtaining while conducting our own ongoing experiments on mQCA: (iii) we determine and assess the electrostatic energy stored in a bis-ferrocene molecule both in an oxidized and reduced form. Results presented here consist of quantitative information for an mQCA device working in manifold driving conditions and subjected to defects. This information is given in terms of: (a) output voltage; (b) safe operating area (SOA); (c) electrostatic energy; and (d) relation between SOA and energy, that is, possible energy reduction subject to reliability and functionality constraints. 
The whole analysis is a first fundamental step toward the study of a complex mQCA circuit. It gives important suggestions on possible improvements of the technological processes. Moreover, it starts an interesting assessment on the energy of an mQCA, one of the most promising features of this technology.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Le:2015:END, author = "Trong Nhan Le and Alain Pegatoquet and Olivier Berder and Olivier Sentieys and Arnaud Carer", title = "Energy-Neutral Design Framework for Supercapacitor-Based Autonomous Wireless Sensor Networks", journal = j-JETC, volume = "12", number = "2", pages = "19:1--19:??", month = aug, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2787512", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 8 18:25:16 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "To design autonomous wireless sensor networks (WSNs) with a theoretical infinite lifetime, energy harvesting (EH) techniques have been recently considered as promising approaches. Ambient sources can provide everlasting additional energy for WSN nodes and exclude their dependence on battery. In this article, an efficient energy harvesting system which is compatible with various environmental sources, such as light, heat, or wind energy, is proposed. Our platform takes advantage of double-level capacitors not only to prolong system lifetime but also to enable robust booting from the exhausting energy of the system. Simulations and experiments show that our multiple-energy-sources converter (MESC) can achieve booting time in order of seconds. Although capacitors have virtual recharge cycles, they suffer higher leakage compared to rechargeable batteries.
Increasing their size can decrease the system performance due to leakage energy. Therefore, an energy-neutral design framework providing a methodology to determine the minimum size of those storage devices satisfying energy-neutral operation (ENO) and maximizing system quality-of-service (QoS) in EH nodes, when using a given energy source, is proposed. Experiments validating this framework are performed on a real WSN platform with both photovoltaic cells and thermal generators in an indoor environment. Moreover, simulations on OMNET++ show that the energy storage optimized from our design framework is utilized up to 93.86\%.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Shi:2015:ISI, author = "Yiyu Shi and Takashi Sato", title = "Introduction to: Special Issue on Cross-Layer System Design", journal = j-JETC, volume = "12", number = "3", pages = "20:1--20:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767131", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{De:2015:ASC, author = "Vivek K. De and Andrew B. 
Kahng and Tanay Karnik and Bao Liu and Milad Maleki and Lu Wang", title = "Application-Specific Cross-Layer Optimization Based on Predictive Variable-Latency {VLSI} Design", journal = j-JETC, volume = "12", number = "3", pages = "21:1--21:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746341", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Traditional synchronous VLSI design requires that all computations in a logic stage complete in one clock cycle. This leads to increasingly pessimistic design as technology scaling introduces increasingly significant parametric variations that result in an increasing performance variability. Alternatively, by allowing computations in a logic stage to complete in a variable number of clock cycles, variable-latency design provides relaxed timing constraints for average performance, area, and power consumption optimization. In this article, we present improved variable-latency design techniques including: (1) a generic minimum-intrusion variable-latency VLSI design paradigm, (2) a signal probability-based approximate prediction logic construction method for minimum misprediction rate at minimum cost, and (3) an application-specific cross-layer analysis methodology. 
Our experiments show that the proposed variable-latency design methodology on average reduces the computation latency by 26.80\%(14.65\%) at cost of 0.08\%(3.4\%) area and 0.4\%(2.2\%) energy consumption increase for the integer (floating point) unit of an open-source SPARC V8 processor LEON2 synthesized with a clock-cycle time between 1.97ns(3.49ns) and 5.96ns(13.74ns) based on the 45nm Nangate open cell library, while an automotive application-specific design further achieves an average latency reduction of 41.8\%.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Patnaik:2015:PPC, author = "Milan Patnaik and Chidhambaranathan R. and Chirag Garg and Arnab Roy and V. R. Devanathan and Shankar Balachandran and V. Kamakoti", title = "{ProWATCh}: a Proactive Cross-Layer Workload-Aware Temperature Management Framework for Low-Power Chip Multi-Processors", journal = j-JETC, volume = "12", number = "3", pages = "22:1--22:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2753762", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With the increase in process variations and diversity in workloads, it is imperative to holistically explore optimization techniques for power and temperature from the circuit layer right up to the compiler/operating system (OS) layer. This article proposes one such holistic technique, called proactive workload aware temperature management framework for low-power chip multi-processors (ProWATCh).
At the compiler level ProWATCh includes two techniques: (1) a novel compiler design for estimating the architectural parameters of a task at compile time; and (2) a model-based technique for dynamic estimation of architectural parameters at runtime. At the OS level ProWATCh integrates two techniques: (1) a workload- and temperature-aware process manager for dynamic distribution of tasks to different cores; and (2) a model predictive control-based task scheduler for generating the efficient sequence of task execution. At the circuit level ProWATCh implements either of two techniques: (1) a workload-aware voltage manager for dynamic supply and body bias voltage assignment for a given frequency in processors that support adaptive body bias (ABB); or (2) a workload-aware frequency governor for efficient assignment of upper and lower frequency bounds for frequency scaling in processors that do not support an ABB. Employing ProWATCh (with voltage manager) on an ABB-compatible 3D OpenSPARC architecture using MiBench benchmarks resulted in an average 18\% (19$^\circ$C) reduction in peak temperature. Evaluating ProWATCh on an existing quad-core Intel Corei7 processor with frequency governor alone (as the processor does not support an ABB interface) resulted in 10\% (8$^\circ$C) reduction in peak temperature when compared to what was obtained using the native Linux 3.0 completely fair scheduler (CFS). To study the effectiveness of the proposed framework across benchmark suites, ProWATCh was evaluated on a quad-core Intel Corei7 processor using CPU SPEC 2006 benchmarks which resulted in 7$^\circ$C reduction in peak temperature as compared to the native Linux 3.0 CFS.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhao:2015:STD, author = "Chenyuan Zhao and Bryant T. Wysocki and Yifang Liu and Clare D. Thiem and Nathan R.
McDonald and Yang Yi", title = "Spike-Time-Dependent Encoding for Neuromorphic Processors", journal = j-JETC, volume = "12", number = "3", pages = "23:1--23:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2738040", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents our research towards developing novel and fundamental methodologies for data representation using spike-timing-dependent encoding. Time encoding efficiently maps a signal's amplitude information into a spike time sequence that represents the input data and offers perfect recovery for band-limited stimuli. In this article, we pattern the neural activities across multiple timescales and encode the sensory information using time-dependent temporal scales. The spike encoding methodologies for autonomous classification of time-series signatures are explored using near-chaotic reservoir computing. The proposed spiking neuron is compact, low power, and robust. 
A hardware implementation of these results is expected to produce an agile hardware implementation of time encoding as a signal conditioner for dynamical neural processor designs.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Barke:2015:CLA, author = "Martin Barke and Ulf Schlichtmann", title = "A Cross-Layer Approach to Measure the Robustness of Integrated Circuits", journal = j-JETC, volume = "12", number = "3", pages = "24:1--24:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2743022", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The demands on system robustness and its immunity against perturbations are getting increasingly important. Nearly everybody has an intuitive understanding of what robustness means, but there is no proper way how to measure robustness of integrated circuits already during the design phase. Therefore, a general cross-layer robustness model and methods to quantitatively measure robustness are presented. Moreover, these methods are refined to predict the robustness against degradation of digital circuits due to aging effects.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhuo:2015:CLA, author = "Cheng Zhuo and Houle Gan and Wei-Kai Shih and Alaeddin A. 
Aydiner", title = "A Cross-Layer Approach for Early-Stage Power Grid Design and Optimization", journal = j-JETC, volume = "12", number = "3", pages = "25:1--25:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700246", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Power integrity has become increasingly important for sub-32nm designs. Many prior works have discussed power grid design and optimization in the post-layout stage, when design change is inevitably expensive and difficult. In contrast, during the early stage of a development cycle, designers have more flexibility to improve the design quality. However, there are several fundamental challenges at early stage when the design database is not complete, including extraction, modeling, and optimization. This article tackles these fundamental issues of early-stage power grid design from architecture to layout. The proposed methods have been silicon validated on 32nm on-market chips and successfully applied to a 22nm design for its early-stage power grid design. 
The findings from such practices reveal that, for sub-32nm chips, an intrinsic on-die capacitance and power gate scheme may have more significant impact than expected on power integrity, and needs to be well addressed at early stage.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lee:2015:REE, author = "Jinho Lee and Kyungsu Kang and Kiyoung Choi", title = "{REDELF}: an Energy-Efficient Deadlock-Free Routing for {$3$D} {NoCs} with Partial Vertical Connections", journal = j-JETC, volume = "12", number = "3", pages = "26:1--26:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2751560", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "3D integrated circuits (3D ICs) using through-silicon vias (TSVs) allow to envision the stacking of dies with different functions and technologies, using as an interconnect backbone a 3D network-on-chip (NoC). However, partial vertical connection in 3D NoCs seems unavoidable because of the large overhead of TSV itself (e.g., large footprint, low fabrication yield, additional fabrication processes) as well as the heterogeneity in dimension. This article proposes an energy-efficient deadlock-free routing algorithm for 3D mesh topologies where vertical connections partially exist. By introducing some rules for selecting elevators (i.e., vertical links between dies), the routing algorithm can eliminate the dedicated virtual channel requirement. In this article, the rules themselves as well as the proof of deadlock freedom are given. By eliminating the virtual channels for deadlock avoidance, the proposed routing algorithm reduces the energy consumption by 38.9\% compared to a conventional routing algorithm. 
When the virtual channel is used for reducing the head-of-line blocking, the proposed routing algorithm increases performance by up to 23.1\% and 6.9\% on average.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zoni:2015:MDP, author = "Davide Zoni and William Fornaciari", title = "Modeling {DVFS} and Power-Gating Actuators for Cycle-Accurate {NoC}-Based Simulators", journal = j-JETC, volume = "12", number = "3", pages = "27:1--27:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2751561", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Networks-on-chip (NoCs) are a widely recognized viable interconnection paradigm to support the multi-core revolution. One of the major design issues of multicore architectures is still the power, which can no longer be considered mainly due to the cores, since the NoC contribution to the overall energy budget is relevant. To face both static and dynamic power while balancing NoC performance, different actuators have been exploited in literature, mainly dynamic voltage frequency scaling (DVFS) and power gating. Typically, simulation-based tools are employed to explore the huge design space by adopting simplified models of the components. As a consequence, the majority of state-of-the-art on NoC power-performance optimization do not accurately consider timing and power overheads of actuators, or (even worse) do not consider them at all, with the risk of overestimating the benefits of the proposed methodologies. This article presents a simulation framework for power-performance analysis of multicore architectures with specific focus on the NoC. 
It integrates accurate power gating and DVFS models encompassing also their timing and power overheads. The value added of our proposal is manyfold: (i) DVFS and power gating actuators are modeled starting from SPICE-level simulations; (ii) such models have been integrated in the simulation environment; (iii) policy analysis support is plugged into the framework to enable assessment of different policies; (iv) a flexible GALS (globally asynchronous locally synchronous) support is provided, covering both handshake and FIFO re-synchronization schemas. To demonstrate both the flexibility and extensibility of our proposal, two simple policies exploiting the modeled actuators are discussed in the article.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2015:GPF, author = "Xianmin Chen and Niraj K. Jha", title = "{gem5-PVT}: a Framework for {FinFET} System Simulation under {PVT} Variations", journal = j-JETC, volume = "12", number = "3", pages = "28:1--28:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2755564", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "FinFET has begun replacing CMOS at the 22nm technology node and beyond. Compared to planar CMOS, FinFET has a higher on-current and lower leakage due to its double-gate structure. A FinFET-based system simulation framework can be very helpful to system architects for early-stage design-space exploration using this new technology. However, such a simulator does not exist. We fill this gap by presenting the details of one such simulation framework, called gem5-PVT, that we have developed.
Our simulation framework combines and extends existing lower-level FinFET simulators to support timing, power, and thermal studies of FinFET-based chip multiprocessor systems under process, voltage, and temperature (PVT) variations. It uses a bottom-up modeling approach based on logic/memory cell libraries that have been very accurately characterized using TCAD device simulation. This allows accuracy to bubble up to the system level. The framework is modular and automated, hence enables system designers the flexibility to evaluate various system implementations. It is currently targeted at the 22nm FinFET technology. We report results for two case studies to demonstrate its usefulness. One study shows that more than 62.1$ \times $ system-level leakage reduction, at the same performance, is possible when using a particular FinFET logic style. Another study characterizes core-to-core frequency and power variations that result from underlying PVT variations and compares the effectiveness of variation-aware scheduling schemes.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bahreini:2015:MMS, author = "Tayebeh Bahreini and Naser Mohammadzadeh", title = "An {MINLP} Model for Scheduling and Placement of Quantum Circuits with a Heuristic Solution Approach", journal = j-JETC, volume = "12", number = "3", pages = "29:1--29:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2766452", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent works on quantum physical design have pushed the scheduling and placement of quantum circuit into their prominent positions. 
In this article, a mixed integer nonlinear programming model is proposed for the placement and scheduling of quantum circuits in such a way that latency is minimized. The proposed model determines locations of gates and the sequence of operations. The proposed model is proved reducible to a quadratic assignment problem which is a well-known NP-complete combinatorial optimization problem. Since it is impossible to find the optimal solution of this NP-complete problem for large quantum circuits within a reasonable amount of time, a metaheuristic solution method is developed for the proposed model. Some experiments are conducted to evaluate the performance of the developed solution approach. Experimental results show that the proposed approach improves average latency by about 24.09\% for the attempted benchmarks.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Rahman:2015:NVR, author = "Mostafizur Rahman and Santosh Khasanvis and Csaba Andras Moritz", title = "Nanowire Volatile {RAM} as an Alternative to {SRAM}", journal = j-JETC, volume = "12", number = "3", pages = "30:1--30:??", month = sep, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2714567", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 22 17:30:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Maintaining benefits of CMOS technology scaling is becoming challenging, primarily due to increased manufacturing complexities and unwanted passive power dissipations. This is particularly challenging in SRAM, where manufacturing precision and leakage power control are critical issues. To alleviate these challenges, we proposed a novel volatile memory alternative to SRAM called nanowire volatile RAM (NWRAM). 
Due to NWRAM's regular grid-based layout and innovative circuit style, manufacturing complexities are reduced and, at the same time, considerable benefits are attained in terms of performance and leakage power reduction. In this article we elaborate NWRAM's circuit aspects and manufacturability, and quantify benefits at 16nm technology node through simulation against state-of-the-art 6T-SRAM and gridded 8T-SRAM designs. Our results show that when lower bounds in design rules are considered, 10T-NWRAM's read and write time are 1.38x and 2x faster, and the leakage power is 14x better in comparison to high-performance 6T-SRAM. Similarly the 10T-NWRAM achieves 1.3x and 1.9x read and write performance, and 35x leakage power improvements compared to high-performance 8T-SRAM. 10T-NWRAM's density is comparable to 6T-SRAM and 8T-SRAM for lower bounds, but exhibits higher active power in similar comparisons. This article details all benchmarking results and provides thorough analysis of NWRAM's evaluations.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Khouzani:2016:FEP, author = "Hoda Aghaei Khouzani and Yuan Xue and Chengmo Yang", title = "Fully Exploiting {PCM} Write Capacity Within Near Zero Cost Through Segment-Based Page Allocation", journal = j-JETC, volume = "12", number = "4", pages = "31:1--31:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856423", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Improving the endurance of phase change memory (PCM) is a fundamental issue when PCM technology is considered as an alternative to main memory usage. 
Existing wear-leveling techniques overcome this challenge through constantly remapping hot virtual pages, thus engendering a fair amount of extra write operations to PCM and imposing considerable performance and energy overhead. Our observation is that it is unnecessary to fully balance the accesses to different physical page frames during the execution of each process. Instead, since endurance is a lifetime factor, the hot virtual pages of different processes can be mapped to different physical pages in the PCM. Leveraging this property, we develop a wear-resistant page allocation algorithm, which exploits the diverse write characteristics of different program segments to improve PCM write endurance within almost no extra remapping cost in terms of energy and performance. The results of experiments conducted based on SPEC benchmarks show that the proposed technique can prolong PCM lifetime by hundreds of times within nearly zero searching and remapping overhead.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Layer:2016:RSP, author = "Christophe Layer and Laurent Becker and Kotb Jabeur and Sylvain Claireux and Bernard Dieny and Guillaume Prenat and Gregory {Di Pendina} and Stephane Gros and Pierre Paoli and Virgile Javerliac and Fabrice Bernard-Granger and Loic Decloedt", title = "Reducing System Power Consumption Using Check-Pointing on Nonvolatile Embedded Magnetic Random Access Memories", journal = j-JETC, volume = "12", number = "4", pages = "32:1--32:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2876507", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The most widely used embedded memory technology, static random access memory (SRAM), is 
heading toward scaling problems in advanced technology nodes due to the leakage currents caused by the quantum tunneling effect. As an alternative, spin-transfer torque magnetic RAM (STT-MRAM) technology shows comparable performance in terms of speed and power consumption and much better performance in terms of density and leakage. Moreover, MRAM brings up new paradigms in system design thanks to its inherent nonvolatility, which allows the definition of new instant-on/off policies and leakage current optimization. Based on our compact model, we have developed a fully characterized system-on-chip from the basic cell up to the system architecture in a 40nm LP hybrid CMOS/magnetic process. Through simulations, first we demonstrate that STT-MRAM is a candidate for the memory part of embedded systems, and second we implement a check-pointing methodology based on the regular interrupt routines of a processor to enable a fast power on and off functionality. Using a synthetic benchmark developed in high-level programming languages intended to be representative of integer system performance, our method shows that having MRAM instead of SRAM in an embedded design brings up important energy savings. 
The influence of the check-pointing routine on power consumption is finally evaluated with regard to various shutdown and restart behaviors.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wu:2016:RCA, author = "Chengwen Wu and Guangyan Zhang and Keqin Li", title = "Rethinking Computer Architectures and Software Systems for Phase-Change Memory", journal = j-JETC, volume = "12", number = "4", pages = "33:1--33:40", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2893186", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With dramatic growth of data and rapid enhancement of computing powers, data accesses become the bottleneck restricting overall performance of a computer system. Emerging phase-change memory (PCM) is byte-addressable like DRAM, persistent like hard disks and Flash SSD, and about four orders of magnitude faster than hard disks or Flash SSDs for typical file system I/Os. The maturity of PCM from research to production provides a new opportunity for improving the I/O performance of a system. However, PCM also has some weaknesses, for example, long write latency, limited write endurance, and high active energy. Existing processor cache systems, main memory systems, and online storage systems are unable to leverage the advantages of PCM, and/or to mitigate PCM's drawbacks. The reason behind this incompetence is that they are designed and optimized for SRAM, DRAM memory, and hard drives, respectively, instead of PCM memory. There have been some efforts concentrating on rethinking computer architectures and software systems for PCM. 
This article presents a detailed survey and review of the areas of computer architecture and software systems that are oriented to PCM devices. First, we identify key technical challenges that need to be addressed before this memory technology can be leveraged, in the form of processor cache, main memory, and online storage, to build high-performance computer systems. Second, we examine various designs of computer architectures and software systems that are PCM aware. Finally, we obtain several helpful observations and propose a few suggestions on how to leverage PCM to optimize the performance of a computer system.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Deb:2016:RSS, author = "Arighna Deb and Debesh K. Das and Hafizur Rahaman and Robert Wille and Rolf Drechsler and Bhargab B. Bhattacharya", title = "Reversible Synthesis of Symmetric Functions with a Simple Regular Structure and Easy Testability", journal = j-JETC, volume = "12", number = "4", pages = "34:1--34:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2894757", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we introduce a novel method of synthesizing symmetric Boolean functions with reversible logic gates. In contrast to earlier approaches, the proposed technique deploys a simple, regular, and cascaded structure consisting of an array of Peres and CNOT gates, which results in significant reduction with respect to the quantum cost. However, the number of circuit inputs may increase slightly when such cascades are used. In order to reduce their number, we next propose a postsynthesis optimization phase that allows judicious reuse of circuit lines. 
In addition to offering a cost-effective synthesis methodology, the proposed reversible logic structure supports elegant testability properties. With respect to all single or partial missing gate faults (SMGFs and PMGFs), or repeated gate faults (RGFs) in such an $n$-input circuit module, we show that it admits a universal test set of constant cardinality (=3) for any value of $n$. Thus, considering both the cost and testability issues, this approach provides a superior option for synthesizing symmetric functions compared to existing designs.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2016:NPM, author = "Qian Wang and Yongtae Kim and Peng Li", title = "Neuromorphic Processors with Memristive Synapses: Synaptic Interface and Architectural Exploration", journal = j-JETC, volume = "12", number = "4", pages = "35:1--35:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2894756", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Due to their nonvolatile nature, excellent scalability, and high density, memristive nanodevices provide a promising solution for low-cost on-chip storage. Integrating memristor-based synaptic crossbars into digital neuromorphic processors (DNPs) may facilitate efficient realization of brain-inspired computing. This article investigates architectural design exploration of DNPs with memristive synapses by proposing two synapse readout schemes. The key design tradeoffs involving different analog-to-digital conversions and memory accessing styles are thoroughly investigated.
A novel storage strategy optimized for feedforward neural networks is proposed in this work, which greatly reduces the energy and area cost of the memristor array and its peripherals.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Biswas:2016:IFW, author = "Kalyan Biswas and Angsuman Sarkar and Chandan Kumar Sarkar", title = "Impact of Fin Width Scaling on {RF}\slash Analog Performance of Junctionless Accumulation-Mode Bulk {FinFET}", journal = j-JETC, volume = "12", number = "4", pages = "36:1--36:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903143", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, the RF and analog performance of junctionless accumulation-mode bulk FinFETs is analyzed by employing the variation of fin width so that it can be used as a high-efficiency RF integrated circuit design. The RF/analog performance evaluation has been carried out using the ATLAS 3D device simulator in terms of evaluation of figure-of-merits metrics such as transconductance (g$_m$), gate-to-source/drain capacitances (C$_{gg}$), cutoff frequency (f$_T$), and maximum frequency of oscillation (f$_{max}$). Apart from RF/analog performance investigation, the variation of ON-current to OFF-current ratio (I$_{ON}$/I$_{OFF}$) and transconductance generation factor (g$_m$/I$_{ds}$) have also been carried out.
From this study, it is observed that smaller fin width of the device improves its performance.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2016:AMS, author = "Yi-Hang Chen and Jian-Yu Chen and Juinn-Dar Huang", title = "Area Minimization Synthesis for Reconfigurable Single-Electron Transistor Arrays with Fabrication Constraints", journal = j-JETC, volume = "12", number = "4", pages = "37:1--37:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2906360", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Power dissipation has become a pressing issue of concern in the designs of most electronic system as fabrication processes enter even deeper submicron regions. More specifically, leakage power plays a dominant role in system power dissipation. An emerging circuit design style, the reconfigurable single-electron transistor (SET) array, has been proposed for continuing Moore's Law due to its ultra-low leakage power consumption. Recently, several works have been proposed to address the issues related to automated synthesis for the reconfigurable SET array. Nevertheless, all of those existing approaches consider mandatory fabrication constraints of SET array merely in late synthesis stages. In this article, we propose a synthesis algorithm, featuring input-variable ordering and dynamic product term ordering, for area minimization. The fabrication constraints are taken into account at every synthesis stage of proposed flow to guarantee better synthesis outcomes. We also develop a simulated annealing-based postprocess to find a proper phase assignment of each input variable for further area reduction. 
Experimental results show that our new methodology can achieve up to 29\% area reduction as compared to existing state-of-the-art techniques.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kim:2016:CAP, author = "Moon Seok Kim and William Cane-Wissing and Xueqing Li and Jack Sampson and Suman Datta and Sumeet Kumar Gupta and Vijaykrishnan Narayanan", title = "Comparative Area and Parasitics Analysis in {FinFET} and Heterojunction Vertical {TFET} Standard Cells", journal = j-JETC, volume = "12", number = "4", pages = "38:1--38:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2914790", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Vertical tunnel field-effect transistors (VTFETs) have been extensively explored to overcome the scaling limits and to improve on-current (I$_{ON}$) compared to standard lateral device structures for the future technologies. The benefits in terms of reduced footprint, high I$_{ON}$ and feasibility of fabrication have been demonstrated in several works. Among various VTFETs, the asymmetric heterojunction vertical tunnel FETs (HVTFETs) have emerged as one of the promising alternatives to standard transistors for low-voltage applications. However, while such device-level benefits without parasitics have been widely investigated, logic-gate design with parasitics and layout implications are not clear. In this article, we investigate and compare the layouts and parasitic capacitances and resistances of HVTFETs with FinFETs. Due to the vertical device structure of HVTFETs, a smaller footprint is observed compared to FinFETs in cells with small fan-in. 
However, for high fan-in cells, HVTFETs exhibit area overheads due to infeasibility of contact sharing in parallel and series transistors. These area overheads also lead to approximately 48\% higher parasitic capacitance and resistance compared to FinFETs when the number of parallel and series connections increases. Further, in order to analyze the impact of parasitics, we modeled the analytical parasitics in SPICE. The models for both HVTFETs and FinFETs with parasitics were used to simulate a 15-stage inverter-based ring oscillator (RO) in order to compare the delay and energy. Our simulation results clearly show that HVTFETs exhibit less delay at a $ V_{DD} < 0.45 $V and higher energy efficiency for $ V_{DD} $s in the range of 0.3V--0.7V, albeit at the cost of 8\% performance degradation.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ahsan:2016:DMQ, author = "Muhammad Ahsan and Rodney {Van Meter} and Jungsang Kim", title = "Designing a Million-Qubit Quantum Computer Using a Resource Performance Simulator", journal = j-JETC, volume = "12", number = "4", pages = "39:1--39:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2830570", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The optimal design of a fault-tolerant quantum computer involves finding an appropriate balance between the burden of large-scale integration of noisy components and the load of improving the reliability of hardware technology. This balance can be evaluated by quantitatively modeling the execution of quantum logic operations on a realistic quantum hardware containing limited computational resources.
In this work, we report a complete performance simulation software tool capable of (1) searching the hardware design space by varying resource architecture and technology parameters, (2) synthesizing and scheduling a fault-tolerant quantum algorithm within the hardware constraints, (3) quantifying the performance metrics such as the execution time and the failure probability of the algorithm, and (4) analyzing the breakdown of these metrics to highlight the performance bottlenecks and visualizing resource utilization to evaluate the adequacy of the chosen design. Using this tool, we investigate a vast design space for implementing key building blocks of Shor's algorithm to factor a 1,024-bit number with a baseline budget of 1.5 million qubits. We show that a trapped-ion quantum computer designed with twice as many qubits and one-tenth of the baseline infidelity of the communication channel can factor a 2,048-bit integer in less than 5 months.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Arabzadeh:2016:QLS, author = "Mona Arabzadeh and Mahboobeh Houshmand and Mehdi Sedighi and Morteza Saheb Zamani", title = "Quantum-Logic Synthesis of {Hermitian} Gates", journal = j-JETC, volume = "12", number = "4", pages = "40:1--40:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2794263", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, the problem of synthesizing a general Hermitian quantum gate into a set of primary quantum gates is addressed. To this end, an extended version of the Jacobi approach for calculating the eigenvalues of Hermitian matrices in linear algebra is considered as the basis of the proposed synthesis method. 
The quantum circuit synthesis method derived from the Jacobi approach and its optimization challenges are described. It is shown that the proposed method results in multiple-control rotation gates around the y axis, multiple-control phase shift gates, multiple-control NOT gates, and a middle diagonal Hermitian matrix, which can be synthesized to multiple-control Pauli Z gates. Using the proposed approach, it is shown how multiple-control U gates, where U is a single-qubit Hermitian quantum gate, can be implemented using a linear number of elementary gates in terms of circuit lines with the aid of one auxiliary qubit in an arbitrary state.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Soeken:2016:ELB, author = "Mathias Soeken and Robert Wille and Oliver Keszocze and D. Michael Miller and Rolf Drechsler", title = "Embedding of Large {Boolean} Functions for Reversible Logic", journal = j-JETC, volume = "12", number = "4", pages = "41:1--41:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2786982", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reversible logic represents the basis for many emerging technologies and has recently been intensively studied. However, most of the Boolean functions of practical interest are irreversible and must be embedded into a reversible function before they can be synthesized. Thus far, an optimal embedding is guaranteed only for small functions, whereas a significant overhead results when large functions are considered. We study this issue in this article. We prove that determining an optimal embedding is coNP-hard already for restricted cases. 
Then, we propose heuristic and exact methods for determining both the number of additional lines and a corresponding embedding. For the approaches, we considered sum of products and binary decision diagrams as function representations. Experimental evaluations show the applicability of the approaches for large functions. Consequently, the reversible embedding of large functions is enabled as a precursor to subsequent synthesis.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tang:2016:DPM, author = "Aoxiang Tang and Xun Gao and Lung-Yen Chen and Niraj K. Jha", title = "Delay\slash Power Modeling and Optimization of {FinFET} Circuit Modules under {PVT} Variations: Observing the Trends between the 22nm and 14nm Technology Nodes", journal = j-JETC, volume = "12", number = "4", pages = "42:1--42:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2795231", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The semiconductor industry has moved to FinFETs because of their superior ability to mitigate short-channel effects relative to CMOS. Thus, good FinFET delay and power models are urgently needed to facilitate FinFET IC design at the upcoming technology nodes. Another urgent problem that needs to be addressed with continued technology scaling is how to analyze circuit performance and power consumption under process, voltage, and temperature (PVT) variations. Such variations arise due to limitations of lithography that lead to variations in the physical dimensions of the device or due to environmental variations. In this article, we propose a delay/power modeling framework for analysis of FinFET logic circuits under PVT variations. 
We present models for FinFET logic gates and three FinFET SRAM cells. We use GenFin, which is a genetic algorithm based statistical circuit-level delay/power optimizer, to produce the models for functional units (FUs) employed in a processor. We compare the impact of PVT variations at the 22nm and 14nm FinFET technology nodes. We evaluate cache performance for various cache capacities and temperatures as well as that of FUs. Our device simulation results show that the $ 3 \sigma / \mu $ spread for 14nm circuits is, on average, 38.5\% higher in dynamic power and 21.4\% higher in logarithm of leakage power relative to 22nm FinFET circuits. However, the delay spread depends on the circuit.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chaudhuri:2016:ULL, author = "Sourindra M. Chaudhuri and Niraj K. Jha", title = "Ultra-Low-Leakage and High-Performance Logic Circuit Design Using Multiparameter Asymmetric {FinFETs}", journal = j-JETC, volume = "12", number = "4", pages = "43:1--43:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2832913", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recently, multigate field-effect transistors have started replacing traditional planar MOSFETs to keep pace with Moore's Law in deep submicron technology. Among different multigate transistors, FinFETs have become the preferred choice of the semiconductor industry owing to low fabrication cost, superior performance, lower leakage, and design flexibility. The back and front gates of a FinFET can either be shorted or remain independent, leading to two modes of operation: Shorted-Gate (SG) and Independent-Gate (IG). 
For a given mode of operation, the physical parameters of the FinFET can either be symmetric or asymmetric in nature. In this article, for the first time, we analyze multiparameter asymmetric SG FinFETs and illustrate their potential for implementing logic gates and circuits that are both ultra-low-leakage and high-performance simultaneously. We restrict this work to SG devices because IG FinFETs (symmetric/asymmetric) suffer from severely degraded on-current, which makes them unattractive for high-performance designs. We first compare head-to-head all viable single- and multiparameter symmetric/asymmetric SG FinFETs. Among all such FinFETs, the traditional SG (which are symmetric in nature), Asymmetric Workfunction Shorted-Gate (AWSG), and Asymmetric Workfunction-Underlap Shorted-Gate (AWUSG) FinFETs show the most promise. We characterize these devices under process variations in gate length $ (L_G) $, fin thickness $ (T_{SI}) $, gate-oxide thickness $ (T_{OX}) $, gate underlap $ (L_{UN}) $, and gate-workfunction $ (\Phi_G) $ as well as supply voltage $ (V_{DD}) $ variations, followed by a gate-level leakage/delay analysis at different temperatures. Although AWSG FinFETs consume very low leakage power, they do suffer from performance degradation relative to SG FinFETs. Similarly, our study reveals that no other single-parameter asymmetric FinFET provides a good combination of low-power and high-performance design. We show that gates/circuits based on AWUSG FinFETs are faster, yet consume much less leakage power and less area than gates/circuits based on traditional SG FinFETs. We observe 53.4\% (30.2\%) maximum (average) reduction in total power at temperature $ T = 348 $K while meeting the same delay constraint, with 14.2\% (13.5\%) reduction in area for AWUSG circuits relative to SG circuits. 
At $ T = 373 $K, we see 68.6\% (46.9\%) maximum (average) reduction in total power.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Beuningen:2016:PPR, author = "Anja {Von Beuningen} and Luca Ramini and Davide Bertozzi and Ulf Schlichtmann", title = "{PROTON+}: a Placement and Routing Tool for {$3$D} Optical Networks-on-Chip with a Single Optical Layer", journal = j-JETC, volume = "12", number = "4", pages = "44:1--44:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2830716", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Optical Networks-on-Chip (ONoCs) are a promising technology to overcome the bottleneck of low bandwidth of electronic Networks-on-Chip. Recent research discusses power and performance benefits of ONoCs based on their system-level design, while layout effects are typically overlooked. As a consequence, laser power requirements are inaccurately computed from the logic scheme but do not consider the layout. In this article, we propose PROTON+, a fast tool for placement and routing of 3D ONoCs minimizing the total laser power. Using our tool, the required laser power of the system can be decreased by up to 94\% compared to a state-of-the-art manually designed layout. In addition, with the help of our tool, we study the physical design space of ONoC topologies. For this purpose, topology synthesis methods (e.g., global connectivity and network partitioning) as well as different objective function weights are analyzed in order to minimize the maximum insertion loss and ultimately the system's laser power consumption. For the first time, we study optimal positions of memory controllers. 
A comparison of our algorithm to a state-of-the-art placer for electronic circuits shows the need for a different set of tools custom-tailored for the particular requirements of optical interconnects.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Dehghani:2016:NAO, author = "Abbas Dehghani and Kamal Jamshidi", title = "A Novel Approach to Optimize Fault-Tolerant Hybrid Wireless Network-on-Chip Architectures", journal = j-JETC, volume = "12", number = "4", pages = "45:1--45:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2814572", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Wireless Network-on-Chip (WNoC) architectures have emerged as a promising interconnection infrastructure to address the performance limitations of traditional wire-based multihop NOCs. Nevertheless, the WNoC systems encounter high failure rates due to problems pertaining to integration and manufacturing of wireless interconnection in nano-domain technology. As a result, the permanent failures may lead to the formation of any shape of faulty regions in the interconnection network, which can break down the whole system. This issue is not investigated in previous studies on WNoC architectures. Our solution advocates the adoption of communication structures with both node and link on disjoint paths. On the other hand, the imposed costs of WNoC design must be reasonable. Hence, a novel approach to design an optimized fault-tolerant hybrid hierarchical WNoC architecture for enhancing performance as well as minimizing system costs is proposed. 
The experimental results indicate that the robustness of this newly proposed design is significantly enhanced in comparison with its fault-tolerant wire-based counterparts in the presence of various faulty regions under both synthetic and application-specific traffic patterns.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mittal:2016:SAT, author = "Sparsh Mittal", title = "A Survey of Architectural Techniques for Near-Threshold Computing", journal = j-JETC, volume = "12", number = "4", pages = "46:1--46:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2821510", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Dec 1 09:26:07 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Energy efficiency has now become the primary obstacle in scaling the performance of all classes of computing systems. Low-voltage computing, specifically, near-threshold voltage computing (NTC), which involves operating the transistor very close to and yet above its threshold voltage, holds the promise of providing many-fold improvement in energy efficiency. However, use of NTC also presents several challenges such as increased parametric variation, failure rate, and performance loss. This article surveys several recent techniques that aim to offset these challenges for fully leveraging the potential of NTC. By classifying these techniques along several dimensions, we also highlight their similarities and differences.
It is hoped that this article will provide insights into state-of-the-art NTC techniques to researchers and system designers and inspire further research in this field.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sinanoglu:2016:GES, author = "Ozgur Sinanoglu and Ramesh Karri", title = "Guest Editorial Special Issue on Secure and Trustworthy Computing", journal = j-JETC, volume = "13", number = "1", pages = "1:1--1:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2898433", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Das:2016:MPU, author = "Jayita Das and Kevin Scott and Sanjukta Bhanja", title = "{MRAM PUF}: Using Geometric and Resistive Variations in {MRAM} Cells", journal = j-JETC, volume = "13", number = "1", pages = "2:1--2:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2854154", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this work, we have studied two novel techniques to enhance the performance of existing geometry-based magnetoresistive RAM physically unclonable function (MRAM PUF). Geometry-based MRAM PUFs rely only on geometric variations in MRAM cells that generate preferred ground state in cells and form the basis of digital signature generation. Here we study two novel ways to improve the performance of the geometry-based PUF signature. First, we study how the choice between specific geometries can enhance the reliability of the digital signature. 
Using fabrications and simulations, we study how the rectangular shape in the PUF cells is more susceptible to lithography-based geometric variations than the elliptical shape of the same aspect ratio. The choice of rectangular over elliptical masks in the lithography process can therefore improve the reliability of the digital signature from PUF. Second, we present a MRAM PUF architecture and study how resistances in MRAM cells can be used to generate analog voltage outputs that are easier to detect if probed by an adversary. In the new PUF architecture, we have the choice between selection of rows and columns to generate unique and hard-to-predict analog voltage outputs. For a 64-bit response, the analog voltage output can range between 20 and 500 mV, making it tough for an adversary to guess over this wide range of voltages. This work ends with a discussion on the threat resilience ability of the new improved MRAM PUF to attacks from probing-, tampering-, reuse-, and simulation-based models.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bi:2016:ETB, author = "Yu Bi and Kaveh Shamsi and Jiann-Shiun Yuan and Pierre-Emmanuel Gaillardon and Giovanni {De Micheli} and Xunzhao Yin and X. Sharon Hu and Michael Niemier and Yier Jin", title = "Emerging Technology-Based Design of Primitives for Hardware Security", journal = j-JETC, volume = "13", number = "1", pages = "3:1--3:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2816818", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Hardware security concerns such as intellectual property (IP) piracy and hardware Trojans have triggered research into circuit protection and malicious logic detection from various design perspectives.
In this article, emerging technologies are investigated by leveraging their unique properties for applications in the hardware security domain. Security, for the first time, will be treated as one design metric for emerging nano-architecture. Five example circuit structures including camouflaging gates, polymorphic gates, current/voltage-based circuit protectors, and current-based XOR logic are designed to show the high efficiency of silicon nanowire FETs and graphene SymFET in applications such as circuit protection and IP piracy prevention. Simulation results indicate that highly efficient and secure circuit structures can be achieved via the use of non-CMOS devices.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Iyengar:2016:SPS, author = "Anirudh Iyengar and Swaroop Ghosh and Kenneth Ramclam and Jae-Won Jang and Cheng-Wei Lin", title = "Spintronic {PUFs} for Security, Trust, and Authentication", journal = j-JETC, volume = "13", number = "1", pages = "4:1--4:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2809781", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We propose spintronic physically unclonable functions (PUFs) to exploit security-specific properties of domain wall memory (DWM) for security, trust, and authentication. We note that the nonlinear dynamics of domain walls (DWs) in the physical magnetic system is an untapped source of entropy that can be leveraged for hardware security. The spatial and temporal randomness in the physical system is employed in conjunction with microscopic and macroscopic properties such as stochastic DW motion, stochastic pinning/depinning, and serial access to realize novel relay-PUF and memory-PUF designs. 
The proposed PUFs show promising results ($ \approx $50\% interdie Hamming distance (HD) and 10\% to 20\% intradie HD) in terms of randomness, stability, and resistance to attacks. We have investigated noninvasive attacks, such as machine learning and magnetic field attack, and have assessed the PUFs' resilience.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Vatajelu:2016:SMB, author = "Elena Ioana Vatajelu and Giorgio {Di Natale} and Mario Barbareschi and Lionel Torres and Marco Indaco and Paolo Prinetto", title = "{STT--MRAM}-Based {PUF} Architecture Exploiting Magnetic Tunnel Junction Fabrication-Induced Variability", journal = j-JETC, volume = "13", number = "1", pages = "5:1--5:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2790302", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Physically Unclonable Functions (PUFs) are emerging cryptographic primitives used to implement low-cost device authentication and secure secret key generation. Weak PUFs (i.e., devices able to generate a single signature or to deal with a limited number of challenges) are widely discussed in literature. One of the most investigated solutions today is based on SRAMs. However, the rapid development of low-power, high-density, high-performance SoCs has pushed the embedded memories to their limits and opened the field to the development of emerging memory technologies. The Spin-Transfer-Torque Magnetic Random Access Memory (STT-MRAM) has emerged as a promising choice for embedded memories due to its reduced read/write latency and high CMOS integration capability. In this article, we propose an innovative PUF design based on STT-MRAM memory.
We exploit the high variability affecting the electrical resistance of the Magnetic Tunnel Junction (MTJ) device in anti-parallel magnetization. We will demonstrate that the proposed solution is robust, unclonable, and unpredictable.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Quadir:2016:SCS, author = "Shahed E. Quadir and Junlin Chen and Domenic Forte and Navid Asadizanjani and Sina Shahbazmohamadi and Lei Wang and John Chandy and Mark Tehranipoor", title = "A Survey on Chip to System Reverse Engineering", journal = j-JETC, volume = "13", number = "1", pages = "6:1--6:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2755563", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The reverse engineering (RE) of electronic chips and systems can be used with honest and dishonest intentions. To inhibit RE for those with dishonest intentions (e.g., piracy and counterfeiting), it is important that the community is aware of the state-of-the-art capabilities available to attackers today. In this article, we will be presenting a survey of RE and anti-RE techniques on the chip, board, and system levels. We also highlight the current challenges and limitations of anti-RE and the research needed to overcome them. 
This survey should be of interest to both governmental and industrial bodies whose critical systems and intellectual property (IP) require protection from foreign enemies and counterfeiters who possess advanced RE capabilities.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Castro:2016:FVB, author = "Stephan {De Castro} and Jean-Max Dutertre and Bruno Rouzeyre and Giorgio {Di Natale} and Marie-Lise Flottes", title = "Frontside Versus Backside Laser Injection: a Comparative Study", journal = j-JETC, volume = "13", number = "1", pages = "7:1--7:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2845999", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The development of cryptographic devices was followed by the development of so-called implementation attacks, which are intended to retrieve secret information exploiting the hardware itself. Among these attacks, fault attacks can be used to disturb the circuit while performing a computation to retrieve the secret. Among possible means of injecting a fault, laser beams have proven to be accurate and powerful. The laser can be used to illuminate the circuit either from its frontside (i.e., where metal interconnections are first encountered) or from the backside (i.e., through the substrate). Historically, frontside injection was preferred because it does not require the die to be thinned. Nevertheless, due to the increasing integration of metal layers in modern technologies, frontside injections do not allow targeting of any desired location. Indeed, metal lines act as mirrors, and they reflect and refract most of the energy provided by the laser beam. 
Conversely, backside injections, although more difficult to set up, allow an increase of the resolution of the target location and remove the drawbacks of the frontside technique. This article compares experimental results from frontside and backside fault injections. The effectiveness of the two techniques is measured in terms of exploitable errors on an AES circuit (i.e., errors that can be used to extract the value of the secret key used during the encryption process). We will show, conversely to what is generally assumed, that frontside injection can provide even better results compared to backside injection, especially for low-cost beams with a large laser spot.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Barenghi:2016:FBS, author = "Alessandro Barenghi and Guido M. Bertoni and Luca Breveglieri and Gerardo Pelosi and Stefano Sanfilippo and Ruggero Susella", title = "A Fault-Based Secret Key Retrieval Method for {ECDSA}: Analysis and Countermeasure", journal = j-JETC, volume = "13", number = "1", pages = "8:1--8:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2767132", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Elliptic curve cryptosystems proved to be well suited for securing systems with constrained resources like embedded and portable devices. In a fault-based attack, errors are induced during the computation of a cryptographic primitive, and the results are collected to derive information about the secret key safely stored in the device. 
We introduce a novel attack methodology to recover the secret key employed in implementations of the Elliptic Curve Digital Signature Algorithm. Our attack exploits the information leakage induced when altering the execution of the modular arithmetic operations used in the signature primitive and does not rely on the underlying elliptic curve mathematical structure, thus being applicable to all standardized curves. We provide both a validation of the feasibility of the attack, even employing common off-the-shelf hardware to perform the required computations, and a low-cost countermeasure to counteract it.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lao:2016:BFD, author = "Yingjie Lao and Qianying Tang and Chris H. Kim and Keshab K. Parhi", title = "Beat Frequency Detector-Based High-Speed True Random Number Generators: Statistical Modeling and Analysis", journal = j-JETC, volume = "13", number = "1", pages = "9:1--9:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2866574", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib", abstract = "True random number generators (TRNGs) are crucial components for the security of cryptographic systems. In contrast to pseudo--random number generators (PRNGs), TRNGs provide higher security by extracting randomness from physical phenomena. To evaluate a TRNG, statistical properties of the circuit model and raw bitstream should be studied. In this article, a model for the beat frequency detector--based high-speed TRNG (BFD-TRNG) is proposed. The parameters of the model are extracted from the experimental data of a test chip. 
A statistical analysis of the proposed model is carried out to derive mean and variance of the counter values of the TRNG. Our statistical analysis results show that mean of the counter values is inversely proportional to the frequency difference of the two ring oscillators (ROSCs), whereas the dynamic range of the counter values increases linearly with standard deviation of environmental noise and decreases with increase of the frequency difference. Without the measurements from the test data, a model cannot be created; similarly, without a model, performance of a TRNG cannot be predicted. The key contribution of the proposed approach lies in fitting the model to measured data and the ability to use the model to predict performance of BFD-TRNGs that have not been fabricated. Several novel alternate BFD-TRNG architectures are also proposed; these include parallel BFD, cascade BFD, and parallel-cascade BFD. These TRNGs are analyzed using the proposed model, and it is shown that the parallel BFD structure requires less area per bit, whereas the cascade BFD structure has a larger dynamic range while maintaining the same mean of the counter values as the original BFD-TRNG. It is shown that 3.25 M and 4 M random bits can be obtained per counter value from parallel BFD and parallel-cascade BFD, respectively, where M counter values are computed in parallel. Furthermore, the statistical analysis results illustrate that BFD-TRNGs have better randomness and less cost per bit than other existing ROSC-TRNG designs. 
For example, it is shown that BFD-TRNGs accumulate 150\% more jitter than the original two-oscillator TRNG and that parallel BFD-TRNGs require one-third power and one-half area for same number of random bits for a specified period.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kulkarni:2016:RTA, author = "Amey Kulkarni and Youngok Pino and Matthew French and Tinoosh Mohsenin", title = "Real-Time Anomaly Detection Framework for Many-Core Router through Machine-Learning Techniques", journal = j-JETC, volume = "13", number = "1", pages = "10:1--10:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2827699", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we propose a real-time anomaly detection framework for an NoC-based many-core architecture. We assume that processing cores and memories are safe and anomaly is included through a communication medium (i.e., router). The article targets three different attacks, namely, traffic diversion, route looping, and core address spoofing attacks. The attacks are detected by using machine-learning techniques. Comprehensive analysis on machine-learning algorithms suggests that Support Vector Machine (SVM) and K-Nearest Neighbor (K-NN) have better attack detection efficiency. It has been observed that both algorithms have accuracy in the range of 94\% to 97\%. Additional hardware complexity analysis advocates SVM to be implemented on hardware. To test the framework, we implement a condition-based attack insertion module; attacks are performed intra- and intercluster. The proposed real-time anomaly detection framework is fully placed and routed on Xilinx Virtex-7 FPGA. 
Postplace and -route implementation results show that SVM has 12\% to 2\% area overhead and 3\% to 1\% power overhead for the quad-core and 16-core implementation, respectively. It is also observed that it takes 25\% to 18\% of the total execution time to detect an anomaly in transferred packets for quad-core and 16-core, respectively. The proposed framework achieves 65\% reduction in area overhead and is 3 times faster compared to previous published work.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Deb:2016:GVS, author = "Arighna Deb and Robert Wille and Oliver Kesz{\"o}cze and Stefan Hillmich and Rolf Drechsler", title = "Gates vs. Splitters: Contradictory Optimization Objectives in the Synthesis of Optical Circuits", journal = j-JETC, volume = "13", number = "1", pages = "11:1--11:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2904445", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Optical circuits are considered a promising emerging technology for applications in ultra-high-speed networks or interconnects. However, the development of (automatic) synthesis approaches for such circuits is still in its infancy. Although first generic and automatic synthesis approaches have been proposed, no clear understanding exists yet on how to keep the costs of the resulting circuits as small as possible. In the domain of optical circuits, this is particularly interesting for the number of gates and the effect of so-called splitters to the signal strength. In this work, we investigate this relation by considering a variety of (existing as well as proposed) synthesis approaches for optical circuits. 
Our investigations show that reducing the number of gates and reducing the number of splitters are contradictory optimization objectives. Furthermore, the performance of synthesis guided with respect to gate efficiency as well as synthesis guided with respect to splitter freeness is evaluated and an overhead factor between the contradictory metrics is experimentally determined.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Todri-Sanial:2017:GES, author = "Aida Todri-Sanial and Saraju P. Mohanty and Mariane Comte and Marc Belleville", title = "Guest Editorial: Special Issue on Nanoelectronic Circuit and System Design Methods for the Mobile Computing Era", journal = j-JETC, volume = "13", number = "2", pages = "12:1--12:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3003370", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Sartor:2017:EIH, author = "Anderson L. Sartor and Arthur F. Lorenzon and Luigi Carro and Fernanda Kastensmidt and Stephan Wong and Antonio C. S. Beck", title = "Exploiting Idle Hardware to Provide Low Overhead Fault Tolerance for {VLIW} Processors", journal = j-JETC, volume = "13", number = "2", pages = "13:1--13:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3001935", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Because of technology scaling, the soft error rate has been increasing in digital circuits, which affects system reliability. 
Therefore, modern processors, including VLIW architectures, must have means to mitigate such effects to guarantee reliable computing. In this scenario, our work proposes three low overhead fault tolerance approaches based on instruction duplication with zero latency detection, which uses a rollback mechanism to correct soft errors in the pipelanes of a configurable VLIW processor. The first uses idle issue slots within a period of time to execute extra instructions considering distinct application phases. The second works at a finer grain, adaptively exploiting idle functional units at run-time. However, some applications present high instruction-level parallelism (ILP), so the ability to provide fault tolerance is reduced: less functional units will be idle, decreasing the number of potential duplicated instructions. The third approach attacks this issue by dynamically reducing ILP according to a configurable threshold, increasing fault tolerance at the cost of performance. While the first two approaches achieve significant fault coverage with minimal area and power overhead for applications with low ILP, the latter improves fault tolerance with low performance degradation. All approaches are evaluated considering area, performance, power dissipation, and error coverage.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Fang:2017:SPM, author = "Yan Fang and Victor V. Yashin and Brandon B. Jennings and Donald M. Chiarulli and Steven P. 
Levitan", title = "A Simplified Phase Model for Simulation of Oscillator-Based Computing Systems", journal = j-JETC, volume = "13", number = "2", pages = "14:1--14:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2976743", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Building oscillator-based computing systems with emerging nano-device technologies has become a promising solution for unconventional computing tasks like computer vision and pattern recognition. However, simulation and analysis of these computing systems is both time and compute intensive due to the nonlinearity of new devices and the complex behavior of coupled oscillators. In order to speed up the simulation of coupled oscillator systems, we propose a simplified phase model to perform phase and frequency synchronization prediction based on a synthesis of earlier models. Our model can predict the frequency-locking behavior with several orders of magnitude speedup compared to direct evaluation, enabling the effective and efficient simulation of the large numbers of oscillators required for practical computing systems. We demonstrate the oscillator-based computing paradigm with three applications, pattern matching, convolution, and image segmentation. The simulation with these models are respectively sped up by factors of 780, 300, and 1120 in our tests.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Singhvi:2017:FGU, author = "Ajay Singhvi and Matheus T. Moreira and Ramy N. Tadros and Ney L. V. Calazans and Peter A. 
Beerel", title = "A Fine-Grain, Uniform, Energy-Efficient Delay Element for $2$-Phase Bundled-Data Circuits", journal = j-JETC, volume = "13", number = "2", pages = "15:1--15:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2948067", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Contemporary digitally controlled delay elements (DEs) trade off power overheads and delay quantization error (DQE). This article proposes a new programmable DE that provides a balanced design that yields low power with moderate DQE even under process, voltage, and temperature variations. The element employs and leverages the advantages offered by a 28nm fully depleted silicon on insulator technology, using back body biasing to add an extra dimension to its programmability. To do so, a novel generic delay shift block is proposed, which enables incorporating both fine and coarse delays in a single DE that can be easily integrated into digital systems, which is an advantage over hybrid DEs that rely on analog design.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mohammadi:2017:FTR, author = "Hassan Ghasemzadeh Mohammadi and Pierre-Emmanuel Gaillardon and Jian Zhang and Giovanni {De Micheli} and Ernesto Sanchez and Matteo Sonza Reorda", title = "A Fault-Tolerant Ripple-Carry Adder with Controllable-Polarity Transistors", journal = j-JETC, volume = "13", number = "2", pages = "16:1--16:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2988234", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article first explores the effects of faults on circuits implemented with controllable-polarity transistors. 
We propose a new fault model that suits the characteristics of these devices, and we report the results of a SPICE-based analysis of the effects of faults on the behavior of some basic gates implemented with them. Hence, we show that the considered devices are able to intrinsically tolerate a rather high number of faults. We finally exploit this property to build a robust and scalable adder whose area, performance, and leakage power characteristics are improved by 15\%, 18\%, and 12\%, respectively, when compared to an equivalent FinFET solution at 22nm technology node.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Senni:2017:NVP, author = "Sophiane Senni and Lionel Torres and Gilles Sassatelli and Abdoulaye Gamatie", title = "Non-Volatile Processor Based on {MRAM} for Ultra-Low-Power {IoT} Devices", journal = j-JETC, volume = "13", number = "2", pages = "17:1--17:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3001936", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Over the past few years, a new era of smart connected devices has emerged in the market to enable the future world of the Internet of Things (IoT). A key requirement for IoT applications is the power consumption to allow very high autonomy in the case of battery-powered systems. Depending on the application, such devices will be most of the time in a low-power mode (sleep mode) and will wake up only when there is a task to accomplish (active mode). Emerging non-volatile memory technologies are seen as a very attractive solution to design ultra-low-power systems. Among these technologies, magnetic random access memory is a promising candidate, as it combines non-volatility, high density, reasonable latency, and low leakage.
Integration of non-volatility as a new feature of memories has the great potential to allow full data retention after a complete shutdown with a fast wake-up time. This article explores the benefits of having a non-volatile processor to enable ultra-low-power IoT devices.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Rakshit:2017:MTS, author = "Joydeep Rakshit and Kartik Mohanram and Runlai Wan and Kai Tak Lam and Jing Guo", title = "Monolayer Transistor {SRAMs}: Toward Low-Power, Denser Memory Systems", journal = j-JETC, volume = "13", number = "2", pages = "18:1--18:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967613", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Monolayer heterojunction FETs based on vertical heterogeneous transition metal dichalcogenides (TMDCFETs) and planar black phosphorus FETs (BPFETs) have demonstrated excellent subthreshold swing, high I$_{ON}$/I$_{OFF}$, and high scalability, making them attractive candidates for post-CMOS memory design. This article explores TMDCFET and BPFET SRAM design by combining atomistic self-consistent device modeling with SRAM circuit design and simulation. We perform detailed evaluations of the TMDCFET/BPFET SRAMs at a single bitcell and at SRAM array level. Our simulations show that at low operating voltages, TMDCFET/BPFET SRAMs exhibit significant advantages in static power, dynamic read/write noise margin, and read/write delay over nominal 16nm CMOS SRAMs at both bitcell and array-level implementations. We also analyze the effect of process variations on the performance of TMDCFET/BPFET SRAMs.
Our simulations demonstrate that TMDCFET/BPFET SRAMs exhibit high tolerance to process variations, which is desirable for low operating voltages.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2017:ACP, author = "Xuan Wang and Jiang Xu and Zhe Wang and Haoran Li and Zhehui Wang and Peng Yang and Luan H. K. Duong and Rafael K. V. Maeda and Zhifei Wang", title = "Alleviate Chip Pin Constraint for Multicore Processor by On\slash Off-Chip Power Delivery System Codesign", journal = j-JETC, volume = "13", number = "2", pages = "19:1--19:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2914791", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The number of chip pins is limited due to the cost and reliability issues of sophisticated packages, and it is predicted that the chip pin count will be overstretched to satisfy the requirements of both power delivery and memory access. The gap between the achievable pin count and the demand will increase as the technology scales, due to the increasing computation resources and supply current. Pin reduction techniques are thus required for continued computing performance growth. In this article, we propose a chip pin constraint alleviation strategy, through on/off-chip power delivery system co-design, to effectively reduce the demand for power pins. An analytical model of a power delivery system, consisting of on/off-chip regulators and a power delivery network, is proposed to evaluate the influence of regulator design and package conduction loss. 
By combining this model with a multi-core processor model of performance and memory bandwidth requirements, we characterize the entire multi-core processor system to investigate the relationship between the chip pin constraint and performance in multi-core processor scaling and the effectiveness of our strategy. Experiments show that with the conventional power delivery system design, the chip pin constraint severely limits the performance growth as the technology scales. Using the on/off-chip power delivery system co-design, our strategy achieves a significant pin count reduction, for example, 31.3\% at the 8nm technology node, compared to the conventional design with the same chip performance, while, provided with the same chip pin count, it is able to improve, by 35.0\%, the chip performance at 8nm compared to the conventional design. For real applications of different parallelism, our strategy outperforms its counterpart, with a 23.7\% performance improvement on average at the 8nm technology node.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Pajouhi:2017:YAE, author = "Zoha Pajouhi and Xuanyao Fong and Anand Raghunathan and Kaushik Roy", title = "Yield, Area, and Energy Optimization in {STT--MRAMs} Using Failure-Aware {ECC}", journal = j-JETC, volume = "13", number = "2", pages = "20:1--20:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2934685", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin-Transfer Torque MRAMs are attractive due to their non-volatility, high density, and zero leakage. However, STT-MRAMs suffer from poor reliability due to shared read and write paths. 
Additionally, conflicting requirements for data retention and writeability (both related to the energy barrier height of the storage device) makes design more challenging. Furthermore, the energy barrier height depends on the geometry of the storage. Any variations in the geometry of the storage device lead to variations in the energy barrier height. In order to address the poor reliability of STT-MRAMs, usage of Error Correcting Codes (ECC) has been proposed. Unlike traditional CMOS memory technologies, ECC is expected to correct both soft and hard errors in STT-MRAMs. To achieve acceptable yield with low write power, stronger ECC is required, resulting in increased number of encoded bits and degraded memory capacity. In this article, we propose Failure-aware ECC (FaECC), which masks permanent faults while maintaining the same correction capability for soft errors without increased number of encoded bits. Furthermore, we investigate the impact of process variations on run-time reliability of STT-MRAMs. In order to analyze the effectiveness of our methodology, we developed a cross-layer simulation framework that consists of device, circuit and array level analysis of STT-MRAM memory arrays. Our results show that using FaECC relaxes the requirements on the energy barrier height, which reduces the write energy and results in smaller access transistor size and memory array area.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mankalale:2017:OSC, author = "Meghna G. Mankalale and Sachin S. 
Sapatnekar", title = "Optimized Standard Cells for All-Spin Logic", journal = j-JETC, volume = "13", number = "2", pages = "21:1--21:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967612", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "All-Spin Logic (ASL) devices provide a promising spintronics-based alternative for Boolean logic implementations in the post-Complementary Metal-Oxide Semiconductor (CMOS) era. In principle, any logic functionality can be implemented in ASL. In practice, the performance of an ASL gate is significantly affected by layout choices, but such implications have not been adequately explored in the past. This article proposes a systematic approach for building standard cells in ASL, which are a basic building block in an overall design methodology for implementing large ASL-based circuits. We first propose a new technique to reduce the magnet count for an ASL majority gate but still ensure correct functioning through layout optimization methods. Building on physics-based analysis, we then build a standard cell library with diverse functionality and characterize the library for delay, energy, and area. We perform delay-optimized technology mapping on ISCAS85 benchmark circuits using our library. 
Our approach results in circuits that are 12.90\% faster, consume 26.16\% less energy, and are 33.56\% more area efficient compared to a standard cell library that does not incorporate layout-based optimization techniques of our work.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Jiang:2017:SLD, author = "Wei Jiang and Liang Wen and Ke Jiang and Xia Zhang and Xiong Pan and Keran Zhou", title = "System-Level Design to Detect Fault Injection Attacks on Embedded Real-Time Applications", journal = j-JETC, volume = "13", number = "2", pages = "22:1--22:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967611", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Fault injection attack has been a serious threat to security-critical embedded systems for a long time, yet existing research ignores addressing of the problem from a system-level perspective. This article presents an approach to the synthesis of secure real-time applications mapped on distributed embedded systems, which focuses on preventing fault injection attacks of the security protection on processing units. We utilize symmetric cryptographic service to protect confidentiality and deploy fault detection within a confidential algorithm to resist fault injection attacks. Several fault detection schemes are identified, and their fault coverage rates and time overheads are derived and measured. Our synthesis approach makes efforts to determine the best fault detection schemes for the encryption/decryption of messages such that the overall security strength of detecting a fault injection attack is maximized and the deadline constraint of the real-time applications is guaranteed. 
Due to the complexity of the problem, we propose an efficient algorithm based on the fruit fly optimization algorithm, and we compare it to the simulated annealing approach. Extensive experiments and a real-life application evaluation demonstrate the superiority of our approach.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Goud:2017:AUF, author = "A. Arun Goud and Rangharajan Venkatesan and Anand Raghunathan and Kaushik Roy", title = "Asymmetric Underlapped {FinFETs} for Near- and Super-Threshold Logic at Sub-10nm Technology Nodes", journal = j-JETC, volume = "13", number = "2", pages = "23:1--23:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967615", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Extending double-gate FinFET scaling to sub-10nm technology regime requires device-engineering techniques for countering the rise of direct source to drain tunneling (DSDT), edge direct tunneling (EDT) and short channel effects (SCE) that degrade FinFET I-V characteristics. Symmetric underlap is effective for eliminating EDT, diminishing DSDT, and lowering the fringe component of gate capacitance. However, excessive symmetric underlap also lowers the on-current, which is mainly due to thermionic emission. In this work, it is demonstrated that at sub-10nm node, asymmetric underlapped FinFETs with slightly longer underlap toward drain side than source side are superior to symmetric underlapped FinFETs due to further improvement in I$_{on}$/I$_{off}$ and reduction in gate-to-drain capacitance. Using quantum mechanical device simulations, FinFETs with various degrees of underlap have been analyzed for improvement in I-V characteristics.
A FinFET model for circuit simulations has been constructed that captures the major sub-10nm leakage components, namely, thermionic emission, DSDT, EDT, direct gate oxide tunneling and its associated components. By simulating a 10-stage NAND circuit and a LEON3 processor with interconnect parasitics using these devices, it is shown that asymmetric underlap instead of symmetric underlap in sub-10nm FinFETs can offer lower energy consumption with improved performance for near-threshold logic and higher energy-efficiency for super-threshold logic operation.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Abellan:2017:EPN, author = "Jos{\'e} L. Abell{\'a}n and Chao Chen and Ajay Joshi", title = "Electro-Photonic {NoC} Designs for Kilocore Systems", journal = j-JETC, volume = "13", number = "2", pages = "24:1--24:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967614", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The increasing core count in manycore systems requires a corresponding large Network-on-chip (NoC) bandwidth to support the overlying applications. However, it is not possible to provide this large bandwidth in an energy-efficient manner using electrical link technology. To overcome this issue, photonic link technology has been proposed as a replacement. This work explores the limits and opportunities for using photonic links to design the NoC architecture for a future Kilocore system. Three different NoC designs are explored: ElecNoC, an electrical concentrated two-dimensional- (2D) mesh NoC; HybNoC, an electrical concentrated 2D mesh with a photonic multi-crossbar NoC; and PhotoNoC, a photonic multi-bus NoC. 
We consider both private and shared cache architectures and, to leverage the large bandwidth density of photonic links, we investigate the use of prefetching and aggressive non-blocking caches. Our analysis using contemporary Big Data workloads shows that the non-blocking caches with a shared LLC can best leverage the large bandwidth of the photonic links in the Kilocore system. Moreover, compared to ElecNoC-based and HybNoC-based Kilocore systems, a PhotoNoC-based Kilocore system achieves up to 2.5$ \times $ and 1.5$ \times $ better performance, respectively, and can support up to 2.1$ \times $ and 1.1$ \times $ higher bandwidth, respectively, while dissipating comparable power in the overall system.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2017:OSS, author = "Yao Wang and Liang Rong and Haibo Wang and Guangjun Wen", title = "One-Step Sneak-Path Free Read Scheme for Resistive Crossbar Memory", journal = j-JETC, volume = "13", number = "2", pages = "25:1--25:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012002", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A one-step sneak-path free read scheme for resistive crossbar memory is proposed in this article. During read operation, it configures the crossbar array into a four-terminal resistance network, which is composed of the selected cell and three other resistors corresponding to unselected cells that contribute to the sneak-path. Two sensing voltages with equal potential are applied to three terminals of the network. One is for sensing the resistance of the selected cell; the other is for creating zero-voltage drop across one of the three resistors, which connects the sneak-path to the selected cell. 
This effectively suppresses the current injected by the sneak-path to the selected cell-sensing loop. This work also proposes a cost-effective data-encoding circuit that guarantees that at least half of the memory cells are in a high-resistance state, which further minimizes sneak-path current. The impact of key design parameters, such as sensing voltage, switch on-resistance, and the ratio of memory cell resistances in different states, as well as nonideal effects are investigated. Equations for estimating the maximum array size to share a single read circuit are derived. The effectiveness of the proposed design has been validated via circuit simulations. Impacts of the word-/bit-line resistance are also analyzed.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Guler:2017:ULL, author = "Abdullah Guler and Niraj K. Jha", title = "Ultra-low-leakage, Robust {FinFET SRAM} Design Using Multiparameter Asymmetric {FinFETs}", journal = j-JETC, volume = "13", number = "2", pages = "26:1--26:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2988233", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Memory arrays consisting of Static Random Access Memory (SRAM) cells occupy the largest area on chip and are responsible for significant leakage power consumption in modern microprocessors. With the transition from planar Complementary Metal-Oxide-Semiconductor (CMOS) technology to FinFETs, FinFET SRAM design has become important. However, increasing leakage power consumption of FinFETs due to aggressive scaling, width quantization, read-write conflict, and process variations make FinFET SRAM design challenging. 
In this article, we show how Multiparameter Asymmetric (MPA) FinFETs can be used to design ultra-low-leakage and robust 6T SRAM cells. We combine multiple asymmetries, namely, asymmetry in gate work function, source/drain doping concentration, and gate underlap, to address various SRAM design issues all at once. We propose five novel MPA FinFET SRAM cell designs and compare them with symmetric and Single-Parameter Asymmetric (SPA) FinFET SRAM cells using dc and transient metrics. We show that the leakage current of MPA FinFET SRAM cells can be reduced by up to 58 $ \times $ while ensuring reasonable read/write stability metric values. In addition, high stability metric values can be achieved with 22 $ \times $ leakage current reduction compared to the traditional symmetric FinFET SRAM cell. There is no area overhead associated with MPA FinFET SRAM cells.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhang:2017:SSR, author = "Hang Zhang and Xuhao Chen and Nong Xiao and Lei Wang and Fang Liu and Wei Chen and Zhiguang Chen", title = "Shielding {STT--RAM} Based Register Files on {GPUs} against Read Disturbance", journal = j-JETC, volume = "13", number = "2", pages = "27:1--27:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996191", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "To address the high energy consumption issue of SRAM on GPUs, emerging Spin-Transfer Torque (STT-RAM) memory technology has been intensively studied to build GPU register files for better energy-efficiency, thanks to its benefits of low leakage power, high density, and good scalability. 
However, STT-RAM suffers from the read disturbance issue, which stems from the fact that the voltage difference between read current and write current becomes smaller as technology scales. The read disturbance leads to high error rates for read operations, which cannot be effectively protected by the SEC-DED ECC on large-capacity register files of GPUs. Prior schemes (e.g., read-restore) to mitigate the read disturbance usually incur either non-trivial performance loss or excessive energy overhead, thus not applicable for the GPU register file design that aims to achieve both high performance and energy-efficiency. To combat the read disturbance, we propose a novel software-hardware co-designed solution (i.e., Red-Shield), which consists of three optimizations to overcome the limitations of the existing solutions. First, we identify dead reads at compiling stage and augment instructions to avoid unnecessary restores. Second, we employ a small read buffer to accommodate register reads with high-access locality to further reduce restores. Third, we propose an adaptive restore mechanism to selectively pick the suitable restore scheme, according to the busy status of corresponding register banks.
Experimental results show that our proposed design can effectively mitigate the performance loss and energy overhead caused by restore operations while still maintaining the reliability of reads.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Biswas:2017:SAT, author = "Arnab Kumar Biswas", title = "Source Authentication Techniques for Network-on-Chip Router Configuration Packets", journal = j-JETC, volume = "13", number = "2", pages = "28:1--28:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996194", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "It is known that maliciously configured Network-on-Chip routers can enable an attacker to launch different attacks inside a Multiprocessor System-on-Chip. A source authentication mechanism for router configuration packets can prevent such vulnerability. This ensures that a router is configured by the configuration packets sent only by a trusted configuration source. Conventional method like Secure Hash Algorithm-3 (SHA-3) can provide required source authentication in a router but with a router area overhead of 1355.25\% compared to a normal router area. We propose eight source authentication mechanisms that can achieve similar level of security as SHA-3 for a router configuration perspective without causing significant area and power increase. Moreover, the processing time of our proposed techniques is 1/100th of SHA-3 implementation. Most of our proposed techniques use different timing channel watermarking methods to transfer source authentication data to the receiver router. We also propose the Individual packet-based stream authentication technique and combinations of this technique with timing channel watermarking techniques. 
It is shown that, among all of our proposed techniques, maximum router area increment required is 28.32\% compared to a normal router.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mittal:2017:STA, author = "Sparsh Mittal", title = "A Survey of Techniques for Architecting Processor Components Using Domain-Wall Memory", journal = j-JETC, volume = "13", number = "2", pages = "29:1--29:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2994550", ISSN = "1550-4832", bibdate = "Sat Apr 8 10:16:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Recent trends of increasing core-count and bandwidth/memory wall have motivated researchers to explore novel memory technologies for designing processor components such as cache, register file, shared memory, and so on. Domain-wall memory (DWM), also known as racetrack memory, is a promising emerging technology due to its non-volatility and very high density. However, use of DWM presents challenges due to characteristics of both DWM itself (e.g., requirement of shift operations, variable latency) and processor components. Recently, several techniques have been proposed to address these challenges. This article presents a survey of architectural techniques for using DWM for designing components in both CPU and GPU. We discuss techniques related to performance, energy, and reliability and also discuss works that compare DWM with other memory technologies. We also highlight the opportunities and obstacles in using DWM for designing processor components. 
This survey is expected to spark further research in this area and be useful for researchers, chip designers, and computer architects.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cao:2017:GEI, author = "Yu Cao and Xin Li and Taemin Kim and Suyog Gupta", title = "Guest Editors' Introduction: Hardware and Algorithms for On-Chip Learning", journal = j-JETC, volume = "13", number = "3", pages = "30:1--30:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3022193", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Page:2017:SHA, author = "Adam Page and Ali Jafari and Colin Shea and Tinoosh Mohsenin", title = "{SPARCNet}: a Hardware Accelerator for Efficient Deployment of Sparse Convolutional Networks", journal = j-JETC, volume = "13", number = "3", pages = "31:1--31:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005448", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Deep neural networks have been shown to outperform prior state-of-the-art solutions that often relied heavily on hand-engineered feature extraction techniques coupled with simple classification algorithms. 
In particular, deep convolutional neural networks have been shown to dominate on several popular public benchmarks such as the ImageNet database. Unfortunately, the benefits of deep networks have yet to be fully exploited in embedded, resource-bound settings that have strict power and area budgets. Graphical processing unit (GPU) have been shown to improve throughput and energy-efficiency over central processing unit (CPU) due to their highly parallel architecture yet still impose a significant power burden. In a similar fashion, field programmable gate array (FPGA) can be used to improve performance while further allowing more fine-grained control over implementation to improve efficiency. In order to reduce power and area while still achieving required throughput, classification-efficient network architectures are required in addition to optimal deployment on efficient hardware. In this work, we target both of these enterprises. For the first objective, we analyze simple, biologically inspired reduction strategies that are applied both before and after training. The central theme of the techniques is the introduction of sparsification to help dissolve away the dense connectivity that is often found at different levels in convolutional neural networks. The sparsification techniques include feature compression partition, structured filter pruning, and dynamic feature pruning. Additionally, we explore filter factorization and filter quantization approximation techniques to further reduce the complexity of convolutional layers. In the second contribution, we propose SPARCNet, a hardware accelerator for efficient deployment of SPARse Convolutional NETworks. The accelerator looks to enable deploying networks in such resource-bound settings by both exploiting efficient forms of parallelism inherent in convolutional layers and by exploiting the sparsification and approximation techniques proposed.
To demonstrate both contributions, modern deep convolutional network architectures containing millions of parameters are explored within the context of the computer vision dataset CIFAR. Utilizing the reduction techniques, we demonstrate the ability to reduce computation and memory by 60\% and 93\% with less than 0.03\% impact on accuracy when compared to the best baseline network with 93.47\% accuracy. The SPARCNet accelerator with different numbers of processing engines is implemented on a low-power Artix-7 FPGA platform. Additionally, the same networks are optimally implemented on a number of embedded commercial-off-the-shelf platforms including NVIDIA's CPU+GPU SoCs TK1 and TX1 and Intel Edison. Compared to NVIDIA's TK1 and TX1, the FPGA-based accelerator obtains 11.8 $ \times $ and 7.5 $ \times $ improvement in energy efficiency while maintaining a classification throughput of 72 images/s. When further compared to a number of recent FPGA-based accelerators, SPARCNet is able to achieve up to 15 $ \times $ improvement in energy efficiency while consuming less than 2W of total board power at 100MHz.
In addition to improving efficiency, the accelerator has built-in support for sparsification techniques and ability to perform in-place rectified linear unit (ReLU) activation function, max-pooling, and batch normalization.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Anwar:2017:SPD, author = "Sajid Anwar and Kyuyeon Hwang and Wonyong Sung", title = "Structured Pruning of Deep Convolutional Neural Networks", journal = j-JETC, volume = "13", number = "3", pages = "32:1--32:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005348", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Real-time application of deep learning algorithms is often hindered by high computational complexity and frequent memory accesses. Network pruning is a promising technique to solve this problem. However, pruning usually results in irregular network connections that not only demand extra representation efforts but also do not fit well on parallel computation. We introduce structured sparsity at various scales for convolutional neural networks: feature map-wise, kernel-wise, and intra-kernel strided sparsity. This structured sparsity is very advantageous for direct computational resource savings on embedded computers, in parallel computing environments, and in hardware-based systems. To decide the importance of network connections and paths, the proposed method uses a particle filtering approach. The importance weight of each particle is assigned by assessing the misclassification rate with a corresponding connectivity pattern. The pruned network is retrained to compensate for the losses due to pruning. 
While implementing convolutions as matrix products, we particularly show that intra-kernel strided sparsity with a simple constraint can significantly reduce the size of the kernel and feature map tensors. The proposed work shows that when pruning granularities are applied in combination, we can prune the CIFAR-10 network by more than 70\% with less than a 1\% loss in accuracy.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Panda:2017:EEI, author = "Priyadarshini Panda and Abhronil Sengupta and Kaushik Roy", title = "Energy-Efficient and Improved Image Recognition with Conditional Deep Learning", journal = j-JETC, volume = "13", number = "3", pages = "33:1--33:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007192", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Deep-learning neural networks have proven to be very successful for a wide range of recognition tasks across modern computing platforms. However, the computational requirements associated with such deep nets can be quite high, and hence their energy-efficient implementation is of great interest. Although, traditionally, the entire network is utilized for the recognition of all inputs, we observe that the classification difficulty varies widely across inputs in real-world datasets; only a small fraction of inputs requires the full computational effort of a network, while a large majority can be classified correctly with very low effort. 
In this article, we propose Conditional Deep Learning (CDL), where the convolutional layer features are used to identify the variability in the difficulty of input instances and conditionally activate the deeper layers of the network. We achieve this by cascading a linear network of output neurons for each convolutional layer and monitoring the output of the linear network to decide whether classification can be terminated at the current stage or not. The proposed methodology thus enables the network to dynamically adjust the computational effort depending on the difficulty of the input data while maintaining competitive classification accuracy. The overall energy benefits for MNIST/CIFAR10/Tiny ImageNet datasets with state-of-the-art deep-learning architectures are $ 1.84 \times $ / $ 2.83 \times $ / $ 4.02 \times $, respectively. We further employ the conditional approach to train deep-learning networks from scratch with integrated supervision from the additional output neurons appended at the intermediate convolutional layers. 
Our proposed integrated CDL training leads to an improvement in the gradient convergence behavior giving substantial error rate reduction on MNIST/CIFAR-10, resulting in improved classification over state-of-the-art baseline networks.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Karam:2017:MCR, author = "Robert Karam and Somnath Paul and Ruchir Puri and Swarup Bhunia", title = "Memory-Centric Reconfigurable Accelerator for Classification and Machine Learning Applications", journal = j-JETC, volume = "13", number = "3", pages = "34:1--34:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2997649", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Big Data refers to the growing challenge of turning massive, often unstructured datasets into meaningful, organized, and actionable data. As datasets grow from petabytes to exabytes and beyond, it becomes increasingly difficult to run advanced analytics, especially Machine Learning (ML) applications, in a reasonable time and on a practical power budget using traditional architectures. Previous work has focused on accelerating analytics readily implemented as SQL queries on data-parallel platforms, generally using off-the-shelf CPUs and General Purpose Graphics Processing Units (GPGPUs) for computation or acceleration. However, these systems are general-purpose and still require a vast amount of data transfer between the storage devices and computing elements, thus limiting the system efficiency. 
As an alternative, this article presents a reconfigurable memory-centric advanced analytics accelerator that operates at the last level of memory and dramatically reduces energy required for data transfer. We functionally validate the framework using an FPGA-based hardware emulation platform and three representative applications: Na{\"\i}ve Bayesian Classification, Convolutional Neural Networks, and k-Means Clustering. Results are compared with implementations on a modern CPU and workstation GPGPU. Finally, the use of in-memory dataset decompression to further reduce data transfer volume is investigated. With these techniques, the system achieves an average energy efficiency improvement of 74$ \times $ and 212$ \times $ over GPU and single-threaded CPU, respectively, while dataset compression is shown to improve overall efficiency by an additional 1.8$ \times $ on average.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yuan:2017:VAR, author = "Bo Yuan and Keshab K. Parhi", title = "{VLSI} Architectures for the {Restricted Boltzmann Machine}", journal = j-JETC, volume = "13", number = "3", pages = "35:1--35:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007193", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Neural network (NN) systems are widely used in many important applications ranging from computer vision to speech recognition. To date, most NN systems are processed by general processing units like CPUs or GPUs. However, as the sizes of dataset and network rapidly increase, the original software implementations suffer from long training time. 
To overcome this problem, specialized hardware accelerators are needed to design high-speed NN systems. This article presents an efficient hardware architecture of restricted Boltzmann machine (RBM) that is an important category of NN systems. Various optimization approaches at the hardware level are performed to improve the training speed. As-soon-as-possible and overlapped-scheduling approaches are used to reduce the latency. It is shown that, compared with the flat design, the proposed RBM architecture can achieve 50\% reduction in training time. In addition, an on-the-fly computation scheme is also used to reduce the storage requirement of binary and stochastic states by several hundreds of times. Then, based on the proposed approach, a 784-2252 RBM design example is developed for MNIST handwritten digit recognition dataset. Analysis shows that the VLSI design of RBM achieves significant improvement in training speed and energy efficiency as compared to CPU/GPU-based solution.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ni:2017:DMC, author = "Leibin Ni and Hantao Huang and Zichuan Liu and Rajiv V. 
Joshi and Hao Yu", title = "Distributed In-Memory Computing on Binary {RRAM} Crossbar", journal = j-JETC, volume = "13", number = "3", pages = "36:1--36:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996192", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The recently emerging resistive random-access memory (RRAM) can provide nonvolatile memory storage but also intrinsic computing for matrix-vector multiplication, which is ideal for the low-power and high-throughput data analytics accelerator performed in memory. However, the existing RRAM crossbar--based computing is mainly assumed as a multilevel analog computing, whose result is sensitive to process nonuniformity as well as additional overhead from AD-conversion and I/O. In this article, we explore the matrix-vector multiplication accelerator on a binary RRAM crossbar with adaptive 1-bit-comparator--based parallel conversion. Moreover, a distributed in-memory computing architecture is also developed with the according control protocol. Both memory array and logic accelerator are implemented on the binary RRAM crossbar, where the logic-memory pair can be distributed with the control bus protocol. Experimental results have shown that compared to the analog RRAM crossbar, the proposed binary RRAM crossbar can achieve significant area savings with better calculation accuracy. Moreover, significant speedup can be achieved for matrix-vector multiplication in neural network--based machine learning such that the overall training and testing time can be both reduced. 
In addition, large energy savings can be also achieved when compared to the traditional CMOS-based out-of-memory computing architecture.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Merkel:2017:SCB, author = "Cory Merkel and Dhireesha Kudithipudi and Manan Suri and Bryant Wysocki", title = "Stochastic {CBRAM}-Based Neuromorphic Time Series Prediction System", journal = j-JETC, volume = "13", number = "3", pages = "37:1--37:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996193", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this research, we present a Conductive-Bridge RAM (CBRAM)-based neuromorphic system which efficiently addresses time series prediction. We propose a new (i) voltage-mode, stochastic, multiweight synapse circuit based on experimental bi-stable CBRAM devices, (ii) a voltage-mode neuron circuit based on the concept of charge sharing, and (iii) an optimized training methodology powered by a stochastic implementation of the Least-Mean-Squares (SLMS) training rule. To validate the proposed design, we use time series prediction for short-term electrical load forecasting in smart grids. Our system is able to forecast hourly electrical loads with a mean accuracy of 96\%, an estimated power dissipation of 15 $ \mu $ W, and area of 14.5 $ \mu m^2 $ at 65 nm CMOS technology.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Topaloglu:2017:EJS, author = "Rasit O. 
Topaloglu and Naveen Verma", title = "Editorial for {JETC} Special Issue on Alternative Computing Systems", journal = j-JETC, volume = "13", number = "3", pages = "38:1--38:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3022700", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Britt:2017:HPC, author = "Keith A. Britt and Travis S. Humble", title = "High-Performance Computing with Quantum Processing Units", journal = j-JETC, volume = "13", number = "3", pages = "39:1--39:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007651", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The prospects of quantum computing have driven efforts to realize fully functional quantum processing units (QPUs). Recent success in developing proof-of-principle QPUs has prompted the question of how to integrate these emerging processors into modern high-performance computing (HPC) systems. We examine how QPUs can be integrated into current and future HPC system architectures by accounting for functional and physical design requirements. We identify two integration pathways that are differentiated by infrastructure constraints on the QPU and the use cases expected for the HPC system. This includes a tight integration that assumes infrastructure bottlenecks can be overcome as well as a loose integration that assumes they cannot. 
We find that the performance of both approaches is likely to depend on the quantum interconnect that serves to entangle multiple QPUs. We also identify several challenges in assessing QPU performance for HPC, and we consider new metrics that capture the interplay between system architecture and the quantum parallelism underlying computational performance.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yoon:2017:MUM, author = "Su-Kyung Yoon and Young-Sun Youn and Kihyun Park and Shin-Dug Kim", title = "Mobile Unified Memory-Storage Structure Based on Hybrid Non-Volatile Memories", journal = j-JETC, volume = "13", number = "3", pages = "40:1--40:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007650", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In mobile computing systems, the limited amount of main memory space leads to page swap operation overhead and data duplication in both main memory and secondary storage. Furthermore, SQLite write operations in mobile devices such as smartphones and tablet PCs tend to frequently overwrite data to storage, significantly degrading performance. Thus, this article presents a unified memory-storage structure that is optimized for mobile devices and blurs the boundary between the existing main memory layer and secondary storage layer. This structure can eliminate the conventional page-swap operations that cause significant performance degradation and support fast program execution time. 
The unified memory-storage structure consists of a dynamic RAM (DRAM) and phase change memory (PCM)-based dual buffering module, a hybrid unified memory-storage array consisting of DRAM and NAND Flash memory, and an associated unified storage translation layer devised for the memory address and file translation mechanism as a system software module. This hybrid array of non-volatile memories is formed as a single memory-disk integrated storage space that can be logically divided into static and dynamic spaces. Experimental results show that the overall performance of the hybrid unified memory-storage system with the buffering structure increases by around 13\% and power consumption is also improved by 35\%, compared to current mobile system.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Guha:2017:RTS, author = "Krishnendu Guha and Debasri Saha and Amlan Chakrabarti", title = "Real-Time {SoC} Security against Passive Threats Using Crypsis Behavior of Geckos", journal = j-JETC, volume = "13", number = "3", pages = "41:1--41:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3014166", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The rapid evolution of the embedded era has witnessed globalization for the design of SoC architectures in the semiconductor design industry. Though issues of cost and stringent marketing deadlines have been resolved in such a methodology, yet the root of hardware trust has been evicted. Malicious circuitry, a.k.a. Hardware Trojan Horse (HTH), is inserted by adversaries in the less trusted phases of design.
A HTH remains dormant during testing but gets triggered at runtime to cause sudden active and passive attacks. In this work, we focus on the runtime passive threats based on the parameter delay. Nature-inspired algorithms offer an alternative to the conventional techniques for solving complex problems in the domain of computer science. However, most are optimization techniques and none is dedicated to security. We seek refuge to the crypsis behavior exhibited by geckos in nature to generate a runtime security technique for SoC architectures, which can bypass runtime passive threats of a HTH. An adaptive security intellectual property (IP) that works on the proposed security principles is designed. Embedded timing analysis is used for experimental validation. Low area and power overhead of our proposed security IP over standard benchmarks and practical crypto SoC architectures as obtained in experimental results supports its applicability for practical implementations.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2017:CPU, author = "Yin Liu and Keshab K. Parhi", title = "Computing Polynomials Using Unipolar Stochastic Logic", journal = j-JETC, volume = "13", number = "3", pages = "42:1--42:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007648", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article addresses subtraction and polynomial computations using unipolar stochastic logic. Stochastic computing requires simple logic gates, and stochastic logic--based circuits are inherently fault tolerant. Thus, these structures are well suited for nanoscale CMOS technologies. 
It is well known that an AND gate and a multiplexer can be used to implement stochastic unipolar multiplier and adder, respectively. Although it is easy to realize multiplication and scaled addition, implementation of subtraction is nontrivial using unipolar stochastic logic. Additionally, an accurate computation of subtraction is critical for the implementation of polynomials with negative coefficients in stochastic unipolar representation. This work, for the first time, demonstrates that instead of using well-known Bernstein polynomials, stochastic computation of polynomials can be implemented by using a stochastic subtractor and factorization. Three major contributions are given in this article. First, two approaches are proposed to compute subtraction in stochastic unipolar representation. In the first approach, the subtraction operation is approximated by cascading multilevels of OR and AND gates. The accuracy of the approximation is improved with the increase in the number of stages. In the second approach, the stochastic subtraction is implemented using a multiplexer and a stochastic divider. This approach requires more hardware complexity due to the use of a linear-feedback shift register and a counter for division. Second, computation of polynomials in stochastic unipolar format is presented using scaled addition and proposed stochastic subtraction. Third, we propose stochastic computation of polynomials using factorization. Stochastic implementations of first- and second-order factors are presented for different locations of polynomial roots. 
From experimental results, it is shown that the proposed stochastic logic circuits require less hardware complexity than the previous stochastic polynomial implementation using Bernstein polynomials.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Golnari:2017:PCE, author = "Pareesa Ameneh Golnari and Yavuz Yetim and Margaret Martonosi and Yakir Vizel and Sharad Malik", title = "{PPU}: a Control Error-Tolerant Processor for Streaming Applications with Formal Guarantees", journal = j-JETC, volume = "13", number = "3", pages = "43:1--43:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990502", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With increasing technology scaling and design complexity there are increasing threats from device and circuit failures. This is expected to worsen with post-CMOS devices. Current error-resilient solutions ensure reliability of circuits through protection mechanisms such as redundancy, error correction, and recovery. However, the costs of these solutions may be high, rendering them impractical. In contrast, error-tolerant solutions allow errors in the computation and are positioned to be suitable for error-tolerant applications such as media applications. For such programmable error-tolerant processors, the Instruction-Set-Architecture (ISA) no longer serves as a specification since it is acceptable for the processor to allow for errors during the execution of instructions. In this work, we address this specification gap by defining the basic requirements needed for an error-tolerant processor to provide acceptable results. 
Furthermore, we formally define properties that capture these requirements. Based on this, we propose the Partially Protected Uniprocessor (PPU), an error-tolerant processor that aims to meet these requirements with low-cost microarchitectural support. These protection mechanisms convert potentially fatal control errors to potentially tolerable data errors instead of ensuring instruction-level or byte-level correctness. The protection mechanisms in PPU protect the system against crashes, unresponsiveness, and external device corruption. In addition, they also provide support for achieving acceptable result quality. Additionally, we provide a methodology that formally proves the specification properties on PPU using model checking. This methodology uses models for the hardware and software that are integrated with the fault and recovery models. Finally, we experimentally demonstrate the results of model checking and the application-level quality of results for PPU.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gorantla:2017:DAC, author = "Anusha Gorantla and Deepa P.", title = "Design of Approximate Compressors for Multiplication", journal = j-JETC, volume = "13", number = "3", pages = "44:1--44:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007649", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Approximate computing is a promising technique for energy-efficient Very Large Scale Integration (VLSI) system design. It is best suited for error-resilient applications such as signal processing and multimedia. 
Approximate computing reduces accuracy but still provides significant and faster results with lower power consumption. This is attractive to arithmetic circuits. In this article, various novel design approaches of approximate 4-2 and 5-2 compressors have been proposed for reduction of the partial product stages in multiplication. Three approximate 8 $ \times $ 8 Dadda multiplier designs using three novel approximate 4-2 compressors and two approximate 8 $ \times $ 8 Dadda multiplier designs using two novel approximate 5-2 compressors have been proposed. The synthesis results show that the proposed designs achieved significant accuracy improvement together with power and delay reductions compared to the existing approximate designs.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Kumar:2017:THS, author = "Arvind Kumar and Zhe Wan and Winfried W. Wilcke and Subramanian S. Iyer", title = "Toward Human-Scale Brain Computing Using {$3$D} Wafer Scale Integration", journal = j-JETC, volume = "13", number = "3", pages = "45:1--45:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2976742", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The Von Neumann architecture, defined by strict and hierarchical separation of memory and processor, has been a hallmark of conventional computer design since the 1940s. It is becoming increasingly unsuitable for cognitive applications, which require massive parallel processing of highly interdependent data.
Inspired by the brain, we propose a significantly different architecture characterized by a large number of highly interconnected simple processors intertwined with very large amounts of low-latency memory. We contend that this memory-centric architecture can be realized using 3D wafer scale integration for which the technology is nearing readiness, combined with current CMOS device technologies. The natural fault tolerance and lower power requirements of neuromorphic processing make 3D wafer stacking particularly attractive. In order to assess the performance of this architecture, we propose a specific embodiment of a neuronal system using 3D wafer scale integration; formulate a simple model of brain connectivity including short- and long-range connections; and estimate the memory, bandwidth, latency, and power requirements of the system using the connectivity model. We find that 3D wafer scale integration, combined with technologies nearing readiness, offers the potential for scaleup to a primate-scale brain, while further scaleup to a human-scale brain would require significant additional innovations.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Alawad:2017:SCS, author = "Mohammed Alawad and Mingjie Lin", title = "Sketching Computation with Stochastic Processing Engines", journal = j-JETC, volume = "13", number = "3", pages = "46:1--46:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007652", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article explores how to leverage stochastic principles to gracefully exploit partial computation results, hence achieving quality-scalable embedded 
computing. Our work is inspired by the concept of incremental sketching frequently found in artistic rendering, where the drawing procedure consists of a series of steps, each gradually improving the quality of results. The essence of our approach is to first encode input signals as probability density functions (PDFs), then perform stochastic computing operations on all signals in the probabilistic domain, and finally decode output signals by estimating the PDF of these resulting random samples. Although numerous approximate computing schemes exist, such as inaccurate adders and multipliers that reduce bit width or weaken logic circuit design, none of them can seamlessly improve computing accuracy incrementally without making any changes to the computing hardware at runtime. Furthermore, in conventional embedded computing, a sudden shortage of computing resources, such as premature termination, often means a complete computing failure and totally unusable results. Our sketching computing scheme can readily trade off between the quality of results and computing efforts without modifying its circuit design. To validate our proposed architecture design, we have implemented a proof-of-concept computation sketching engine based on a probabilistic convolver using a Virtex-6 FPGA device. Using three widely deployed image processing applications---image correspondence, image sharpening, and edge detection---we have demonstrated that important embedded computing applications can indeed be ``sketched'' in a graceful manner using roughly one third the hardware and one fifth the energy compared to the traditional multiplier-based computing method.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Alaghi:2017:TAE, author = "Armin Alaghi and Wei-Ting J. Chan and John P. Hayes and Andrew B.
Kahng and Jiajia Li", title = "Trading Accuracy for Energy in Stochastic Circuit Design", journal = j-JETC, volume = "13", number = "3", pages = "47:1--47:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990503", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As we approach the limits of traditional Moore's-Law scaling, alternative computing techniques that consume energy more efficiently become attractive. Stochastic computing (SC), as a re-emerging computing technique, is a low-cost and error-tolerant alternative to conventional binary circuits in several important applications such as image processing and communications. SC allows a natural accuracy-energy tradeoff that has been exploited in the past. This article presents an accuracy-energy tradeoff technique for SC circuits that reduces their energy consumption with virtually no accuracy loss. To this end, we employ voltage or frequency scaling, which normally reduce energy consumption at the cost of timing errors. Then we show that due to their inherent error tolerance, SC circuits operate satisfactorily without significant accuracy loss even with aggressive scaling. This significantly improves their energy efficiency. In contrast, conventional binary circuits quickly fail as the supply voltage decreases. To find the most energy-efficient operating point of an SC circuit, we propose an error estimation method that allows us to quickly explore the circuit's design space. The error estimation method is based on Markov chain and least-squares regression. Furthermore, we investigate opportunities to optimize SC circuits under such aggressive scaling. 
We find that logical and physical design techniques can be combined to significantly expand the already-powerful accuracy-energy tradeoff possibilities of SC. In particular, we demonstrate that careful adjustment of path delays can lead to significant error reduction under voltage and frequency scaling. We perform buffer insertion and route detouring to achieve more balanced path delays. These techniques differ from conventional path-balancing techniques whose goal is to minimize power consumption by resizing the non-critical paths. The goal of our path-balancing approach is to increase error cancellation chances in voltage-/frequency-scaled SC circuits. Our circuit optimization comprehends the tradeoff between power overheads due to inserted buffers and wires versus the energy reduction from supply voltage downscaling enabled by more balanced path delays. Simulation results show that our optimized SC circuits can tolerate aggressive voltage scaling with no significant signal-to-noise ratio (SNR) degradation. In one example, a 40\% supply voltage reduction (1V to 0.6V) on the SC circuit leads to 66\% energy saving (20.7pJ to 6.9pJ) and makes it more efficient than its conventional binary counterpart. In the same example, a 100\% frequency boosting (400ps to 200ps) of the optimized circuits leads to no significant SNR degradation. We also show that process variation and temperature variation have limited impact on optimized SC circuits. The error change is less than 5\% when temperature changes by 100${}^\circ $C or process condition changes from worst case to best case.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Salehi:2017:SSM, author = "Soheil Salehi and Deliang Fan and Ronald F. 
Demara", title = "Survey of {STT--MRAM} Cell Design Strategies: Taxonomy and Sense Amplifier Tradeoffs for Resiliency", journal = j-JETC, volume = "13", number = "3", pages = "48:1--48:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2997650", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin-Transfer Torque Random Access Memory (STT-MRAM) has been explored as a post-CMOS technology for embedded and data storage applications seeking non-volatility, near-zero standby energy, and high density. Towards attaining these objectives for practical implementations, various techniques to mitigate the specific reliability challenges associated with STT-MRAM elements are surveyed, classified, and assessed in this article. Cost and suitability metrics assessed include the area of nanomagnetic and CMOS components per bit, access time and complexity, sense margin, and energy or power consumption costs versus resiliency benefits. Solutions to the reliability issues identified are addressed within a taxonomy created to categorize the current and future approaches to reliable STT-MRAM designs. A variety of destructive and non-destructive sensing schemes are assessed for process variation tolerance, read disturbance reduction, sense margin, and write polarization asymmetry compensation.
The highest resiliency strategies deliver a sensing margin above 300mV while incurring low power and energy consumption on the order of picojoules and microwatts, respectively, and attaining read sense latency of a few nanoseconds down to hundreds of picoseconds for non-destructive and destructive sensing schemes, respectively.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yu:2017:RMA, author = "Songping Yu and Nong Xiao and Mingzhu Deng and Fang Liu and Wei Chen", title = "Redesign the Memory Allocator for Non-Volatile Main Memory", journal = j-JETC, volume = "13", number = "3", pages = "49:1--49:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2997651", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The non-volatile memory (NVM) has the merits of byte-addressability, fast speed, persistency and low power consumption, which make it attractive to be used as main memory. Commonly, user process dynamically acquires memory through memory allocators. However, traditional memory allocators designed with in-place data writes are not appropriate for the non-volatile main memory (NVRAM) due to the limited endurance. In this article, first, we quantitatively analyze the wear-oblivious of DRAM-oriented designed allocator---glibc malloc and the inefficiency of wear-conscious allocator NVMalloc.
Then, we propose WAlloc, an efficient wear-aware manual memory allocator designed for NVRAM: (1) decouples metadata and data management; (2) distinguishes metadata with volatility; (3) redirects the data writes around to achieve wear-leveling; (4) redesigns an efficient and effective NVM copy mechanism, bypassing the CPU cache partially and prefetching data explicitly. Finally, experimental results show that the wear-leveling of WAlloc outperforms that of NVMalloc about 30\% and 60\% under random workloads and well-distributed workloads, respectively. Besides, WAlloc reduces the average data memory writes in 64 bytes block by 1.5 times comparing with glibc malloc. With the fulfillment of data persistency, cache bypassing NVM copy is better than cache line flushing NVM copy with performance improvement circa 14\%.", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2017:PUD, author = "Bing Li and Yu Hu and Ying Wang and Jing Ye and Xiaowei Li", title = "Power-Utility-Driven Write Management for {MLC PCM}", journal = j-JETC, volume = "13", number = "3", pages = "50:1--50:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2997648", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Jul 11 17:10:31 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Phase change memory (PCM) is a promising alternative to Dynamic Random Access Memory (DRAM) as main memory due to its merits of high density and low leakage power. Multi-level Cell (MLC) PCM is more attractive than Single-level Cell (SLC) PCM, because it can store multiple bits per cell to achieve higher density and lower per-bit cost. 
With the iterative program-verify write technique, MLC PCM writes demand at much higher power than DRAM writes, while the power supply system of MLC memory system is similar to that of DRAM, and the power capability is limited. The incompatibility of high write power and limited power budget results in the degradation of the write throughput and performance in MLC PCM. In this work, we investigate both write scheduling policy and power management to improve the MLC power utility and alleviate the negative impacts induced by high write power. We identify the power-utility-driven write scheduling as an online bin-packing problem and then derive a power-utility-driven scheduling (PUDS) policy from the First Fit algorithm to improve the write power usage. Based on the ramp-down characteristic of the SET pulse (the pulse changes the PCM to high resistance), we propose the SET Power Amortization (SPA) policy, which proactively reclaims the power tokens at the intra-SET level to promote the power utilization. Our experimental results demonstrate that the PUDS and SPA respectively achieve 24\% and 27\% performance improvement over the state-of-the-art power management technique, and the PUDS+SPA has an overall 31\% improvement of the power utility and 50\% increase of performance compared to the baseline system.", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ghosh:2017:AQC, author = "Mrityunjay Ghosh and Amlan Chakrabarti and Niraj K.
Jha", title = "Automated Quantum Circuit Synthesis and Cost Estimation for the Binary Welded Tree Oracle", journal = j-JETC, volume = "13", number = "4", pages = "51:1--51:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060582", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum computing is a new computational paradigm that promises an exponential speed-up over classical algorithms. To develop efficient quantum algorithms for problems of a non-deterministic nature, random walk is one of the most successful concepts employed. In this article, we target both continuous-time and discrete-time random walk in both the classical and quantum regimes. Binary Welded Tree (BWT), or glued tree, is one of the most well-known quantum walk algorithms in the continuous-time domain. Prior work implements quantum walk on the BWT with static welding. In this context, static welding is randomized but case-specific. We propose a solution to automatically generate the circuit for the Oracle for welding. We implement the circuit using the Quantum Assembly Language, which is a language for describing quantum circuits. We then optimize the generated circuit using the Fault-Tolerant Quantum Logic Synthesis tool for any BWT instance. 
Automatic welding enables us to provide a generalized solution for quantum walk on the BWT.", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Govindaraj:2017:DAS, author = "Rekha Govindaraj and Swaroop Ghosh", title = "Design and Analysis of {STTRAM}-Based Ternary Content Addressable Memory Cell", journal = j-JETC, volume = "13", number = "4", pages = "52:1--52:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060578", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Content Addressable Memory (CAM) is widely used in applications where searching a specific pattern of data is a major operation. Conventional CAMs suffer from area, power, and speed limitations. We propose Spin-Torque Transfer RAM--based Ternary CAM (TCAM) cells. The proposed NOR-type TCAM cell has a 62.5\% (33\%) reduction in number of transistor compared to conventional CMOS TCAMs (spintronic TCAMs). We analyzed the sense margin of the proposed TCAM with respect to 16-, 32-, 64-, 128-, and 256-bit word sizes in 22nm predictive technology. Simulations indicated a reliable sense margin of 50mV even at 0.7V supply voltage for 256-bits word. We also explored a selective threshold voltage modulation of transistors to improve the sense margin and tolerate process and voltage variations. The worst-case search latency and sense margin of 256-bit TCAM is found to be 263ps and 220mV, respectively, at 1V supply voltage. The average search power consumed is 13mW, and the search energy is 4.7fJ/bit search. The write time is 4ns, and the write energy is 0.69pJ/bit. 
We leverage the NOR-type TCAM design to realize a 9T-2 Magnetic Tunnel Junctions NAND-type TCAM cell that has 43.75\% less number of transistors than the conventional CMOS TCAM cell. A NAND-type cell can support up to 64-bit words with a maximum sense margin of up to 33mV. We compare the performance metrics of NOR- and NAND-type TCAM cells with other TCAMs in the literature.", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Peter:2017:OON, author = "Eldhose Peter and Anuj Arora and Janibul Bashir and Akriti Bagaria and Smruti R. Sarangi", title = "Optical Overlay {NUCA}: a High-Speed Substrate for Shared {L2} Caches", journal = j-JETC, volume = "13", number = "4", pages = "53:1--53:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3064833", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we propose using optical networks-on-chip (NoCs) to design cache access protocols for large shared L2 caches. We observe that the problem is unique because optical networks have very low latency, and in principle all of the cache banks are very close to each other. A naive approach is to broadcast a request to a set of banks that might possibly contain the copy of a block. However, this approach is wasteful in terms of energy and bandwidth. Hence, we propose a set of novel schemes that create a set of virtual networks ( overlays ) of cache banks over a physical optical NoC. We search for a block inside each overlay using a combination of multicast and unicast messages. We first propose two simple protocols: TSI and Broadcast. The former uses unicast messages, and the latter uses multicast messages. 
We subsequently propose an improved scheme, OP\_BCAST, that combines the best of TSI and Broadcast, and mainly uses restricted multicast messages. Then we propose a set of novel hardware structures for creating and managing overlays, for efficiently locating blocks in the overlay, and for implementing dynamically changing overlays with OP\_BCAST. The performance of the TSI scheme is within 2\% to 3\% of a broadcast scheme, and it is faster than traditional schemes with electrical networks by 26\%. Compared to the broadcast scheme, it reduces the number of accesses, and consequently the dynamic energy of the caches by 6\% to 8\%. OP\_BCAST is 34\% faster than the best solutions with copper-based NoCs; moreover, it reduces the dynamic energy for cache access by 33\% compared to the TSI scheme.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Koneru:2017:IEC, author = "Abhishek Koneru and Sukeshwar Kannan and Krishnendu Chakrabarty", title = "Impact of Electrostatic Coupling and Wafer-Bonding Defects on Delay Testing of Monolithic {$3$D} Integrated Circuits", journal = j-JETC, volume = "13", number = "4", pages = "54:1--54:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3041026", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Monolithic three-dimensional (M3D) integration is gaining momentum, as it has the potential to achieve significantly higher device density compared to 3D integration based on through-silicon vias. M3D integration uses several techniques that are not used in the fabrication of conventional integrated circuits (ICs). 
Therefore, a detailed analysis of the M3D fabrication process is required to understand the impact of defects that are likely to occur during chip fabrication. In this article, we first analyze electrostatic coupling in M3D ICs, which arises due to the aggressive scaling of the interlayer dielectric (ILD) thickness. We then analyze defects that arise due to voids created during wafer bonding, a key step in most M3D fabrication processes. We quantify the impact of these defects on the threshold voltage of a top-layer transistor in an M3D IC. We also show that wafer-bonding defects can lead to a change in the resistance of interlayer vias (ILVs), and in some cases lead to an open in an ILV or a short between two ILVs. We then analyze the impact of these defects on path delays using HSpice simulations. We study their impact on the effectiveness of delay-test patterns for multiple instances of IWLS 2005 benchmarks in which these defects were randomly injected. Our results show that the timing characteristics of an M3D IC can be significantly altered due to coupling and wafer-bonding defects if the thickness of its ILD is less than 100nm. 
Therefore, for such M3D ICs, test-generation methods must be enhanced to take M3D fabrication defects into account.", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Houshmand:2017:QCS, author = "Mahboobeh Houshmand and Mehdi Sedighi and Morteza Saheb Zamani and Kourosh Marjoei", title = "Quantum Circuit Synthesis Targeting to Improve One-Way Quantum Computation Pattern Cost Metrics", journal = j-JETC, volume = "13", number = "4", pages = "55:1--55:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3064834", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "One-way quantum computation (1WQC) is a model of universal quantum computations in which a specific highly entangled state called a cluster state allows for quantum computation by single-qubit measurements. The needed computations in this model are organized as measurement patterns. The traditional approach to obtain a measurement pattern is by translating a quantum circuit that solely consists of CZ and $ J(\alpha) $ gates into the corresponding measurement patterns and then performing some optimizations by using techniques proposed for the 1WQC model. However, in these cases, the input of the problem is a quantum circuit, not an arbitrary unitary matrix. Therefore, in this article, we focus on the first phase---that is, decomposing a unitary matrix into CZ and $ J(\alpha) $ gates. Two well-known quantum circuit synthesis methods, namely cosine-sine decomposition and quantum Shannon decomposition are considered and then adapted for a library of gates containing CZ and $ J(\alpha) $, equipped with optimizations.
By exploring the solution space of the combinations of these two methods in a bottom-up approach of dynamic programming, a multiobjective quantum circuit synthesis method is proposed that generates a set of quantum circuits. This approach attempts to simultaneously improve the measurement pattern cost metrics after the translation from this set of quantum circuits.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yogendra:2017:CST, author = "Karthik Yogendra and Chamika Liyanagedera and Deliang Fan and Yong Shim and Kaushik Roy", title = "Coupled Spin-Torque Nano-Oscillator-Based Computation: a Simulation Study", journal = j-JETC, volume = "13", number = "4", pages = "56:1--56:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3064835", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we present a comprehensive study of four frequency locking mechanisms in Spin Torque Nano Oscillators (STNOs) and explore their suitability for a class of specialized computing applications. We implemented a physical STNO model based on Landau--Lifshitz--Gilbert--Slonczewski equation and benchmarked the model to experimental data. Based on our simulations, we provide an in-depth analysis of how the ``self-organizing'' ability of coupled STNO array can be effectively used for computations that are unsuitable or inefficient in the von-Neumann computing domain. As a case study, we demonstrate the computing ability of coupled STNOs with two applications: edge detection of an image and associative computing for image recognition.
We provide an analysis of the scaling trends of STNOs and the effectiveness of different frequency locking mechanisms with scaling in the presence of thermal noise. We also provide an in-depth analysis of the effect of variations on the four locking mechanisms to find the most robust one in the presence of variations.", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Najafi:2017:RAS, author = "M. Hassan Najafi and Peng Li and David J. Lilja and Weikang Qian and Kia Bazargan and Marc Riedel", title = "A Reconfigurable Architecture with Sequential Logic-Based Stochastic Computing", journal = j-JETC, volume = "13", number = "4", pages = "57:1--57:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060537", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Computations based on stochastic bit streams have several advantages compared to deterministic binary radix computations, including low power consumption, low hardware cost, high fault tolerance, and skew tolerance. To take advantage of this computing technique, previous work proposed a combinational logic-based reconfigurable architecture to perform complex arithmetic operations on stochastic streams of bits. The long execution time and the cost of converting between binary and stochastic representations, however, make the stochastic architectures less energy efficient than the deterministic binary implementations. This article introduces a methodology for synthesizing a given target function stochastically using finite-state machines (FSMs), and enhances and extends the reconfigurable architecture using sequential logic. 
Compared to the previous approach, the proposed reconfigurable architecture can save hardware area and energy consumption by up to 30\% and 40\%, respectively, while achieving a higher processing speed. Both stochastic reconfigurable architectures are much more tolerant of soft errors (bit flips) than the deterministic binary radix implementations, and their fault tolerance scales gracefully to very large numbers of errors.", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chittamuru:2017:SRS, author = "Sai Vineel Reddy Chittamuru and Srinivas Desai and Sudeep Pasricha", title = "{SWIFTNoC}: a Reconfigurable Silicon-Photonic Network with Multicast-Enabled Channel Sharing for Multicore Architectures", journal = j-JETC, volume = "13", number = "4", pages = "58:1--58:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060517", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "On-chip communication is widely considered to be one of the major performance bottlenecks in contemporary chip multiprocessors (CMPs). With recent advances in silicon nanophotonics, photonics-based network-on-chip (NoC) architectures are being considered as a viable solution to support communication in future CMPs as they can enable higher bandwidth and lower power dissipation compared to traditional electrical NoCs. In this article, we present SwiftNoC, a novel reconfigurable silicon-photonic NoC architecture that features improved multicast-enabled channel sharing, as well as dynamic re-prioritization and exchange of bandwidth between clusters of cores running multiple applications, to increase channel utilization and system performance. 
Experimental results show that SwiftNoC improves throughput by up to $ 25.4 \times $ while reducing latency by up to 72.4\% and energy-per-bit by up to 95\% over state-of-the-art solutions.", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Samal:2017:IPU, author = "Sandeep Kumar Samal and Guoqing Chen and Sung Kyu Lim", title = "Improving Performance under Process and Voltage Variations in Near-Threshold Computing Using {$3$D} {ICs}", journal = j-JETC, volume = "13", number = "4", pages = "59:1--59:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3060579", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Near-threshold computing (NTC) circuits have been shown to offer significant energy efficiency and power benefits but with a huge performance penalty. This performance loss exacerbates if process and voltage variations are considered. In this article, we demonstrate that three-dimensional (3D) IC technology can overcome this limitation. We present a detailed case study with a 28nm commercial-grade core at 0.6V operation optimized with various 3D IC physical design methods. First, our study under the deterministic case shows that 3D IC NTC design outperforms 2D IC NTC by 29.5\% in terms of performance at comparable energy. This is significantly higher than the 12.8\% performance benefit of 3D IC at nominal voltage supplies due to higher delay sensitivity to input slew at lower voltages. Second, it is well demonstrated that transistor delay is more sensitive to voltage changes at NTC operation. However, our full-chip study reveals that IR drop effect on 2D/3D IC NTC performance is not severe due to the low power consumption and hence lower IR drop values. 
Third, die-to-die variation impact on full-chip performance is visible in 3D IC NTC designs, but it is not worse compared to 2D IC NTC designs. This is mainly due to the shorter critical path length in 3D IC NTC designs.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Jiang:2017:RCC, author = "Honglan Jiang and Cong Liu and Leibo Liu and Fabrizio Lombardi and Jie Han", title = "A Review, Classification, and Comparative Evaluation of Approximate Arithmetic Circuits", journal = j-JETC, volume = "13", number = "4", pages = "60:1--60:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3094124", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Often as the most important arithmetic modules in a processor, adders, multipliers, and dividers determine the performance and energy efficiency of many computing tasks. The demand of higher speed and power efficiency, as well as the feature of error resilience in many applications (e.g., multimedia, recognition, and data analytics), have driven the development of approximate arithmetic design. In this article, a review and classification are presented for the current designs of approximate arithmetic circuits including adders, multipliers, and dividers. A comprehensive and comparative evaluation of their error and circuit characteristics is performed for understanding the features of various designs. By using approximate multipliers and adders, the circuit for an image processing application consumes as little as 47\% of the power and 36\% of the power-delay product of an accurate design while achieving similar image processing quality. 
Improvements in delay, power, and area are obtained for the detection of differences in images by using approximate dividers.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2017:EEC, author = "Hui Li and S{\'e}bastien {Le Beux} and Martha Johanna Sepulveda and Ian O'Connor", title = "Energy-Efficiency Comparison of Multi-Layer Deposited Nanophotonic Crossbar Interconnects", journal = j-JETC, volume = "13", number = "4", pages = "61:1--61:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3094125", ISSN = "1550-4832", bibdate = "Sat Aug 12 09:05:32 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/jetc/; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Single-layer optical crossbar interconnections based on Wavelength Division Multiplexing stand among other nanophotonic interconnects because of their low latency and low power. However, such architectures suffer from a poor scalability due to losses induced by long propagation distances on waveguides and waveguide crossings. Multi-layer deposited silicon technology allows the stacking of optical layers that are connected by means of Optical Vertical Couplers. This allows significant reduction in the optical losses, which contributes to improve the interconnect scalability but also leads to new challenges related to network designs and layouts. In this article, we investigate the design of optical crossbars using multi-layer silicon deposited technology. We propose implementations for Ring-, Matrix-, $ \lambda $-router-, and Snake-based topologies. Layouts avoiding waveguide crossings are compared to those minimizing the waveguide length according to worst-case and average losses. 
The laser output power is estimated from the losses, which allows us to evaluate the energy efficiency improvement induced by multi-layer technology over traditional planar implementations (33\% on average). Finally, networks comparison has been carried out and the results show that the ring topology leads to a 43\% reduction in the laser output power.", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Gala:2018:ATN, author = "Neel Gala and Sarada Krithivasan and Wei-Yu Tsai and Xueqing Li and Vijaykrishnan Narayanan and V. Kamakoti", title = "An Accuracy Tunable Non-{Boolean} Co-Processor Using Coupled Nano-Oscillators", journal = j-JETC, volume = "14", number = "1", pages = "1:1--1:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3094263", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As we enter an era witnessing the closer end of Dennard scaling, where further reduction in power supply-voltage to reduce power consumption becomes more challenging in conventional systems, a goal of developing a system capable of performing large computations with minimal area and power overheads needs more optimization aspects. A rigorous exploration of alternate computing techniques, which can mitigate the limitations of Complementary Metal-Oxide Semiconductor (CMOS) technology scaling and conventional Boolean systems, is imperative. Reflecting on these lines of thought, in this article we explore the potential of non-Boolean computing employing nano-oscillators for performing varied functions. 
We use a two coupled nano-oscillator as our basic computational model and propose an architecture for a non-Boolean coupled oscillator based co-processor capable of executing certain functions that are commonly used across a variety of approximate application domains. The proposed architecture includes an accuracy tunable knob, which can be tuned by the programmer at runtime. The functionality of the proposed co-processor is verified using a soft coupled oscillator model based on Kuramoto oscillators. The article also demonstrates how real-world applications such as Vector Quantization, Digit Recognition, Structural Health Monitoring, and the like, can be deployed on the proposed model. The proposed co-processor architecture is generic in nature and can be implemented using any of the existing modern day nano-oscillator technologies such as Resonant Body Transistors (RBTs), Spin-Torque Nano-Oscillators (STNOs), and Metal-Insulator Transition (MITs). In this article, we perform a validation of the proposed architecture using the HyperField Effect Transistor (FET) technology-based coupled oscillators, which provide improvements of up to $ 3.5 \times $ increase in clock speed and up to $ 10.75 \times $ and $ 14.12 \times $ reduction in area and power consumption, respectively, as compared to a conventional Boolean CMOS accelerator executing the same functions.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Uddin:2018:DCM, author = "Mesbah Uddin and MD. Badruddoja Majumder and Karsten Beckmann and Harika Manem and Zahiruddin Alamgir and Nathaniel C. Cady and Garrett S. 
Rose", title = "Design Considerations for Memristive Crossbar Physical Unclonable Functions", journal = j-JETC, volume = "14", number = "1", pages = "2:1--2:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3094414", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Hardware security has emerged as a field concerned with issues such as integrated circuit (IC) counterfeiting, cloning, piracy, and reverse engineering. Physical unclonable functions (PUF) are hardware security primitives useful for mitigating such issues by providing hardware-specific fingerprints based on intrinsic process variations within individual IC implementations. As technology scaling progresses further into the nanometer region, emerging nanoelectronic technologies, such as memristors or RRAMs (resistive random-access memory), have become interesting options for emerging computing systems. In this article, using a comprehensive temperature dependent model of an HfO$_x$ (hafnium-oxide) memristor, based on experimental measurements, we explore the best region of operation for a memristive crossbar PUF (XbarPUF). The design considered also employs XORing and a column shuffling technique to improve reliability and resilience to machine learning attacks. We present a detailed analysis for the noise margin and discuss the scalability of the XbarPUF structure. Finally, we present results for estimates of area, power, and delay alongside security performance metrics to analyze the strengths and weaknesses of the XbarPUF. 
Our XbarPUF exhibits nearly ideal (near 50\%) uniqueness, bit-aliasing and uniformity, good reliability of 90\% and up (with 100\% being ideal), a very small footprint, and low average power consumption $ \approx 104 \mu $W.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yu:2018:SOF, author = "Ye Yu and Niraj K. Jha", title = "Statistical Optimization of {FinFET} Processor Architectures under {PVT} Variations Using Dual Device-Type Assignment", journal = j-JETC, volume = "14", number = "1", pages = "3:1--3:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3110714", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "With semiconductor technology scaling to the 22nm node and beyond, fin field-effect transistor (FinFET) has started replacing complementary metal-oxide semiconductor (CMOS), thanks to its superior control of short-channel effects and much lower leakage current. However, process, supply voltage, and temperature (PVT) variations across the integrated circuit (IC) become worse with technology scaling. Thus, to analyze timing, leakage power, and dynamic power under PVT variations, statistical analysis/optimization techniques are more suitable than traditional static timing/power analysis and optimization counterparts. In this article, we propose a statistical optimization framework using dual device-type assignment at the architecture level under PVT variations that takes spatial correlations into account and leverages circuit-level statistical analysis techniques. To the best of our knowledge, this is the first work to study statistical optimization at the system level under PVT variations. 
Simulation results show that leakage power yield and dynamic power yield at the mean value of the baseline can be improved by up to 44.2\% and 21.2\%, respectively, with no loss in timing yield for a single-core processor and up to 43.0\% and 50.0\%, respectively, without any loss in timing yield for an 8-core chip multiprocessor (CMP), at little area overhead. Under the same (99.0\%) power yield constraints, leakage power and dynamic power are reduced by up to 91.2\% and 4.3\%, respectively, for a single-core processor, and up to 44.6\% and 12.5\%, respectively, for an 8-core CMP, with no loss in timing yield. We also show that optimizations performed without taking module-to-module and core-to-core spatial correlations into account overestimate yield, establishing the importance of taking such correlations into account.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hajkazemi:2018:HHM, author = "Mohammad Hossein Hajkazemi and Mohammad Khavari Tavana and Tinoosh Mohsenin and Houman Homayoun", title = "Heterogeneous {HMC + DDRx} Memory Management for Performance-Temperature Tradeoffs", journal = j-JETC, volume = "14", number = "1", pages = "4:1--4:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3106233", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Three-dimensional DRAMs (3D-DRAMs) are emerging as a promising solution to address the memory wall problem in computer systems. However, high fabrication cost per bit and thermal issues are the main reasons that prevent architects from using 3D-DRAM alone as the main memory building block. 
In this article, we address this issue by proposing a heterogeneous memory system that combines a double data rate (DDRx) DRAM with an emerging 3D hybrid memory cube (HMC) technology. Bandwidth and temperature management are the challenging issues for this heterogeneous memory architecture. To address these challenges, first we introduce a memory page allocation policy for the heterogeneous memory system to maximize performance. Then, using the proposed policy, we introduce a temperature-aware algorithm that dynamically distributes the requested bandwidth between HMC and DDRx DRAM to reduce the thermal hotspot while maintaining high performance. We take into account the impact of both core count and HMC channel count on performance while using the proposed policies. The results show that the proposed memory page allocation policy can utilize the memory bandwidth close to 99\% of the ideal bandwidth utilization. Moreover, our temperature-aware bandwidth adaptation reduces the average steady-state temperature of the HMC hotspot across various workloads by 4.5 K while incurring 2.5\% performance overhead.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bhattacharjee:2018:RFT, author = "Sukanta Bhattacharjee and Debasis Mitra and Bhargab B. Bhattacharya", title = "Robust In-Field Testing of Digital Microfluidic Biochips", journal = j-JETC, volume = "14", number = "1", pages = "5:1--5:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3123586", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Microfluidic technology offers vast promise for implementing biochemistry-on-chip with diverse applications to clinical diagnosis, genome analysis, drug design, and point-of-care testing.
Among various types of fluid-chips, droplet-based digital microfluidic biochips (DMFBs), which consist of a patterned array of controllable electrodes, provide the advantage of programmability, ease of fluidic operations, and versatile droplet mobility. However, because of manufacturing or field defects, electrode degradation, or dielectric breakdown, these chips may suffer from incorrect fluidic behavior. Reliability of fluidic operations is of utmost concern in DMFBs that are used to perform safety-critical bio-protocols. Various methods are deployed to test these devices, either offline or being overlapped with bioassay operations (termed as concurrent or in-field testing). The main challenge of in-field testing lies in the fact that the test must run concurrently with the execution of the normal assay without hampering the correctness of the latter. In prior work, optimal testing for droplet mobility over all electrodes was formulated in terms of finding either a Hamiltonian path or a Eulerian path in an undirected graph that represents the electrode-adjacency structure. Although these models have been studied for offline testing, no such effort was made in the area of concurrent testing. In this work, we propose, for in-field application, an SAT-based modeling and solution approach to find an optimal test plan that can be used to check droplet movement across the boundary between every pair of adjacent electrodes, which is visited by the droplets of the ongoing assay. The proposed method is robust and determines a test solution successfully regardless of the cover assay that is being executed concurrently. 
Experiments on several real-life assays and other test cases demonstrate the effectiveness of the method with respect to test completion time.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yang:2018:IAC, author = "Xiaokun Yang and Wujie Wen and Ming Fan", title = "Improving {AES} Core Performance via an Advanced {ASBUS} Protocol", journal = j-JETC, volume = "14", number = "1", pages = "6:1--6:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3110713", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Security is becoming a de-facto requirement of System-on-Chips (SoC), leading up to a significant share of circuit design cost. In this article, we propose an advanced SBUS protocol (ASBUS), to improve the data feeding efficiency of the Advanced Encryption Standard (AES) encrypted circuits. As a case study, the direct memory access (DMA) combined with AES engine and memory controller are implemented as our design-under-test (DUT) using field-programmable gate arrays (FPGA). The results show that our presented ASBUS structure outperforms the AXI-based design for cipher tests. 
As an example, the 32-bit ASBUS design costs less in terms of hardware resources and achieves higher throughput ($ 1.30 \times $) than the 32-bit AXI implementation, and the dynamic energy consumed by the ASBUS cipher test is reduced to 71.27\% compared with the AXI test.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Oneal:2018:RCS, author = "Kenneth O'Neal and Daniel Grissom and Philip Brisk", title = "Resource-Constrained Scheduling for Digital Microfluidic Biochips", journal = j-JETC, volume = "14", number = "1", pages = "7:1--7:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3093930", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Digital microfluidics based on electrowetting-on-dielectric technology is poised to revolutionize many aspects of chemistry and biochemistry through miniaturization, automation, and software programmability. Digital microfluidic biochips (DMFBs) offer ample spatial parallelism, which is then exposed to the compiler. The first problem that a DMFB compiler must solve is resource-constrained scheduling, which is NP-complete. If the compiler is applied off-line, then long-running algorithms that produce solutions of high quality, such as iterative improvement or branch-and-bound search, can be applied; in an online context, where a biochemical reaction is to be executed as soon as it is specified by the programmer, heuristics that sacrifice solution quality to attain a fast runtime are used. This article describes in detail the algorithms and heuristics that have been proposed for resource-constrained scheduling, focusing on several recent contributions: path scheduling and force-directed list scheduling.
It also discusses shortcomings and limitations of existing optimal scheduling problem formulations based on Integer Linear Programming and presents an updated formulation that addresses these issues. The algorithms are compared and evaluated on an extensive benchmark suite of biochemical assays used for applications, such as in vitro diagnostics, protein crystallization, and automated sample preparation.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Motaman:2018:IPV, author = "Seyedhamidreza Motaman and Swaroop Ghosh and Jaydeep Kulkarni", title = "Impact of Process Variation on Self-Reference Sensing Scheme and Adaptive Current Modulation for Robust {STTRAM} Sensing", journal = j-JETC, volume = "14", number = "1", pages = "8:1--8:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3132577", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin-Transfer-Torque RAM (STTRAM) is a promising technology for high-density on-chip cache due to low standby power and high speed. However, the process variation of the Magnetic Tunnel Junction (MTJ) and access transistor poses a serious challenge to sensing. Nondestructive sensing suffers from reference resistance variation, whereas destructive sensing suffers from failures due to unoptimized selection of data and reference currents. Furthermore, the sense speed is tightly coupled with the reference/data current requirement. In this work, we study the process variation effect on a self-reference sensing scheme to eliminate bit-to-bit process variation in MTJ resistance. Read current modulation is proposed to overcome the failures due to process variation. 
Simulation results reveal $ < 0.01 \% $ failures at the cost of 9ns sense time and 190$ \mu $W power consumption.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Catania:2018:IEE, author = "Vincenzo Catania and Andrea Mineo and Salvatore Monteleone and Maurizio Palesi and Davide Patti", title = "Improving Energy Efficiency in Wireless Network-on-Chip Architectures", journal = j-JETC, volume = "14", number = "1", pages = "9:1--9:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3138807", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Wireless Network-on-Chip (WiNoC) represents a promising emerging communication technology for addressing the scalability limitations of future manycore architectures. In a WiNoC, high-latency and power-hungry long-range multi-hop communications can be realized by performance- and energy-efficient single-hop wireless communications. However, the energy contribution of such wireless communication accounts for a significant fraction of the overall communication energy budget. This article presents a novel energy managing technique for WiNoC architectures aimed at improving the energy efficiency of the main elements of the wireless infrastructure, namely, radio-hubs. The rationale behind the proposed technique is based on selectively turning off, for the appropriate number of cycles, all the radio-hubs that are not involved in the current wireless communication. The proposed energy managing technique is assessed on several network configurations under different traffic scenarios both synthetic and extracted from the execution of real applications. 
The obtained results show that the application of the proposed technique allows up to 25\% total communication energy saving without any impact on performance and with a negligible impact on the silicon area of the radio-hub.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2018:ELC, author = "Bohua Li and Yukui Pei and Wujie Wen", title = "Efficient {LDPC} Code Design for Combating Asymmetric Errors in {STT-RAM}", journal = j-JETC, volume = "14", number = "1", pages = "10:1--10:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154836", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Spin-transfer torque random access memory (STT-RAM) is a promising emerging memory technology in the future memory hierarchy. However, its unique reliability challenges, i.e., the asymmetric bit failure mechanism at different bit flippings, have raised significant concerns in its real applications. Recent studies even show that the common memory error repair ``remedies'' cannot efficiently address them. In this article, we for the first time systematically study the potentials of the strong low-density parity-check (LDPC) code for combating such unique asymmetric errors in both single-level-cell (SLC) and multi-level-cell (MLC) STT-RAM designs. A generic STT-RAM channel model suitable for the SLC/MLC designs, is developed to analytically calibrate all the accumulated asymmetric factors of the write/read operations. The key initial information for LDPC decoding, namely asymmetric log-likelihood ratio (A-LLR), is redesigned and extracted from the proposed channel model, to unleash the LDPC's asymmetric error correcting capability. 
LDPC codec is also carefully designed to lower the hardware cost by leveraging the systematic-structured parity check matrix. Then two customized short-length LDPC codes---(585,512) and (683,512)---augmented from the semi-random parity check matrix and the A-LLR based asymmetric decoding, are proposed for SLC and MLC STT-RAM designs, respectively. Experiments show that our proposed LDPC designs can improve the STT-RAM reliability by at least 10$^2$ (10$^4$) when compared to the existing error correction codes (ECCs) for the SLC (MLC) design, demonstrating the feasibility of LDPC solutions on STT-RAM.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2018:OAE, author = "Yu Liu and Yingyezhe Jin and Peng Li", title = "Online Adaptation and Energy Minimization for Hardware Recurrent Spiking Neural Networks", journal = j-JETC, volume = "14", number = "1", pages = "11:1--11:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3145479", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The Liquid State Machine (LSM) is a promising model of recurrent spiking neural networks that provides an appealing brain-inspired computing paradigm for machine-learning applications such as pattern recognition. Moreover, processing information directly on spiking events makes the LSM well suited for cost- and energy-efficient hardware implementation. In this article, we systematically present three techniques for optimizing energy efficiency while maintaining good performance of the proposed LSM neural processors from both an algorithmic and hardware implementation point of view.
First, to realize adaptive LSM neural processors, thus boost learning
                 performance, we propose a hardware-friendly
                 Spike-Timing Dependent Plastic (STDP) mechanism for
                 on-chip tuning. Then, the LSM processor incorporates a
                 novel runtime correlation-based neuron gating scheme to
                 minimize the power dissipated by reservoir neurons.
                 Furthermore, an activity-dependent clock gating
                 approach is presented to address the energy
                 inefficiency due to the memory-intensive nature of the
                 proposed neural processors. Using two different
                 real-world tasks of speech and image recognition to
                 benchmark, we demonstrate that the proposed
                 architecture boosts the average learning performance by
                 up to 2.0\% while reducing energy dissipation by up to
                 29\% compared to a baseline LSM with little extra
                 hardware overhead on a Xilinx Virtex-6 FPGA.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Grani:2018:SPS,
  author =       "Paolo Grani and Sandro Bartolini",
  title =        "Scalable Path-Setup Scheme for All-Optical Dynamic
                 Circuit Switched {NoCs} in Cache Coherent {CMPs}",
  journal =      j-JETC,
  volume =       "14",
  number =       "1",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154840",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Nanophotonics is a promising solution for on-chip
                 interconnection due to its intrinsic low-latency and
                 low-power features, which can be useful for performance
                 and energy in future Chip Multi-Processors (CMPs). This
                 article proposes a novel arbitrated all-optical
                 path-setup scheme for tiled CMPs adopting
                 circuit-switched optical networks. It aims at
                 significantly reducing path-setup latency and overall
                 energy consumption.
The proposed arbitrated scheme is able to configure multiple photonic
                 switches simultaneously, instead of sequentially as it
                 is done in state-of-the-art proposals. The proposed
                 fast optical path-setup solution reduces the overhead
                 in each transmission and, most importantly, allows
                 optical circuit-switched networks to effectively serve
                 cache coherence traffic, which is mainly composed of
                 relatively small messages. Specifically, we propose a
                 single-arbiter scheme where the whole topology is
                 managed by a central module (single-arbiter) that takes
                 care of the path-setup procedures. Then, to tackle
                 scalability, we propose a logically clustered
                 architecture (multi-arbiter) in which an arbiter is
                 allocated in each logical core-cluster and an ad hoc
                 distributed reservation protocol coordinates arbiters
                 to manage inter-cluster path reservations. We show that
                 our proposed single-arbiter architecture outperforms a
                 state-of-the-art optical network with sequential
                 path-setup (optical baseline) in the case of 8- and
                 16-core tiled CMP setups. However, due to serialization
                 issues, the single-arbiter solution is not able to
                 compete with a reference electronic baseline for bigger
                 32- and 64-core setups even if still performing much
                 better than the optical baseline. Conversely, our
                 multi-arbiter hierarchical solution allows us to
                 improve performance up to almost 20\% and 40\% for 32-
                 and 64-core setups, respectively, demonstrating a wide
                 applicability of the proposed technique. Energy-wise,
                 the analyzed solutions enable significant savings
                 compared to both the optical baseline with sequential
                 path setup, and to the electronic counterpart.
                 Specifically, results show more than 25\% average
                 improvement for the single-arbiter in the 8- and
                 16-core cases, and more than 40\% and 15\% savings for
                 the multi-arbiter in the 32- and 64-core cases,
                 respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{VanRynbach:2018:QCP,
  author =       "Andre {Van Rynbach} and Muhammad Ahsan and Jungsang
                 Kim",
  title =        "A Quantum Computing Performance Simulator Based on
                 Circuit Failure Probability and Fault Path Counting",
  journal =      j-JETC,
  volume =       "14",
  number =       "1",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154837",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Quantum computing performance simulators are needed to
                 provide practical metrics for the effectiveness of
                 executing theoretical quantum information processing
                 protocols on physical hardware. In this work, we
                 present a tool to simulate the execution of
                 fault-tolerant quantum computation by automating the
                 tracking of common fault paths for error propagation
                 through an encoded circuit block and quantifying the
                 failure probability of each encoded qubit throughout
                 the circuit. Our simulator runs a fault path counter on
                 encoded circuit blocks to determine the probability
                 that two or more errors remain on the encoded qubits
                 after each block is executed, and it combines errors
                 from all the encoded blocks to estimate performance
                 metrics such as the logical qubit failure probability,
                 the overall circuit failure probability, the number of
                 qubits used, and the time required to run the overall
                 circuit.
Our technique efficiently estimates the upper bound of the error
                 probability and provides a useful measure of the error
                 threshold at low error probabilities where conventional
                 Monte Carlo methods are ineffective. We describe a way
                 of simplifying the fault-tolerant measurement process
                 in the Steane code to reduce the number of error
                 correction steps necessary. We present simulation
                 results comparing the execution of quantum adders,
                 which constitute a major part of Shor's algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Cao:2018:GEI,
  author =       "Yu Cao and Xin Li and Jae-Sun Seo and Ganesh Dasika",
  title =        "{Guest Editors}' Introduction: Frontiers of Hardware
                 and Algorithms for On-chip Learning",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "14:1--14:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3205944",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Kim:2018:DNN,
  author =       "Hyungjun Kim and Taesu Kim and Jinseok Kim and
                 Jae-Joon Kim",
  title =        "Deep Neural Network Optimized to Resistive Memory with
                 Nonlinear Current-Voltage Characteristics",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3145478",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Artificial Neural Network computation relies on
                 intensive vector-matrix multiplications.
Recently, the emerging nonvolatile memory (NVM) crossbar array showed
                 a feasibility of implementing such operations with high
                 energy efficiency. Thus, there have been many works on
                 efficiently utilizing emerging NVM crossbar arrays as
                 analog vector-matrix multipliers. However, nonlinear
                 I-V characteristics of NVM restrain critical design
                 parameters, such as the read voltage and weight range,
                 resulting in substantial accuracy loss. In this
                 article, instead of optimizing hardware parameters to a
                 given neural network, we propose a methodology of
                 reconstructing the neural network itself to be
                 optimized to resistive memory crossbar arrays. To
                 verify the validity of the proposed method, we
                 simulated various neural networks with MNIST and
                 CIFAR-10 dataset using two different Resistive Random
                 Access Memory models. Simulation results show that our
                 proposed neural network produces inference accuracies
                 significantly higher than conventional neural network
                 when the network is mapped to synapse devices with
                 nonlinear I-V characteristics.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Sarwar:2018:EEN,
  author =       "Syed Shakib Sarwar and Swagath Venkataramani and
                 Aayush Ankit and Anand Raghunathan and Kaushik Roy",
  title =        "Energy-Efficient Neural Computing with Approximate
                 Multipliers",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3097264",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Neural networks, with their remarkable ability to
                 derive meaning from a large volume of complicated or
                 imprecise data, can be used to extract patterns and
                 detect trends that are too complex for the von Neumann
                 computing paradigm.
Their considerable computational requirements stretch the capabilities
                 of even modern computing platforms. We propose an
                 approximate multiplier that exploits the inherent
                 application resilience to error and utilizes the notion
                 of computation sharing to achieve improved energy
                 consumption for neural networks. We also propose a
                 Multiplier-less Artificial Neuron (MAN), which is even
                 more compact and energy efficient. We also propose a
                 network retraining methodology to recover some of the
                 accuracy loss due to the use of these approximate
                 multipliers. We evaluated the proposed algorithm/design
                 on several recognition applications. The results show
                 that we achieve $ \approx $33\%, $ \approx $32\%, and
                 $ \approx $25\% reduction in power consumption and
                 $ \approx $33\%, $ \approx $34\%, and $ \approx $27\%
                 reduction in area, respectively, for 12-, 8-, and 4-bit
                 MAN, with a maximum $ \approx $2.4\% loss in accuracy
                 compared to a conventional neuron implementation of
                 equivalent bit precision. These comparisons were
                 performed under iso-speed conditions.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Ko:2018:RTL,
  author =       "Glenn G. Ko and Rob A. Rutenbar",
  title =        "Real-Time and Low-Power Streaming Source Separation
                 Using {Markov} Random Field",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "17:1--17:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3183351",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Machine learning (ML) has revolutionized a wide range
                 of recognition tasks, ranging from text analysis to
                 speech to vision, most notably in cloud deployments.
                 However, mobile deployment of these ideas involves a
                 very different category of design problems.
In this article, we develop a hardware architecture for a sound source
                 separation task, intended for deployment on a mobile
                 phone. We focus on a novel Markov random field (MRF)
                 sound source separation algorithm that uses
                 expectation-maximization and Gibbs sampling to learn
                 MRF parameters on the fly and infer the best separation
                 of sources. The intrinsically iterative algorithm
                 suggests challenges for both speed and power. A
                 real-time streaming FPGA implementation runs at 150MHz
                 with 207KB RAM, achieves a speed-up of $ 22 \times $
                 over a software reference, performs with an SDR of up
                 to 7.021dB with 1.601ms latency, and exhibits excellent
                 perceived audio quality. A 45nm CMOS ASIC virtual
                 prototype simulated at 20MHz shows that this
                 architecture is small ({$<$10} million gates) and
                 consumes only 70mW, which is less than 2\% of the power
                 of an ARM Cortex-A9 software version. To the best of
                 our knowledge, this is the first Gibbs sampling
                 inference accelerator designed in conventional
                 FPGA/ASIC technology that targets a realistic mobile
                 perceptual application.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Li:2018:GOF,
  author =       "Yixing Li and Zichuan Liu and Kai Xu and Hao Yu and
                 Fengbo Ren",
  title =        "A {GPU}-Outperforming {FPGA} Accelerator Architecture
                 for Binary Convolutional Neural Networks",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "18:1--18:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154839",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "FPGA-based hardware accelerators for convolutional
                 neural networks (CNNs) have received attention due to
                 their higher energy efficiency than GPUs.
However, it is challenging for FPGA-based solutions to achieve a
                 higher throughput than GPU counterparts. In this
                 article, we demonstrate that FPGA acceleration can be a
                 superior solution in terms of both throughput and
                 energy efficiency when a CNN is trained with binary
                 constraints on weights and activations. Specifically,
                 we propose an optimized fully mapped FPGA accelerator
                 architecture tailored for bitwise convolution and
                 normalization that features massive spatial parallelism
                 with deep pipelines stages. A key advantage of the FPGA
                 accelerator is that its performance is insensitive to
                 data batch size, while the performance of GPU
                 acceleration varies largely depending on the batch size
                 of the data. Experiment results show that the proposed
                 accelerator architecture for binary CNNs running on a
                 Virtex-7 FPGA is $ 8.3 \times $ faster and
                 $ 75 \times $ more energy-efficient than a Titan X GPU
                 for processing online individual requests in small
                 batch sizes. For processing static data in large batch
                 sizes, the proposed solution is on a par with a Titan X
                 GPU in terms of throughput while delivering
                 $ 9.5 \times $ higher energy efficiency.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Potok:2018:SCD,
  author =       "Thomas E. Potok and Catherine Schuman and Steven Young
                 and Robert Patton and Federico Spedalieri and Jeremy
                 Liu and Ke-Thia Yao and Garrett Rose and Gangotree
                 Chakma",
  title =        "A Study of Complex Deep Learning Networks on
                 High-Performance, Neuromorphic, and Quantum Computers",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "19:1--19:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178454",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Current deep learning approaches have been very
                 successful using convolutional neural networks trained
                 on large graphical-processing-unit-based computers.
                 Three limitations of this approach are that (1) they
                 are based on a simple layered network topology, i.e.,
                 highly connected layers, without intra-layer
                 connections; (2) the networks are manually configured
                 to achieve optimal results, and (3) the implementation
                 of the network model is expensive in both cost and
                 power. In this article, we evaluate deep learning
                 models using three different computing architectures to
                 address these problems: quantum computing to train
                 complex topologies, high performance computing to
                 automatically determine network topology, and
                 neuromorphic computing for a low-power hardware
                 implementation. We use the MNIST dataset for our
                 experiment, due to input size limitations of current
                 quantum computers. Our results show the feasibility of
                 using the three architectures in tandem to address the
                 above deep learning limitations.
We show that a quantum computer can find high quality values of
                 intra-layer connection weights in a tractable time as
                 the complexity of the network increases, a high
                 performance computer can find optimal layer-based
                 topologies, and a neuromorphic computer can represent
                 the complex topology and weights derived from the other
                 architectures in low power memristive hardware.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Xu:2018:SPC,
  author =       "Jiang Xu and Yuichi Nakamura and Andrew Kahng",
  title =        "Silicon Photonics for Computing Systems",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "20:1--20:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3208198",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Zhang:2018:LBT,
  author =       "Zhe Zhang and Yaoyao Ye",
  title =        "A Learning-Based Thermal-Sensitive Power Optimization
                 Approach for Optical {NoCs}",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "21:1--21:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173468",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Optical networks-on-chip (NoCs) based on silicon
                 photonics have been proposed as emerging on-chip
                 communication architectures for chip multiprocessors
                 with large core counts.
However, due to the thermal sensitivity of optical devices used in
                 optical NoCs, on-chip temperature variations cause
                 significant thermal-induced optical power loss, which
                 would counteract the power advantages of optical NoCs.
                 To tackle this problem, in this work, we propose a
                 learning-based thermal-sensitive power optimization
                 approach for mesh- or torus-based optical NoCs in the
                 presence of temperature variations. The key techniques
                 proposed include an initial device-setting and
                 thermal-tuning mechanism that is a device-level
                 optimization technique, and a learning-based
                 thermal-sensitive adaptive routing algorithm that is a
                 network-level optimization technique. Simulation
                 results of an $ 8 \times 8 $ mesh-based optical NoC
                 show that the proposed initial device-setting and
                 thermal-tuning mechanism confines the worst-case
                 thermal-induced optical energy consumption to be on the
                 order of tens of pJ/bit, by avoiding significant
                 thermal-induced optical power loss caused by
                 temperature-dependent wavelength shifts. Besides, it
                 shows that the learning-based thermal-sensitive
                 adaptive routing algorithm is able to find an optimal
                 path with the minimum estimated thermal-induced optical
                 power consumption for each communication pair. The
                 proposed routing has a greater space for optimization,
                 especially for applications with more long-distance
                 traffic.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Xu:2018:PVT,
  author =       "Yi Xu and Jun Yang and Rami Melhem",
  title =        "A Process-Variation-Tolerant Method for Nanophotonic
                 On-Chip Network",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "22:1--22:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3208073",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Nanophotonic networks, a potential candidate for
                 future networks on-chip, have been challenged for their
                 reliability due to several device-level limitations.
                 One of the main issues is that fabrication errors
                 (a.k.a. process variations) can cause devices to
                 malfunction, rendering communication unreliable. For
                 example, the microring resonator, a preferred optical
                 modulator device, may not resonate at the designated
                 wavelength under process variations (PVs), leading to
                 communication errors and bandwidth loss. This article
                 proposes a series of solutions to the wavelength
                 drifting problem of microrings and subsequent bandwidth
                 loss problem of an optical network, due to PVs. The
                 objective is to maximize network bandwidth through
                 proper arrangement among microrings and wavelengths
                 with minimum power requirements. Our arrangement,
                 called ``MinTrim,'' solves this problem using simple
                 integer linear programming, adding supplementary
                 microrings, and allowing flexible assignment of
                 wavelengths to network nodes as long as the resulting
                 network presents maximal bandwidth. Each step is shown
                 to improve bandwidth provisioning with lower power
                 requirements.
Evaluations on a sample network show that a baseline network could
                 lose more than 40\% bandwidth due to PVs. Such loss can
                 be recovered by MinTrim to produce a network with
                 98.4\% working bandwidth. In addition, the power
                 required for arranging microrings is 39\% lower than
                 the baseline. Therefore, MinTrim provides an efficient
                 PV-tolerant solution to improving the reliability of
                 on-chip photonics.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Fusella:2018:RPC,
  author =       "Edoardo Fusella and Alessandro Cilardo",
  title =        "Reducing Power Consumption of Lasers in Photonic
                 {NoCs} through Application-Specific Mapping",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "23:1--23:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173463",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "To face the complex communication problems that arise
                 as the number of on-chip components grows up, photonic
                 networks-on-chip (NoCs) have been recently proposed to
                 replace electronic interconnects. However, photonic
                 NoCs lack efficient laser sources, possibly resulting
                 in an inefficient or inoperable architecture. In this
                 article, we introduce a methodology for the design
                 space exploration of optical NoC mapping solutions,
                 which automatically assigns IPs/cores to the network
                 tiles such that the laser power consumption is
                 minimized.
The experimental evaluation shows average reductions of 34.7\% and
                 27.3\% in the power consumption compared to,
                 respectively, application-oblivious and randomly mapped
                 photonic NoCs, allowing improved energy efficiency.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Luo:2018:OOW,
  author =       "Jiating Luo and Cedric Killian and Sebastien {Le Beux}
                 and Daniel Chillet and Olivier Sentieys and Ian
                 O'Connor",
  title =        "Offline Optimization of Wavelength Allocation and
                 Laser Power in Nanophotonic Interconnects",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "24:1--24:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178453",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Optical Network-on-Chip (ONoC) is a promising
                 communication medium for large-scale multiprocessor
                 systems-on-chips. Indeed, ONoC can outperform classical
                 electrical NoCs in terms of energy efficiency and
                 bandwidth density, in particular, because this medium
                 can support multiple transactions at the same time on
                 different wavelengths by using Wavelength Division
                 Multiplexing (WDM). However, multiple signals sharing
                 simultaneously the same part of a waveguide can lead to
                 inter-channel crosstalk noise. This problem impacts the
                 signal-to-noise ratio of the optical signals, which
                 leads to an increase in the Bit Error Rate (BER) at the
                 receiver side. If a specific BER is targeted, an
                 increase of laser power should be necessary to satisfy
                 the SNR. In this context, an important issue is to
                 evaluate the laser power needed to satisfy the various
                 desired communication bandwidths based on the BER
                 performance requirements.
In this article, we propose an off-line approach that concurrently
                 optimizes the laser power scaling and execution time of
                 a global application. A set of different levels of
                 power is introduced for each laser, to ensure that
                 optical signals can be emitted with just-enough power
                 to ensure targeted BER. As a result, most promising
                 solutions are highlighted for mapping a defined
                 application onto a 16-core ring-based WDM ONoC.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Vanwinkle:2018:SSH,
  author =       "Scott Vanwinkle and Avinash Karanth Kodi",
  title =        "{SHARP}: Shared Heterogeneous Architecture with
                 Reconfigurable Photonic Network-on-Chip",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3185383",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "As the relentless quest for higher throughput and
                 lower energy cost continues in heterogeneous
                 multicores, there is a strong demand for
                 energy-efficient and high-performance Network-on-Chip
                 (NoC) architectures. Heterogeneous architectures that
                 can simultaneously utilize both the serialized nature
                 of the CPU as well as the thread level parallelism of
                 the GPU are gaining traction in the industry. A
                 critical issue with heterogeneous architectures is
                 finding an optimal way to utilize the shared resources
                 such as the last level cache and NoC without hindering
                 the performance of either the CPU or the GPU core.
                 Photonic interconnects are a disruptive technology
                 solution that has the potential to increase the
                 bandwidth, reduce latency, and improve
                 energy-efficiency over traditional metallic
                 interconnects.
In this article, we propose a CPU-GPU heterogeneous architecture
                 called Shared Heterogeneous Architecture with
                 Reconfigurable Photonic Network-on-Chip (SHARP) that
                 clusters CPU and GPU cores around the same router and
                 dynamically allocates bandwidth between the CPU and GPU
                 cores based on application demands. The SHARP
                 architecture is designed as a Single-Writer
                 Multiple-Reader (SWMR) crossbar with reservation-assist
                 to connect CPU/GPU cores that dynamically reallocates
                 bandwidth using buffer utilization information at
                 runtime. As network traffic exhibits temporal and
                 spatial fluctuations due to application behavior, SHARP
                 can dynamically reallocate bandwidth and thereby adapt
                 to application demands. SHARP demonstrates 34\%
                 performance (throughput) improvement over a baseline
                 electrical CMESH while consuming 25\% less energy per
                 bit. Simulation results have also shown 6.9\% to
                 14.9\% performance improvement over other flavors of
                 the proposed SHARP architecture without dynamic
                 bandwidth allocation.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Ishihara:2018:INP,
  author =       "Tohru Ishihara and Akihiko Shinya and Koji Inoue and
                 Kengo Nozaki and Masaya Notomi",
  title =        "An Integrated Nanophotonic Parallel Adder",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178452",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "Integrated optical circuits with nanophotonic devices
                 have attracted significant attention due to their low
                 power dissipation and light-speed operation. With light
                 interference and resonance phenomena, the nanophotonic
                 device works as a voltage-controlled optical pass-gate
                 like a pass-transistor.
This article first introduces the concept of optical pass-gate logic
                 and then proposes a parallel adder circuit based on
                 optical pass-gate logic. Experimental results obtained
                 with an optoelectronic circuit simulator show the
                 advantages of our optical parallel adder circuit over a
                 traditional CMOS-based parallel adder circuit.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Xu:2018:IHS,
  author =       "Shi Xu and Zhang Luo and Mingche Lai and Zhengbin Pang
                 and Renfa Li",
  title =        "Integrated High-Speed Optical {SerDes} over {100GBd}
                 Based on Optical Time Division Multiplexing",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154838",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "An on-chip optical transceiver for transmission system
                 over 100GBd is proposed based on optical time division
                 multiplexing (OTDM) technology, and the performances,
                 such as the insertion loss, the inter-symbol
                 interference (ISI) crosstalk, and the potential symbol
                 rate, are analyzed in detail. Co-designed with the
                 double rail driver, on-chip Mach-Zehnder interferometer
                 switch repeatedly generates extremely narrow sampling
                 pulses of only 12ps full width at half maximum. Based
                 on such narrow optical sampling pulse train, a
                 four-stage cascaded optical switch divides the 25GHz
                 clock cycle into four recurrent 9.5ps time slots and
                 one blank time slot of 2ps. Thus, a 100GBd optical
                 transmission channel is realized based on 4-bit 25Gbps
                 bit-streams at the electrical interface. The ISI
                 extinction ratio at the worst channel is 1.9dB with
                 10dB depth modulator, and the insertion loss caused by
                 the OTDM mechanism is about 16dB.
Further, taking advantages of dark modulation, an OTDM system with
                 5-bit 25Gbps bit-streams at the electrical interface is
                 proposed to generate a 125GBd transmission utilizing
                 the same optical sampling pulse. The ISI performance is
                 much better and the extinction ratio at the worst
                 channel is enhanced to 3.99dB.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Alsuwaiyan:2018:MMT,
  author =       "Ali Alsuwaiyan and Kartik Mohanram",
  title =        "{MFNW}: an {MLC\slash TLC} Flip-N-Write Architecture",
  journal =      j-JETC,
  volume =       "14",
  number =       "2",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154841",
  ISSN =         "1550-4832",
  bibdate =      "Thu Nov 1 16:44:40 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  abstract =     "The increased capacity of multi-level cells (MLC) and
                 triple-level cells (TLC) in emerging non-volatile
                 memory (NVM) technologies comes at the cost of higher
                 cell write energies and lower cell endurance. In this
                 article, we describe MFNW, a Flip-N-Write encoding that
                 effectively reduces the write energy and improves the
                 endurance of MLC NVMs. Two MFNW modes are analyzed:
                 cell Hamming distance mode and energy Hamming distance
                 mode. We derive an approximate model that accurately
                 predicts the average number of cell writes that is
                 proportional to the energy consumption, enabling word
                 length optimization to maximize energy reduction
                 subject to memory space overhead constraints. In
                 comparison to state-of-the-art MLC NVM encodings, our
                 simulation results indicate that MFNW achieves up to
                 7\%--39\% saving for 1.56\%--50\% NVM space overhead.
                 Extra energy saving (up to 19\%--47\%) can be achieved
                 for the same NVM space overhead using our proposed
                 variations of MFNW, i.e., MFNW2 and MFNW3.
For TLC NVMs, we propose TFNW that can achieve up to 53\% energy saving in comparison to state-of-the-art TLC NVM encodings. Endurance simulations indicate that MFNW (TFNW) is capable of extending MLC (TLC) NVM life by up to 100\% (87\%).", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chen:2018:CLA, author = "Shuai Chen and Junlin Chen and Lei Wang", title = "A Chip-Level Anti-Reverse Engineering Technique", journal = j-JETC, volume = "14", number = "2", pages = "29:1--29:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173462", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Protection of intellectual property (IP) is increasingly critical for IP vendors in the semiconductor industry. However, advanced reverse engineering techniques can physically disassemble the chip and derive the IPs at a much lower cost than the value of IP design that chips carry. This invasive hardware attack---obtaining information from IC chips---always violates the IP rights of vendors. The intent of this article is to present a chip-level reverse engineering resilient design technique. In the proposed technique, transformable interconnects enable an IC chip to maintain functioning in normal use and to transform its physical structure into another pattern when exposed to invasive attacks. The newly created pattern will significantly increase the difficulty of reverse engineering. Furthermore, to improve the effectiveness of the proposed technique, a systematic design method is developed targeting integrated circuits with multiple design constraints.
Simulations have been conducted to demonstrate the capability of the proposed technique, which generates extremely large complexity for reverse engineering with manageable overhead.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bhattacharjee:2018:KSA, author = "Debjyoti Bhattacharjee and Anne Siemon and Eike Linn and Stephan Menzel and Anupam Chattopadhyay", title = "{Kogge--Stone} Adder Realization using {1S1R} Resistive Switching Crossbar Arrays", journal = j-JETC, volume = "14", number = "2", pages = "30:1--30:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3183352", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Low operating voltage, high storage density, non-volatile storage capabilities, and relative low access latencies have popularized memristive devices as storage devices. Memristors can be ideally used for in-memory computing in the form of hybrid CMOS nano-crossbar arrays. In-memory serial adders have been theoretically and experimentally proven for crossbar arrays. To harness the parallelism of memristive arrays, parallel-prefix adders can be effective. In this work, a novel mapping scheme for in-memory Kogge-Stone adder has been presented. The number of cycles increases logarithmically with the bit width $N$ of the operands, i.e., $ O(\log_2 N) $, and the device count is $ 5 N $. We verify the correctness of the proposed scheme by means of TaO$_x$ device model-based memristive simulations.
We compare the proposed scheme with other proposed schemes in terms of number of cycle and number of devices.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Neugebauer:2018:FQM, author = "Florian Neugebauer and Ilia Polian and John P. Hayes", title = "Framework for Quantifying and Managing Accuracy in Stochastic Circuit Design", journal = j-JETC, volume = "14", number = "2", pages = "31:1--31:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3183345", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:40 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Stochastic circuits (SCs) offer considerable area- and power-consumption benefits in various applications at the expense of computational inaccuracies. Unlike conventional logic synthesis, managing accuracy is a central problem in SC design. It is usually tackled in ad hoc fashion by multiple trial-and-error simulations that vary relevant parameters like the stochastic number length $n$. We present, for the first time, a systematic design approach to controlling the accuracy of SCs and balancing it against other design parameters. We express the (in)accuracy of a circuit processing $n$-bit stochastic numbers by the numerical deviation of the computed value from the expected result, in conjunction with a confidence level. Using the theory of Monte Carlo simulation, we derive expressions for the stochastic number length required for a desired level of accuracy or vice versa. We discuss the integration of the theory into a design framework that is applicable to both combinational and sequential SCs. We show that for combinational SCs, accuracy is independent of the circuit's size or complexity, a surprising result.
We also show how the analysis can identify subtle errors in both combinational and sequential designs. Finally, we apply the proposed methods to a case study on filtering noisy EKG signals.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lee:2018:DSE, author = "Dongjin Lee and Sourav Das and Dae Hyun Kim and Janardhan Rao Doppa and Partha Pratim Pande", title = "Design Space Exploration of {$3$D} Network-on-Chip: a Sensitivity-based Optimization Approach", journal = j-JETC, volume = "14", number = "3", pages = "32:1--32:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3197567", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "High-performance and energy-efficient Network-on-Chip (NoC) architecture is one of the crucial components of the manycore processing platforms. A very promising NoC architecture recently proposed in the literature is the three-dimensional small-world NoC (3D SWNoC). Due to short vertical links in 3D integration and the robustness of small-world networks, the 3D SWNoC architecture outperforms its other 3D counterparts. However, the performance of 3D SWNoC is highly dependent on the placement of the links and associated routers. In this article, we propose a sensitivity-based link placement algorithm (SEN) to optimize the performance of 3D SWNoC. The sensitivity of a link in a NoC measures the importance of the link. The SEN algorithm optimizes the performance of 3D SWNoC by calculating the sensitivities of all the links in the NoC and removing the least important link repeatedly. We compare the performance of SEN algorithm with simulated annealing- (SA) and recently proposed machine-learning-based (ML) optimization algorithm. 
The optimized 3D SWNoC obtained by the proposed SEN algorithm achieves, on average, 11.5\% and 13.6\% lower latency and 18.4\% and 21.7\% lower energy-delay product than those optimized by the SA and ML algorithms respectively. In addition, the SEN algorithm is 26 to 33 times faster than the SA algorithm for the optimization of 64-, 128-, and 256-core 3D SWNoC designs. The performance gain provided by the SEN-, SA-, and ML-based methods also depend on the characteristics of the benchmarks under consideration. If the traffic pattern generated by a benchmark does not have enough variation, then the ML-based method does not have adequate opportunity to optimize the network. However, we find that ML-based methodology has faster convergence time than SEN and SA for bigger systems. The ML-based optimization algorithm is almost 4 and 97 times faster than the SEN- and SA-based algorithm for a system with 256 cores.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cui:2018:HTD, author = "Xiaotong Cui and Elnaz Koopahi and Kaijie Wu and Ramesh Karri", title = "Hardware {Trojan} Detection Using the Order of Path Delay", journal = j-JETC, volume = "14", number = "3", pages = "33:1--33:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3229050", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Many fabrication-less design houses are outsourcing their designs to third-party foundries for fabrication to lower cost. This IC development process, however, raises serious security concerns on Hardware Trojans (HTs). Many design-for-trust techniques have been proposed to detect HTs through observing erroneous output or abnormal side-channel characteristics. 
Side-channel characteristics such as path delay have been widely used for HT detection and functionality verification, as the changes of the characteristics of the host circuit incurred by the inserted HT can be identified through proper methods. In this article, for the first time, we propose a two-phase technique, which uses the order of the path delay in path pairs to detect HTs. In the design phase, a full-cover path set that covers all the nets of the design is generated; meanwhile, in the set, the relative order of paths in path pairs is determined according to their delay. The order of the paths in path pairs serves as the fingerprint of the design. In the test phase, the actual delay of the paths in the full-cover set is extracted from the fabricated circuits, and the order of paths in path pairs is compared with the fingerprint generated in the design phase. A mismatch between them indicates the existence of HTs. Both process variations and measurement noise are taken into consideration. The efficiency and accuracy of the proposed technique are confirmed by a series of experiments, including the examination of both violated path pairs incurred by HTs and their false alarm rate.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lu:2018:RHM, author = "Guan-Ruei Lu and Ansuman Banerjee and Bhargab B. 
Bhattacharya and Tsung-Yi Ho and Hung-Ming Chen", title = "Reliability Hardening Mechanisms in Cyber-Physical Digital-Microfluidic Biochips", journal = j-JETC, volume = "14", number = "3", pages = "34:1--34:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3229052", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In the area of biomedical engineering, digital-microfluidic biochips (DMFBs) have received considerable attention because of their capability of providing an efficient and reliable platform for conducting point-of-care clinical diagnostics. System reliability, in turn, mandates error-recoverability while implementing biochemical assays on-chip for medical applications. Unfortunately, the technology of DMFBs is not yet fully equipped to handle error-recovery from various microfluidic operations involving droplet motion and reaction. Recently, a number of cyber-physical systems have been proposed to provide real-time checking and error-recovery in assays based on the feedback received from a few on-chip checkpoints. However, to synthesize robust feedback systems for different types of DMFBs, certain practical issues need to be considered such as co-optimization of checkpoint placement, error-recoverability, and layout of droplet-routing pathways. For application-specific DMFBs, we propose here an algorithm that minimizes the number of checkpoints and determines their locations to cover every path in a given droplet-routing solution. Next, for general-purpose DMFBs, where the checkpoints are pre-deployed in specific locations, we present a checkpoint-aware routing algorithm such that every droplet-routing path passes through at least one checkpoint to enable error-recovery and to ensure physical routability of all droplets. Furthermore, we also propose strategies for executing the algorithms in reliable mode to enhance error-recoverability. 
The proposed methods thus provide reliability-hardening mechanisms for a wide class of cyber-physical DMFBs.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Parveen:2018:IEE, author = "Farhana Parveen and Shaahin Angizi and Deliang Fan", title = "{IMFlexCom}: Energy Efficient In-Memory Flexible Computing Using Dual-Mode {SOT-MRAM}", journal = j-JETC, volume = "14", number = "3", pages = "35:1--35:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3223047", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In this article, we propose an In-Memory Flexible Computing platform (IMFlexCom) using a novel Spin Orbit Torque Magnetic Random Access Memory (SOT-MRAM) array architecture, which could work in dual mode: memory mode and computing mode. Such intrinsic in-memory logic (AND/OR/XOR) could be used to process data within memory to greatly reduce power-hungry and long distance massive data communication in conventional Von Neumann computing systems. A comprehensive reliability analysis is performed, which confirms $ \approx $90mV and $ \approx $10mV (worst-case) sense margin for memory and in-memory logic operation in variations on resistance-area product and tunnel magnetoresistance. We further show that sense margin for in-memory logic computation can be significantly increased by increasing the oxide thickness. Furthermore, we employ bulk bitwise vector operation and data encryption engine as case studies to investigate the performance of our proposed design. IMFlexCom shows $ \approx 35 \times $ energy saving and $ \approx 18 \times $ speedup for bulk bitwise in-memory vector AND/OR operation compared to DRAM-based in-memory logic. 
Again, IMFlexCom can achieve 77.27\% and 85.4\% lower energy consumption compared to CMOS-ASIC- and CMOL-based Advanced Encryption Standard (AES) implementations, respectively. It offers almost similar energy consumption as recent DW-AES implementation with 66.7\% less area overhead.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Munoz-Coreas:2018:CQO, author = "Edgard Mu{\~n}oz-Coreas and Himanshu Thapliyal", title = "{T}-count and Qubit Optimized Quantum Circuit Design of the Non-Restoring Square Root Algorithm", journal = j-JETC, volume = "14", number = "3", pages = "36:1--36:15", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264816", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/elefunt.bib; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Quantum circuits for basic mathematical functions such as the square root are required to implement scientific computing algorithms on quantum computers. Quantum circuits that are based on Clifford+T gates can easily be made fault tolerant, but the T gate is very costly to implement. As a result, reducing T-count has become an important optimization goal. Further, quantum circuits with many qubits are difficult to realize, making designs that save qubits and produce no garbage outputs desirable. In this work, we present a T-count optimized quantum square root circuit with only $ 2 \cdot n + 1 $ qubits and no garbage output. To make a fair comparison against existing work, the Bennett's garbage removal scheme is used to remove garbage output from existing works.
We determined that our proposed design achieves an average T-count savings of 43.44\%, 98.95\%, 41.06\%, and 20.28\% as well as qubit savings of 85.46\%, 95.16\%, 90.59\%, and 86.77\% compared to existing works.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Alqahtani:2018:SLA, author = "Ayed Alqahtani and Zongqing Ren and Jaeho Lee and Nader Bagherzadeh", title = "System-Level Analysis of {$3$D} {ICs} with Thermal {TSVs}", journal = j-JETC, volume = "14", number = "3", pages = "37:1--37:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264736", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "3D stacking of integrated circuits (ICs) provides significant advantages in saving device footprints, improving power management, and continuing performance enhancement, particularly for many-core systems. However, the stacked structure makes the heat dissipation a challenging issue. While Thermal Through Silicon Via (TTSV) is a promising way of lowering the thermal resistance of dies, past research has either overestimated or underestimated the effects of TTSVs as a consequence of the lack of detailed 3D IC models or system-level simulations. Here, we propose a simulation flow to accurately simulate TTSV effects on 3D ICs. We adopt benchmarks from Splash-2 running on a full-system mode of the gem5 simulator, which generates all the system component activities. McPAT is used to generate the corresponding power consumption and the power traces are fed to HotSpot for thermal simulation. The temperature profiles of 2D and 3D Nehalem-like x86 processors are compared. 
TTSVs are later placed close to hotspot regions to facilitate heat dissipation; the peak temperature of 3D Nehalem is reduced by 5--25\% with a small area overhead of 6\%. By using a detailed 3D thermal model, full-system simulation, and a validated thermal simulator, our results show accurate thermal analysis of 3D ICs.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Athreyas:2018:MCA, author = "Nihar Athreyas and Wenhao Song and Blair Perot and Qiangfei Xia and Abbie Mathew and Jai Gupta and Dev Gupta and J. Joshua Yang", title = "Memristor-{CMOS} Analog Coprocessor for Acceleration of High-Performance Computing Applications", journal = j-JETC, volume = "14", number = "3", pages = "38:1--38:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3269985", ISSN = "1550-4832", bibdate = "Thu Nov 1 16:44:41 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib", abstract = "Vector matrix multiplication computation underlies major applications in machine vision, deep learning, and scientific simulation. These applications require high computational speed and are run on platforms that are size, weight, and power constrained. With the transistor scaling coming to an end, existing digital hardware architectures will not be able to meet this increasing demand. Analog computation with its rich set of primitives and inherent parallel architecture can be faster, more efficient, and compact for some of these applications. One such primitive is a memristor-CMOS crossbar array-based vector matrix multiplication. In this article, we develop a memristor-CMOS analog coprocessor architecture that can handle floating-point computation. 
To demonstrate the working of the analog coprocessor at a system level, we use a new electronic design automation tool called PSpice Systems Option, which performs integrated cosimulation of MATLAB/Simulink and PSpice. It is shown that the analog coprocessor has a superior performance when compared to other processors, and a speedup of up to $ 12 \times $ when compared to projected GPU performance is observed. Using the new PSpice Systems Option tool, various application simulations for image processing and solutions to partial differential equations are performed on the analog coprocessor model.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Anonymous:2018:GEI, author = "Anonymous", title = "{Guest Editor} Introduction: Neuromorphic Computing", journal = j-JETC, volume = "14", number = "4", pages = "39:1--39:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3283217", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3283217", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Hamilton:2018:SHE, author = "Kathleen E. Hamilton and Neena Imam and Travis S. 
Humble", title = "Sparse Hardware Embedding of Spiking Neuron Systems for Community Detection", journal = j-JETC, volume = "14", number = "4", pages = "40:1--40:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3223048", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3223048", abstract = "We study the applicability of spiking neural networks and neuromorphic hardware for solving general optimization problems without the use of adaptive training or learning algorithms. We leverage the dynamics of Hopfield networks and spin-glass systems to construct a fully connected spiking neural system to generate synchronous spike responses indicative of the underlying community structure in an undirected, unweighted graph. Mapping this fully connected system to current generation neuromorphic hardware is done by embedding sparse tree graphs to generate only the leading-order spiking dynamics. We demonstrate that for a chosen set of benchmark graphs, the spike responses generated on a current generation neuromorphic processor can improve the stability of graph partitions and non-overlapping communities can be identified even with the loss of higher-order spiking behavior if the graphs are sufficiently dense. For sparse graphs, the loss of higher-order spiking behavior improves the stability of certain graph partitions but does not retrieve the known community memberships.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bontupalli:2018:EMB, author = "Venkataramesh Bontupalli and Chris Yakopcic and Raqibul Hasan and Tarek M.
Taha", title = "Efficient Memristor-Based Architecture for Intrusion Detection and High-Speed Packet Classification", journal = j-JETC, volume = "14", number = "4", pages = "41:1--41:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264819", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3264819", abstract = "Deep packet inspection (DPI) is a critical component to prevent intrusion detection. This requires a detailed analysis of each network packet header and body. Although this is often done on dedicated high-power servers in most networked systems, mobile systems could potentially be vulnerable to attack if utilized on an unprotected network. In this case, having DPI hardware on the mobile system would be highly beneficial. Unfortunately, DPI hardware is generally area and power consuming, making its implementation difficult in mobile systems. We developed a memristor crossbar-based approach, inspired by memristor crossbar neuromorphic circuits, for a low-power, low-area, and high-throughput DPI system that examines both the header and body of a packet. Two key types of circuits are presented: static pattern matching and regular expression circuits. This system is able to reduce execution time and power consumption due to its high-density grid and massive parallelism. 
Independent searches are performed using low-power memristor crossbar arrays giving rise to a throughput of 160Gbps with no loss in the classification accuracy.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Chang:2018:PPA, author = "Kyungwook Chang and Deepak Kadetotad and Yu Cao and Jae-Sun Seo and Sung Kyu Lim", title = "Power, Performance, and Area Benefit of Monolithic {$3$D} {ICs} for On-Chip Deep Neural Networks Targeting Speech Recognition", journal = j-JETC, volume = "14", number = "4", pages = "42:1--42:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3273956", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3273956", abstract = "In recent years, deep learning has become widespread for various real-world recognition tasks. In addition to recognition accuracy, energy efficiency and speed (i.e., performance) are other grand challenges to enable local intelligence in edge devices. In this article, we investigate the adoption of monolithic three-dimensional (3D) IC (M3D) technology for deep learning hardware design, using speech recognition as a test vehicle. M3D has recently proven to be one of the leading contenders to address the power, performance, and area (PPA) scaling challenges in advanced technology nodes. Our study encompasses the influence of key parameters in DNN hardware implementations towards their performance and energy efficiency, including DNN architectural choices, underlying workloads, and tier partitioning choices in M3D designs. Our post-layout M3D designs, together with hardware-efficient sparse algorithms, produce power savings and performance improvement beyond what can be achieved using conventional 2D ICs. 
Experimental results show that M3D offers 22.3\% iso-performance power saving and 6.2\% performance improvement, convincingly demonstrating its entitlement as a solution for DNN ASICs. We further present architectural and physical design guidelines for M3D DNNs to maximize the benefits.", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zyarah:2018:STM, author = "Abdullah M. Zyarah and Dhireesha Kudithipudi", title = "Semi-Trained Memristive Crossbar Computing Engine with In Situ Learning Accelerator", journal = j-JETC, volume = "14", number = "4", pages = "43:1--43:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233987", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3233987", abstract = "On-device intelligence is gaining significant attention recently as it offers local data processing and low power consumption. In this research, an on-device training circuitry for threshold-current memristors integrated in a crossbar structure is proposed. Furthermore, alternate approaches of mapping the synaptic weights into fully trained and semi-trained crossbars are investigated. In a semi-trained crossbar, a confined subset of memristors are tuned and the remaining subset of memristors are not programmed. This translates to optimal resource utilization and power consumption, compared to a fully programmed crossbar. The semi-trained crossbar architecture is applicable to a broad class of neural networks. System level verification is performed with an extreme learning machine for binomial and multinomial classification. 
The total power for a single $ 4 \times 4 $ layer network, when implemented in IBM 65nm node, is estimated to be $ \approx 42.16 \mu $ W and the area is estimated to be $ 26.48 \mu $ m $ \times 22.35 \mu $ m.", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Srinivasan:2018:SBU, author = "Gopalakrishnan Srinivasan and Priyadarshini Panda and Kaushik Roy", title = "{STDP}-based Unsupervised Feature Learning using Convolution-over-time in Spiking Neural Networks for Energy-Efficient Neuromorphic Computing", journal = j-JETC, volume = "14", number = "4", pages = "44:1--44:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3266229", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3266229", abstract = "Brain-inspired learning models attempt to mimic the computations performed in the neurons and synapses constituting the human brain to achieve its efficiency in cognitive tasks. In this work, we propose Spike Timing Dependent Plasticity-based unsupervised feature learning using convolution-over-time in Spiking Neural Network (SNN). We use shared weight kernels that are convolved with the input patterns over time to encode representative input features, thereby improving the sparsity as well as the robustness of the learning model. We show that the Convolutional SNN self-learns several visual categories for object recognition with limited number of training patterns while yielding comparable classification accuracy relative to the fully connected SNN. 
Further, we quantify the energy benefits of the Convolutional SNN over fully connected SNN on neuromorphic hardware implementation.", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bai:2018:DEE, author = "Kangjun Bai and Yang Yi", title = "{DFR}: an Energy-efficient Analog Delay Feedback Reservoir Computing System for Brain-inspired Computing", journal = j-JETC, volume = "14", number = "4", pages = "45:1--45:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264659", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Neuromorphic computing, which is built on a brain-inspired silicon chip, is uniquely applied to keep pace with the explosive escalation of algorithms and data density on machine learning. Reservoir computing, an emerging computing paradigm based on the recurrent neural network with proven benefits across multifaceted applications, offers an alternative training mechanism only at the readout stage. In this work, we successfully design and fabricate an energy-efficient analog delayed feedback reservoir (DFR) computing system, which is built upon a temporal encoding scheme, a nonlinear transfer function, and a dynamic delayed feedback loop. Measurement results demonstrate its high energy efficiency with rich dynamic behaviors, making the designed system a candidate for low power embedded applications. The system performance, as well as the robustness, are studied and analyzed through the Monte Carlo simulation. The chaotic time series prediction benchmark, NARMA10, is examined through the proposed DFR computing system, and exhibits a 36\%--85\% reduction on the error rate compared to state-of-the-art DFR computing system designs. 
To the best of our knowledge, our work represents the first analog integrated circuit (IC) implementation of the DFR computing system.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Loomis:2018:FIT, author = "Lisa Loomis and Nathan McDonald and Cory Merkel", title = "An {FPGA} Implementation of a Time Delay Reservoir Using Stochastic Logic", journal = j-JETC, volume = "14", number = "4", pages = "46:1--46:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3269984", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents and demonstrates a stochastic logic time delay reservoir design in FPGA hardware. The reservoir network approach is analyzed using a number of metrics, such as kernel quality, generalization rank, and performance on simple benchmarks and is also compared to a deterministic design. A novel re-seeding method is introduced to reduce the adverse effects of stochastic noise, which may also be implemented in other stochastic logic reservoir computing designs, such as echo state networks. Benchmark results indicate that the proposed design performs well on noise-tolerant classification problems, but more work needs to be done to improve the stochastic logic time delay reservoir's robustness for regression problems. 
In addition, we show that the stochastic design can significantly reduce area cost if the conversion between binary and stochastic representations is implemented efficiently.", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2018:MLO, author = "Zhongyang Liu and Shaoheng Luo and Xiaowei Xu and Yiyu Shi and Cheng Zhuo", title = "A Multi-Level-Optimization Framework for {FPGA}-Based Cellular Neural Network Implementation", journal = j-JETC, volume = "14", number = "4", pages = "47:1--47:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3273957", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3273957", abstract = "Cellular Neural Network (CeNN) is considered as a powerful paradigm for embedded devices. Its analog and mix-signal hardware implementations are proved to be applicable to high-speed image processing, video analysis, and medical signal processing with its efficiency and popularity limited by smaller implementation size and lower precision. Recently, digital implementations of CeNNs on FPGA have attracted researchers from both academia and industry due to its high flexibility and short time-to-market. However, most existing implementations are not well optimized to fully utilize the advantages of FPGA platform with unnecessary design and computational redundancy that prevents speedup. We propose a multi-level-optimization framework for energy-efficient CeNN implementations on FPGAs. In particular, the optimization framework is featured with three level optimizations: system-, module-, and design-space-level, with focus on computational redundancy and attainable performance, respectively. 
Experimental results show that with various configurations our framework can achieve an energy-efficiency improvement of $ 3.54 \times $ and up to $ 3.88 \times $ speedup compared with existing implementations with similar accuracy.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xu:2018:EHI, author = "Xiaowei Xu and Qing Lu and Tianchen Wang and Yu Hu and Chen Zhuo and Jinglan Liu and Yiyu Shi", title = "Efficient Hardware Implementation of Cellular Neural Networks with Incremental Quantization and Early Exit", journal = j-JETC, volume = "14", number = "4", pages = "48:1--48:??", month = dec, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264817", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Cellular neural networks (CeNNs) have been widely adopted in image processing tasks. Recently, various hardware implementations of CeNNs have emerged in the literature, with Field Programmable Gate Array (FPGA) being one of the most popular choices due to its high flexibility and low time-to-market. However, CeNNs typically involve extensive computations in a recursive manner. As an example, to simply process an image of 1,920 $ \times $ 1,080 pixels requires 4--8 Giga floating point multiplications (for $ 3 \times 3 $ templates and 50--100 iterations), which needs to be done in a timely manner for real-time applications. To address this issue, in this article, we propose a compressed CeNN framework for efficient FPGA implementations. It involves various techniques, such as incremental quantization and early exit, which significantly reduces computation demands while maintaining an acceptable performance. 
Particularly, incremental quantization quantizes the numbers in CeNN templates to powers of two, so that complex and expensive multiplications can be converted to simple and cheap shift operations, which only require a minimum number of registers and logical elements (LEs). While a similar concept has been explored in hardware implementations of Convolutional Neural Networks (CNNs), CeNNs have completely different computation patterns, which require different quantization and implementation strategies. Experimental results on FPGAs show that incremental quantization and early exit can achieve a speedup of up to $ 7.8 \times $ and $ 8.3 \times $, respectively, compared with the state-of-the-art implementations, while with almost no performance loss with four widely adopted applications. We also discover that different from CNNs, the optimal quantization strategies of CeNNs depend heavily on the applications. We hope that our work can serve as a pioneer in the hardware optimization of CeNNs.", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Fusella:2019:GEI, author = "Edoardo Fusella and Mahdi Nikdast and Ian O'Connor and Jos{\'e} Flich and Sudeep Pasricha", title = "{Guest Editors}' Introduction: Emerging Networks-on-Chip Designs, Technologies, and Applications", journal = j-JETC, volume = "15", number = "1", pages = "1:1--1:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3296021", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Dridi:2019:DMA, author = "Mourad Dridi and St{\'e}phane Rubini and Mounir Lallali and Martha 
Johanna Sep{\'u}lveda Fl{\'o}rez and Frank Singhoff and Jean-Philippe Diguet", title = "Design and Multi-Abstraction-Level Evaluation of a {NoC} Router for Mixed-Criticality Real-Time Systems", journal = j-JETC, volume = "15", number = "1", pages = "2:1--2:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3264818", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "A Mixed Criticality System (MCS) combines real-time software tasks with different criticality levels. In a MCS, the criticality level specifies the level of assurance against system failure. For high-critical flows of messages, it is imperative to meet deadlines; otherwise, the whole system might fail, leading to catastrophic results, like loss of life or serious damage to the environment. In contrast, low-critical flows may tolerate some delays. Furthermore, in MCS, flow performances such as the Worst Case Communication Time (WCCT) may vary depending on the criticality level of the applications. Then execution platforms must provide different operating modes for applications with different levels of criticality. To conclude, in Network-On-Chip (NoC), sharing resources between communication flows can lead to unpredictable latencies and subsequently turns the implementation of MCS in many-core architectures challenging. In this article, we propose and evaluate a new NoC router to support MCS based on an accurate WCCT analysis for high-critical flows. The proposed router, called Double Arbiter and Switching router (DAS), jointly uses Wormhole and Store And Forward communication techniques for low- and high-critical flows, respectively. It ensures that high-critical flows meet their deadlines while maximizing the bandwidth remaining for the low-critical flows. 
We also propose a new method for high-critical communication time analysis, applied to Store And Forward switching mode with virtual channels. For low-critical flows communication time analysis, we adapt an existing wormhole communication time analysis with share policy to our context. The second contribution of this article is a multi-abstraction-level evaluation of DAS. We evaluate the communication time of flows, the system mode change, the cost, and four properties of DAS. Simulations with a cycle-accurate SystemC NoC simulator show that, with a 15\% network use rate, the communication delay of high-critical flows is reduced by 80\% while communication delay of low-critical flow is increased by 18\% compared to solutions based on routers with multiple virtual channels. For 10\% of network interferences, using system mode change, DAS reduces the high-critical communication delays about 66\%. We synthesize our router with a 28nm SOI technology and show that the size overhead is limited of 2.5\% compared to the solution based on virtual channel router. Finally, we applied model checking verification techniques to automatically prove several DAS properties required by critical systems designers.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Slijepcevic:2019:TRW, author = "Mladen Slijepcevic and Carles Hernandez and Jaume Abella and Francisco J. 
Cazorla", title = "Time-Randomized Wormhole {NoCs} for Critical Applications", journal = j-JETC, volume = "15", number = "1", pages = "3:1--3:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3281029", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Wormhole-based NoCs (wNoCs) are widely accepted in high-performance domains as the most appropriate solution to interconnect an increasing number of cores in the chip. However, wNoCs suitability in the context of critical real-time applications has not been demonstrated yet. In this article, in the context of probabilistic timing analysis (PTA), we propose a PTA-compatible wNoC design that provides tight time-composable contention bounds. The proposed wNoC design builds on PTA ability to reason in probabilistic terms about hardware events impacting execution time (e.g., wNoC contention), discarding those sequences of events occurring with a negligible low probability. This allows our wNoC design to deliver improved guaranteed performance w.r.t. conventional time-deterministic setups. 
Our results show that performance guarantees of applications running on top of probabilistic wNoC designs improve by 40\% and 93\% on average for $ 4 \times 4 $ and $ 6 \times 6 $ wNoC setups, respectively.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Louri:2019:LHS, author = "Ahmed Louri and Jacques Collet and Avinash Karanth", title = "Limit of Hardware Solutions for Self-Protecting Fault-Tolerant {NoCs}", journal = j-JETC, volume = "15", number = "1", pages = "4:1--4:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3233986", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We study the ultimate limits of hardware solutions for the self-protection strategies against permanent faults in networks on chips (NoCs). NoCs reliability is improved by replacing each base router by an augmented router which includes extra protection circuitry. We compare the protection achieved by the self-test and self-protect (STAP) architectures to that of triple modular redundancy with voting (TMR). Two STAP architectures are considered. In the first one, a defective router self-disconnects from the network, while it self-heals in the second one. In practice, none of the considered architectures (STAP or TMR) can tolerate all the permanent faults, especially faults in the extra-circuitry for protection or voting, and consequently, there will always be some unidentified defective augmented routers which are going to transmit errors in an unpredictable manner. This study consists of tackling this fundamental problem. 
Specifically, we study and determine the average percentage of residual unidentified defective routers (UDRs) and their impact on the overall reliability of the NoC in light of self-protection strategies. Our study shows that TMR is the most efficient solution to limit the average percentage of UDRs when there are typically less than a 0.1 percent of defective base routers. However, TMR is also the most cost prohibitive and the least power efficient. Above 1\% of defective base routers, the STAP approaches are more efficient although the protection efficiency decreases inexorably in the very defective technologies (e.g. when there is 10\% or more of defective base routers). For instance, if the chip includes 10\% of defective base routers, our study shows that there will remain on the average 1\% of UDRs, which causes a major challenge for NoC reliability.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bhanu:2019:FTN, author = "P. Veda Bhanu and Pranav Venkatesh Kulkarni and Soumya J.", title = "Fault-Tolerant Network-on-Chip Design with Flexible Spare Core Placement", journal = j-JETC, volume = "15", number = "1", pages = "5:1--5:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3269983", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Network-on-Chip (NoC) has been proposed as a promising solution to overcome the communication challenges of System-on-Chip (SoC) design in nanoscale technologies. With the advancement in the nanoscale technology, the integration density of Intellectual Property (IP) cores in a single chip have increased, leading to heat dissipation, which in turn makes the system unreliable. 
Therefore, efficient fault-tolerant methods are necessary at different levels to improve overall system performance and make the system to operate normally. This article presents a flexible spare core placement technique for mesh-based NoC by taking several benchmark applications into consideration. An Integer Linear Programming (ILP)-based solution has been proposed for the spare core placement problem. Also, Particle Swarm Optimisation (PSO)-based meta-heuristic has been proposed for the same. Experiments have been performed by taking several application benchmarks reported in the literature and the applications generated using the TGFF tool. Comparisons have been carried out using our approach and the approach followed in the literature (i) by varying the network size with fixed fault percentage in the network, and (ii) by fixing the network size while varying the percentage of faults in the network. We have also compared the overall communication cost and CPU runtime between ILP and PSO approaches. 
The results show significant reductions in the overall communication cost, average network latency, and network power consumption across all the cases using our approach over the approaches reported in the literature.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Manna:2019:TAT, author = "Kanchan Manna and Chatla Swami Sagar and Santanu Chattopadhyay and Indranil Sengupta", title = "Thermal-aware Test Scheduling Strategy for Network-on-Chip based Systems", journal = j-JETC, volume = "15", number = "1", pages = "6:1--6:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3241050", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Rapid progress in technology scaling has introduced massive parallel computing systems with multiple cores on the integrated circuit (IC), in which a flexible and scalable packet-switched architecture, Network-on-Chip (NoC), is commonly used for communication among the cores. However, technology scaling has also increased the susceptibility to internal defects in such systems. So, manufacturing tests of such multicore systems is crucial and this is a complex and time-consuming process. Due to stress on time-to-market, test engineers focus on the reduction of testtime and perform parallel tests of cores. Due to aggressive technology scaling into the nanometer regime, power consumption is also becoming a significant burden. Moreover, power consumption during manufacturing tests is more as compared to normal operation. In addition, peak power consumption is often significantly higher than the average power values. The consumed power leads to high temperature and creates hotspots, which in turn leads to failure of good parts, resulting in yield loss. 
Thermal safety during testing is an utmost challenging problem in NoC-based multicore systems, including three-dimensional NoC-based (3D NoC) multicore systems due to stacking of layers. This work proposes a preemptive test scheduling technique for NoC-based multicore systems to reduce the testtime by minimizing conflicts of resource usage. The preemptive test scheduling problem has been formulated using Integer Linear Programming (ILP). In this article, authors have also presented a thermal-aware test scheduling technique to test cores in 2D as well as 3D stacked NoC-based multicore systems using a Particle Swarm Optimization (PSO) based approach. To improve the solution further, several innovative augmentation techniques have been incorporated in the basic PSO. Experimental results highlight the effectiveness of the proposed method in reducing testtime and peak temperature under the power constraints and achieve a tradeoff between testtime and peak temperature.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Ved:2019:PPA, author = "Sneha N. Ved and Sarabjeet Singh and Joycee Mekie", title = "{PANE}: Pluggable Asynchronous Network-on-Chip Simulator", journal = j-JETC, volume = "15", number = "1", pages = "7:1--7:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3241051", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Communication between different IP cores in MPSoCs and HMPs often results in clock domain crossing. Asynchronous network on chip (NoC) support communication in such heterogeneous set-ups. While there are a large number of tools to model NoCs for synchronous systems, there is very limited tool support to model communication for multi-clock domain NoCs and analyse them. 
In this article, we propose the Pluggable Asynchronous NEtwork on Chip (PANE) simulator, which allows system-level simulation of asynchronous network on chip (NoC). PANE allows design space exploration of synchronous, asynchronous, and mixed synchronous-asynchronous (heterogeneous) NoC for various system-level NoC parameters such as packet latencies, throughput, network saturation point and power analysis. PANE supports a large range of NoC configurations---routing algorithms, topologies, network sizes, and so on---for both synthetic and real traffic patterns. We demonstrate the application of PANE by using synchronous routers, asynchronous routers, and a mix of asynchronous and synchronous routers. One of the key advantages of PANE is that it allows a seamless transition from synchronous to asynchronous NoC simulators while keeping pace with the developments in synchronous NoC tools as they can be integrated with PANE.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bashir:2019:BSO, author = "Janibul Bashir and Eldhose Peter and Smruti R. Sarangi", title = "{BigBus}: a Scalable Optical Interconnect", journal = j-JETC, volume = "15", number = "1", pages = "8:1--8:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289391", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents BigBus, a novel design of an on-chip photonic network for a 1,024-node system. For such a large on-chip network, performance and power reduction are two mutually conflicting goals. This article uses a combination of strategies to reduce static power consumption while simultaneously improving performance and the energy-delay$^2$ (ED$^2$) product. 
The crux of the article is to segment the entire system into smaller clusters of nodes and adopt a hybrid strategy for each segment that includes conventional laser modulation, as well as a novel technique for sharing power across nodes dynamically. We represent energy internally as tokens, where one token will allow a node to send a message to any other node in its cluster. We allow optical stations to arbitrate for tokens at a global level, and then we predict the number of token equivalents of power that the off-chip laser needs to generate. Using these techniques, BigBus outperforms other competing proposals. We demonstrate a speedup of 14--34\% over state of the art proposals and a 20--61\% reduction in ED$^2$.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Xu:2019:GGP, author = "Zhen Xu and Xuhao Chen and Jie Shen and Yang Zhang and Cheng Chen and Canqun Yang", title = "{GARDENIA}: a Graph Processing Benchmark Suite for Next-Generation Accelerators", journal = j-JETC, volume = "15", number = "1", pages = "9:1--9:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3283450", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents the Graph Algorithm Repository for Designing Next-generation Accelerators (GARDENIA), a benchmark suite for studying irregular graph algorithms on massively parallel accelerators. Applications with limited control and data irregularity are the main focus of existing generic benchmarks for accelerators, while available graph processing benchmarks do not apply state-of-the-art algorithms and/or optimization techniques. 
GARDENIA includes emerging graph processing workloads from graph analytics, sparse linear algebra, and machine-learning domains, which mimic massively multithreaded commercial programs running on modern large-scale datacenters. Our characterization shows that GARDENIA exhibits irregular microarchitectural behavior, which is quite different from structured workloads and straightforward-implemented graph benchmarks.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Yoon:2019:SLC, author = "Su-Kyung Yoon and Young-Sun Youn and Bernd Burgstaller and Shin-Dug Kim", title = "Self-learnable Cluster-based Prefetching Method for {DRAM}-Flash Hybrid Main Memory Architecture", journal = j-JETC, volume = "15", number = "1", pages = "10:1--10:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3284932", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "This article presents a novel prefetching mechanism for memory-intensive workloads used in large-scale data centers. We design a negative-AND-flash/dynamic random-access memory (DRAM) hybrid memory architecture as a cost-effective memory architecture to resolve the scalability and power consumption problems of a DRAM-based model. A smart prefetching mechanism based on a cluster-management scheme to cope with dynamically varying and complex access patterns of any given application is designed for maximizing the performance of the DRAM. In this article, we propose a new concept for page management, called a cluster, which prefetches data in our hybrid memory architecture. The cluster management is based on a self-learning scheme on dynamically changeable access patterns by considering any correlation between missed pages. 
Experimental results show that the overall performance is significantly improved in relation to hit rate, execution time, and energy consumption. Namely, our proposed model can enhance the hit rate by 15\% and reduce the execution time by 1.75 times. In addition, we can save energy consumption by around 48\% by cutting the number of flushed pages to about an eighth of that in a conventional system.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Cui:2019:SMB, author = "Xiaotong Cui and Jeff (Jun) Zhang and Kaijie Wu and Siddharth Garg and Ramesh Karri", title = "Split Manufacturing-Based Register Transfer-Level Obfuscation", journal = j-JETC, volume = "15", number = "1", pages = "11:1--11:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289156", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Fabrication-less integrated circuit (IC) design houses outsource fabrication to third-party foundries to reduce cost of manufacturing. The outsourcing of IC fabrication, beyond our expectation, raises concerns regarding intellectual property (IP) piracy and theft by rogue elements in the third-party foundries. Obfuscation techniques have been proposed to increase resistance to reverse engineering, IP recovery, IP theft, and piracy. However, prior work on obfuscation for IP protection has primarily applied to the gate level or the layout level. As a result, it can significantly impact the performance of the original design in addition to requiring redesign of standard cells. In this article, we propose a high-level synthesis and analysis (HLSA)-based obfuscation approach for IP protection. The proposed method is based on split manufacturing. 
Additional dummy units and MUXes can be added to further obfuscate the design. The proposed technique aligns with the standard-cell-based design methodologies and does not significantly impact the performance of the original design. Our experimental results confirm that the proposed approach can provide high levels of IC obfuscation with moderate area cost.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2019:NNC, author = "Bingzhe Li and Yaobin Qin and Bo Yuan and David J. Lilja", title = "Neural Network Classifiers Using a Hardware-Based Approximate Activation Function with a Hybrid Stochastic Multiplier", journal = j-JETC, volume = "15", number = "1", pages = "12:1--12:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3284933", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Neural networks are becoming prevalent in many areas, such as pattern recognition and medical diagnosis. Stochastic computing is one potential solution for neural networks implemented in low-power back-end devices such as solar-powered devices and Internet of Things (IoT) devices. In this article, we investigate a new architecture of stochastic neural networks with a hardware-oriented approximate activation function. The newly proposed approximate activation function can be hidden in the proposed architecture and thus reduce the whole hardware cost. Additionally, to further reduce the hardware cost of the stochastic implementation, a new hybrid stochastic multiplier is proposed. It contains OR gates and a binary parallel counter, which aims to reduce the number of inputs of the binary parallel counter. 
The experimental results indicate the newly proposed approximate architecture without hybrid stochastic multipliers achieves more than 25\%, 60\%, and 3x reduction compared to previous stochastic neural networks, and more than 30x, 30x, and 52\% reduction compared to conventional binary neural networks, in terms of area, power, and energy, respectively, while maintaining the similar error rates compared to the conventional neural networks. Furthermore, the stochastic implementation with hybrid stochastic multipliers further reduces area about 18\% to 80\%, power from 15\% to 113.1\%, and energy about 15\% to 131\%, respectively.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zhao:2019:LST, author = "Zhou Zhao and Ashok Srivastava and Lu Peng and Qing Chen", title = "Long Short-Term Memory Network Design for Analog Computing", journal = j-JETC, volume = "15", number = "1", pages = "13:1--13:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289393", ISSN = "1550-4832", bibdate = "Sat Feb 23 06:37:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "We present an analog-integrated circuit implementation of long short-term memory network, which is compatible with digital CMOS technology. We have used multiple-input floating gate MOSFETs as both the front-end to obtain converted analog signals and the differential pairs in proposed analog multipliers. Analog crossbar is built by the analog multiplier processing matrix and bitwise multiplications. We have shown that using current signals as internal transmission signals can largely reduce computation delay, compared to the digital implementation. We also have introduced analog blocks to work as activation functions for the algorithm. 
In the back-end of our design, we have used current comparators to achieve the output to be readable to external digital systems. We have designed the LSTM network with the matrix size of $ 16 \times 16 $ in TSMC 180nm CMOS technology. The post-layout simulations show that the latency of one computing cycle is 1.19ns without memory, and power dissipation of the single analog LSTM computing core with 2 kilobytes SRAM at 200MHz is 460.3mW. The overhead of power dissipation due to SRAM access is 8.3\%, in which the computing of each LSTM layer requires one computing cycle. The energy efficiency is 0.95TOP/s/W.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Seo:2019:GEIa, author = "Jae-Sun Seo and Yu Cao and Xin Li and Paul Whatmough", title = "{Guest Editors}' Introduction to the Special Section on Hardware and Algorithms for Energy-Constrained On-chip Machine Learning", journal = j-JETC, volume = "15", number = "2", pages = "14:1--14:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322433", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322433", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Wang:2019:TBN, author = "Weijia Wang and Bill Lin", title = "Trained Biased Number Representation for {ReRAM}-Based Neural Network Accelerators", journal = j-JETC, volume = "15", number = "2", pages = "15:1--15:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304107", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = 
"https://dl.acm.org/ft_gateway.cfm?id=3304107", abstract = "Recent works have demonstrated the promise of using resistive random access memory (ReRAM) to perform neural network computations in memory. In particular, ReRAM-based crossbar structures can perform matrix-vector multiplication directly in the analog domain, but the resolutions of ReRAM cells and digital/analog converters limit the precisions of inputs and weights that can be directly supported. Although convolutional neural networks (CNNs) can be trained with low-precision weights and activations, previous quantization approaches are either not amenable to ReRAM-based crossbar implementations or have poor accuracies when applied to deep CNNs on complex datasets. In this article, we propose a new CNN training and implementation approach that implements weights using a trained biased number representation, which can achieve near full-precision model accuracy with as little as 2-bit weights and 2-bit activations on the CIFAR datasets. The proposed approach is compatible with a ReRAM-based crossbar implementation. We also propose an activation-side coalescing technique that combines the steps of batch normalization, non-linear activation, and quantization into a single stage that simply performs a clipped-rounding operation. 
Experiments demonstrate that our approach outperforms previous low-precision number representations for VGG-11, VGG-13, and VGG-19 models on both the CIFAR-10 and CIFAR-100 datasets.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Mondal:2019:SST, author = "Ankit Mondal and Ankur Srivastava", title = "In Situ Stochastic Training of {MTJ} Crossbars With Machine Learning Algorithms", journal = j-JETC, volume = "15", number = "2", pages = "16:1--16:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309880", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309880", abstract = "Owing to high device density, scalability, and non-volatility, magnetic tunnel junction (MTJ)-based crossbars have garnered significant interest for implementing the weights of neural networks (NNs). The existence of only two stable states in MTJs implies a high overhead of obtaining optimal binary weights in software. This article illustrates that the inherent parallelism in the crossbar structure makes it highly appropriate for in situ training, wherein the network is taught directly on the hardware. It leads to significantly smaller training overhead as the training time is independent of the size of the network, while also circumventing the effects of alternate current paths in the crossbar and accounting for manufacturing variations in the device. We show how the stochastic switching characteristics of MTJs can be leveraged to perform probabilistic weight updates using the gradient descent algorithm. 
We describe how the update operations can be performed on crossbars implementing NNs and restricted Boltzmann machines, and perform simulations on them to demonstrate the effectiveness of our techniques. The results reveal that stochastically trained MTJ-crossbar feed-forward and deep belief nets achieve a classification accuracy nearly the same as that of real-valued weight networks trained in software and exhibit immunity to device variations.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zand:2019:CPI, author = "Ramtin Zand and Kerem Y. Camsari and Supriyo Datta and Ronald F. Demara", title = "Composable Probabilistic Inference Networks Using {MRAM}-based Stochastic Neurons", journal = j-JETC, volume = "15", number = "2", pages = "17:1--17:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304105", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304105", abstract = "Magnetoresistive random access memory (MRAM) technologies with thermally unstable nanomagnets are leveraged to develop an intrinsic stochastic neuron as a building block for restricted Boltzmann machines (RBMs) to form deep belief networks (DBNs). The embedded MRAM-based neuron is modeled using precise physics equations. The simulation results exhibit the desired sigmoidal relation between the input voltages and probability of the output state. A probabilistic inference network simulator (PIN-Sim) is developed to realize a circuit-level model of an RBM utilizing resistive crossbar arrays along with differential amplifiers to implement the positive and negative weight values. The PIN-Sim is composed of five main blocks to train a DBN, evaluate its accuracy, and measure its power consumption. 
The MNIST dataset is leveraged to investigate the energy and accuracy tradeoffs of seven distinct network topologies in SPICE using the 14nm HP-FinFET technology library with the nominal voltage of 0.8V, in which an MRAM-based neuron is used as the activation function. The software and hardware level simulations indicate that a $ 784 \times 200 \times 10 $ topology can achieve less than 5\% error rates with $ \approx $400pJ energy consumption. The error rates can be reduced to 2.5\% by using a $ 784 \times 500 \times 500 \times 500 \times 10 $ DBN at the cost of $ \approx 10 \times $ higher energy consumption and significant area overhead. Finally, the effects of specific hardware-level parameters on power dissipation and accuracy tradeoffs are identified via the developed PIN-Sim framework.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2019:LCS, author = "Bingzhe Li and M. Hassan Najafi and David J. Lilja", title = "Low-Cost Stochastic Hybrid Multiplier for Quantized Neural Networks", journal = j-JETC, volume = "15", number = "2", pages = "18:1--18:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309882", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309882", abstract = "With increased interests of neural networks, hardware implementations of neural networks have been investigated. Researchers pursue low hardware cost by using different technologies such as stochastic computing (SC) and quantization. More specifically, the quantization is able to reduce total number of trained weights and results in low hardware cost. SC aims to lower hardware costs substantially by using simple gates instead of complex arithmetic operations. 
However, the advantages of both quantization and SC in neural networks are not well investigated. In this article, we propose a new stochastic multiplier with simple CMOS transistors called the stochastic hybrid multiplier for quantized neural networks. The new design uses the characteristic of quantized weights and tremendously reduces the hardware cost of neural networks. Experimental results indicate that our stochastic design achieves about 7.7x energy reduction compared to its counterpart binary implementation while maintaining slightly higher recognition error rates than the binary implementation. Compared to previous stochastic neural network implementations, our work derives at least 4x, 9x, and 10x reduction in terms of area, power, and energy, respectively.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Lou:2019:MSA, author = "Qiuwen Lou and Chenyun Pan and John McGuinness and Andras Horvath and Azad Naeemi and Michael Niemier and X. Sharon Hu", title = "A Mixed Signal Architecture for Convolutional Neural Networks", journal = j-JETC, volume = "15", number = "2", pages = "19:1--19:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304110", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304110", abstract = "Deep neural network (DNN) accelerators with improved energy and delay are desirable for meeting the requirements of hardware targeted for IoT and edge computing systems. Convolutional neural networks (CoNNs) belong to one of the most popular types of DNN architectures. This article presents the design and evaluation of an accelerator for CoNNs. The system-level architecture is based on mixed-signal, cellular neural networks (CeNNs). 
Specifically, we present (i) the implementation of different layers, including convolution, ReLU, and pooling, in a CoNN using CeNN, (ii) modified CoNN structures with CeNN-friendly layers to reduce computational overheads typically associated with a CoNN, (iii) a mixed-signal CeNN architecture that performs CoNN computations in the analog and mixed signal domain, and (iv) design space exploration that identifies what CeNN-based algorithm and architectural features fare best compared to existing algorithms and architectures when evaluated over common datasets---MNIST and CIFAR-10. Notably, the proposed approach can lead to 8.7$ \times $ improvements in energy-delay product (EDP) per digit classification for the MNIST dataset at iso-accuracy when compared with the state-of-the-art DNN engine, while our approach could offer 4.3$ \times $ improvements in EDP when compared to other network implementations for the CIFAR-10 dataset.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Pilly:2019:HSL, author = "Praveen K. Pilly and Nigel D. Stepp and Yannis Liapis and David W. Payton and Narayan Srinivasa", title = "Hypercolumn Sparsification for Low-Power Convolutional Neural Networks", journal = j-JETC, volume = "15", number = "2", pages = "20:1--20:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304104", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304104", abstract = "We provide here a novel method, called hypercolumn sparsification, to achieve high recognition performance for convolutional neural networks (CNNs) despite low-precision weights and activities during both training and test phases.
This method is applicable to any CNN architecture that operates on signal patterns (e.g., audio, image, video) to extract information such as class membership. It operates on the stack of feature maps in each of the cascading feature matching and pooling layers through the processing hierarchy of the CNN by an explicit competitive process ($k$-WTA, winner take all) that generates a sparse feature vector at each spatial location. This principle is inspired by local brain circuits, where neurons tuned to respond to different patterns in the incoming signals from an upstream region inhibit each other using interneurons, such that only the ones that are maximally activated survive the quenching threshold. We show this process of sparsification is critical for probabilistic learning of low-precision weights and bias terms, thereby making pattern recognition amenable for energy-efficient hardware implementations. Further, we show that hypercolumn sparsification could lead to more data-efficient learning as well as having an emergent property of significantly pruning down the number of connections in the network.
A theoretical account and empirical analysis are provided to understand these effects better.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Imani:2019:HSC, author = "Mohsen Imani and Ricardo Garcia and Saransh Gupta and Tajana Rosing", title = "Hardware-Software Co-design to Accelerate Neural Network Applications", journal = j-JETC, volume = "15", number = "2", pages = "21:1--21:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304086", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304086", abstract = "Many applications, such as machine learning and data sensing, are statistical in nature and can tolerate some level of inaccuracy in their computation. A variety of designs have been put forward exploiting the statistical nature of machine learning through approximate computing. With approximate multipliers being the main focus due to their high usage in machine-learning designs. In this article, we propose a novel approximate floating point multiplier, called CMUL, which significantly reduces energy and improves performance of multiplication while allowing for a controllable amount of error. Our design approximately models multiplication by replacing the most costly step of the operation with a lower energy alternative. To tune the level of approximation, CMUL dynamically identifies the inputs that produces the largest approximation error and processes them in precise mode. To use CMUL for deep neural network (DNN) acceleration, we propose a framework that modifies the trained DNN model to make it suitable for approximate hardware. Our framework adjusts the DNN weights to a set of ``potential weights'' that are suitable for approximate hardware.
Then, it compensates the possible quality loss by iteratively retraining the network. Our evaluation with four DNN applications shows that, CMUL can achieve 60.3\% energy efficiency improvement and 3.2$ \times $ energy-delay product (EDP) improvement as compared to the baseline GPU, while ensuring less than 0.2\% quality loss. These results are 38.7\% and 2.0$ \times $ higher than energy efficiency and EDP improvement of the CMUL without using the proposed framework.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Bouvier:2019:SNN, author = "Maxence Bouvier and Alexandre Valentian and Thomas Mesquida and Fran{\c{c}}ois Rummens and Marina Reyboz and Elisa Vianello and Edith Beigne", title = "Spiking Neural Networks Hardware Implementations and Challenges: a Survey", journal = j-JETC, volume = "15", number = "2", pages = "22:1--22:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304103", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304103", abstract = "Neuromorphic computing is henceforth a major research field for both academic and industrial actors. As opposed to Von Neumann machines, brain-inspired processors aim at bringing closer the memory and the computational elements to efficiently evaluate machine learning algorithms. Recently, spiking neural networks, a generation of cognitive algorithms employing computational primitives mimicking neuron and synapse operational principles, have become an important part of deep learning. They are expected to improve the computational performance and efficiency of neural networks, but they are best suited for hardware able to support their temporal dynamics. 
In this survey, we present the state of the art of hardware implementations of spiking neural networks and the current trends in algorithm elaboration from model selection to training mechanisms. The scope of existing solutions is extensive; we thus present the general framework and study on a case-by-case basis the relevant particularities. We describe the strategies employed to leverage the characteristics of these event-driven algorithms at the hardware level and discuss their related advantages and challenges.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Saeed:2019:ISA, author = "Samah Mohamed Saeed and Nithin Mahendran and Alwin Zulehner and Robert Wille and Ramesh Karri", title = "Identification of Synthesis Approaches for {IP\slash IC} Piracy of Reversible Circuits", journal = j-JETC, volume = "15", number = "3", pages = "23:1--23:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289392", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3289392", abstract = "Reversible circuits employ a computational paradigm that is beneficial for several applications, including the design of encoding and decoding devices, low-power design, and emerging applications in quantum computation. However, similarly to conventional logic, reversible circuits are expected to be subject to Intellectual Property / Integrated Circuit piracy. To counteract such attacks, an understanding of how to identify the target function from a reversible circuit is a crucial first step. In contrast to conventional logic, the target function is (implicitly or explicitly) embedded into the reversible circuit. Numerous synthesis approaches have been proposed for this embedding task. 
To recover the target function embedded in a reversible circuit, one needs to know what synthesis approach has been used to embed the circuit. We propose a machine-learning-based scheme to determine the used reversible synthesis approach based on the telltale signs it leaves in the synthesized reversible circuit. We study the impact of optimizing the synthesis approaches on the telltale signs that they leave. Our analysis shows that the synthesis approaches can be determined in the vast majority of cases even if optimized versions of the synthesis approaches are used.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Zyarah:2019:NAH, author = "Abdullah M. Zyarah and Dhireesha Kudithipudi", title = "Neuromemrisitive Architecture of {HTM} with On-Device Learning and Neurogenesis", journal = j-JETC, volume = "15", number = "3", pages = "24:1--24:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3300971", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3300971", abstract = "Hierarchical temporal memory (HTM) is a biomimetic sequence memory algorithm that holds promise for invariant representations of spatial and spatio-temporal inputs. This article presents a comprehensive neuromemristive crossbar architecture for the spatial pooler (SP) and the sparse distributed representation classifier, which are fundamental to the algorithm. There are several unique features in the proposed architecture that tightly link with the HTM algorithm. A memristor that is suitable for emulating the HTM synapses is identified and a new Z-window function is proposed. The architecture exploits the concept of synthetic synapses to enable potential synapses in the HTM. 
The crossbar for the SP avoids dark spots caused by unutilized crossbar regions and supports rapid on-chip training within two clock cycles. This research also leverages plasticity mechanisms such as neurogenesis and homeostatic intrinsic plasticity to strengthen the robustness and performance of the SP. The proposed design is benchmarked for image recognition tasks using Modified National Institute of Standards and Technology (MNIST) and Yale faces datasets, and is evaluated using different metrics including entropy, sparseness, and noise robustness. Detailed power analysis at different stages of the SP operations is performed to demonstrate the suitability for mobile platforms.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2019:MML, author = "Qixiao Liu and Zhifeng Chen and Zhibin Yu", title = "{MiC}: Multi-level Characterization and Optimization of {GPGPU} Kernels", journal = j-JETC, volume = "15", number = "3", pages = "25:1--25:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3304108", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3304108", abstract = "Graphics processing units (GPUs) have enjoyed increasing popularity in recent years, which benefits from, for example, general-purpose GPU (GPGPU) for parallel programs and new computing paradigms, such as the Internet of Things (IoT). GPUs hold great potential in providing effective solutions for big data analytics while the demands for processing large quantities of data in real time are also increasing.
However, the pervasive presence of GPUs on mobile devices presents great challenges for GPGPU, mainly because GPGPU integrates a large amount of processor arrays and concurrent executing threads (up to hundreds of thousands). In particular, the root causes of performance loss in a GPGPU program can not be revealed in detail by current approaches. In this article, we propose MiC (Multi-level Characterization), a framework that comprehensively characterizes GPGPU kernels at the instruction, Basic Block (BBL), and thread levels. Specifically, we devise Instruction Vectors (IV) and Basic Blocks Vectors (BBV), a Thread Similarity Matrix (TSM), and a Divergence Flow Statistics Graph (DFSG) to profile information in each level. We use MiC to provide insights into GPGPU kernels through the characterizations of 34 kernels from popular GPGPU benchmark suites such as Compute Unified Device Architecture (CUDA) Software Development Kit (SDK), Rodinia, and Parboil. In comparison with Central Processing Unit (CPU) workloads, we conclude the key findings as follows: (1) There are comparable Instruction-Level Parallelism (ILP); (2) The BBL count is significantly smaller than CPU workloads---only 22.8 on average; (3) The dynamic instruction count per thread varies from dozens to tens of thousands and it is extremely small compared to CPU benchmarks; (4) The Pareto principle (also called 90/10 rule) does not apply to GPGPU kernels while it pervasively exists in CPU programs; (5) The loop patterns are dramatically different from those in CPU workloads; (6) The branch ratio is lower than that of CPU programs but higher than pure GPU workloads. In addition, we have also shown how TSM and DFSG are used to characterize the branch divergence in a visual way, to enable the analysis of thread behavior in GPGPU programs.
In addition, we show an optimization case for a GPGPU kernel from the bottleneck identified through its characterization result, which improves 16.8\% performance.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Grimmer:2019:ASD, author = "Andreas Grimmer and Medina Hamidovi{\'c} and Werner Haselmayr and Robert Wille", title = "Advanced Simulation of Droplet Microfluidics", journal = j-JETC, volume = "15", number = "3", pages = "26:1--26:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313867", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313867", abstract = "The complexity of droplet microfluidics grows with the implementation of parallel processes and multiple functionalities on a single device. This poses a severe challenge to the engineer designing the corresponding microfluidic networks. In today's design processes, the engineer relies on calculations, assumptions, simplifications, as well as his/her experiences and intuitions. To validate the obtained specification of the microfluidic network, usually a prototype is fabricated and physical experiments are conducted thus far. In case the design does not implement the desired functionality, this prototyping iteration is repeated---obviously resulting in an expensive and time-consuming design process. To avoid unnecessary debugging loops involving fabrication and testing, simulation methods could help to initially validate the specification of the microfluidic network before any prototype is fabricated. However, state-of-the-art simulation tools come with severe limitations, which prevent their utilization for practically relevant applications.
More precisely, they are often not dedicated to droplet microfluidics, cannot handle the required physical phenomena, are not publicly available, and can hardly be extended. In this work, we present an advanced simulation approach for droplet microfluidics that addresses these shortcomings and, eventually, allows simulating practically relevant applications. To this end, we propose a simulation framework at the one-dimensional analysis model, which directly works on the specification of the design, supports essential physical phenomena, is publicly available, and is easy to extend. Evaluations and case studies demonstrate the benefits of the proposed simulator: While current state-of-the-art tools were not applicable for practically relevant microfluidic networks, the proposed simulator allows reducing the design time and costs, e.g., of a drug screening device from one person month and USD 1200, respectively, to just a fraction of that.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Liu:2019:EEF, author = "Yu Liu and Sai Sourabh Yenamachintala and Peng Li", title = "Energy-efficient {FPGA} Spiking Neural Accelerators with Supervised and Unsupervised Spike-timing-dependent-Plasticity", journal = j-JETC, volume = "15", number = "3", pages = "27:1--27:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313866", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313866", abstract = "The liquid state machine (LSM) is a model of recurrent spiking neural networks (SNNs) and provides an appealing brain-inspired computing paradigm for machine-learning applications. 
Moreover, operated by processing information directly on spiking events, the LSM is amenable to efficient event-driven hardware implementation. However, training SNNs is, in general, a difficult task as synaptic weights shall be updated based on neural firing activities while achieving a learning objective. In this article, we explore bio-plausible spike-timing-dependent-plasticity (STDP) mechanisms to train liquid state machine models with and without supervision. First, we employ a supervised STDP rule to train the output layer of the LSM while delivering good classification performance. Furthermore, a hardware-friendly unsupervised STDP rule is leveraged to train the recurrent reservoir to further boost the performance. We pursue efficient hardware implementation of FPGA LSM accelerators by performing algorithm-level optimization of the two proposed training rules and exploiting the self-organizing behaviors naturally induced by STDP. Several recurrent spiking neural accelerators are built on a Xilinx Zync ZC-706 platform and trained for speech recognition with the TI46 speech corpus as the benchmark. 
Adopting the two proposed unsupervised and supervised STDP rules outperforms the recognition accuracy of a competitive non-STDP baseline training algorithm by up to 3.47\%.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Du:2019:SSA, author = "Gaoming Du and Guanyu Liu and Zhenmin Li and Yifan Cao and Duoli Zhang and Yiming Ouyang and Minglun Gao and Zhonghai Lu", title = "{SSS}: Self-aware System-on-chip Using a Static-dynamic Hybrid Method", journal = j-JETC, volume = "15", number = "3", pages = "28:1--28:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313869", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313869", abstract = "Network-on-Chip (NoC) has become the de facto communication standard for multi-core or many-core System-on-Chip (SoC) due to its scalability and flexibility. However, an important factor in NoC design is temperature, which affects the overall performance of SoC---decreasing circuit frequency, increasing energy consumption, and even shortening chip lifetime. In this article, we propose SSS, a self-aware SoC using a static-dynamic hybrid method that combines dynamic mapping and static mapping to reduce the hotspot temperature for NoC-based SoCs. First, we propose monitoring and thermal modeling for self-state sensoring. Then, in static mapping stage, we calculate the optimal mapping solutions under different temperature modes using the discrete firefly algorithm to help self-decision making. Finally, in dynamic mapping stage, we achieve dynamic mapping through configuring NoC and SoC sentient units for self-optimizing. Experimental results show that SSS has substantially reduced the peak temperature by up to 37.52\%.
The FPGA prototype proves the effectiveness and smartness of SSS in reducing hotspot temperature.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Walter:2019:PRT, author = "Marcel Walter and Robert Wille and Daniel Gro{\ss}e and Frank Sill Torres and Rolf Drechsler", title = "Placement and Routing for Tile-based Field-coupled Nanocomputing Circuits Is {NP}-complete (Research Note)", journal = j-JETC, volume = "15", number = "3", pages = "29:1--29:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3312661", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3312661", abstract = "Field-coupled Nanocomputing (FCN) technologies provide an alternative to conventional CMOS-based computation technologies and are characterized by intriguingly low-energy dissipation. Accordingly, their design received significant attention in the recent past. FCN circuit implementations like Quantum-dot Cellular Automata (QCA) or Nanomagnet Logic (NML) have already been built in labs and basic operations such as inverters, Majority, AND, OR, and so on, are already available. The design problem basically boils down to the question of how to place basic operations and route their connections so that the desired function results while, at the same time, further constraints (related to timing, clocking, path lengths, etc.) are satisfied. While several solutions for this problem have been proposed, interestingly no clear understanding about the complexity of the underlying task exists thus far. In this research note, we consider this problem and eventually prove that placement and routing for tile-based FCN circuits is NP-complete. 
By this, we provide a theoretical foundation for the further development of corresponding design methods.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2019:LLP, author = "Sumin Li and Kaixin Huang and Linpeng Huang and Jiashun Zhu", title = "{LiwePMS}: a Lightweight Persistent Memory with Wear-aware Memory Management", journal = j-JETC, volume = "15", number = "3", pages = "30:1--30:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3327963", ISSN = "1550-4832", bibdate = "Fri Nov 29 16:06:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3327963", abstract = "Next-generation Storage Class Memory (SCM) offers low-latency, high-density, byte-addressable access and persistency. The potent combination of these attractive characteristics makes it possible for SCM to unify the main memory and storage to reduce the storage hierarchy. Aiming for this, several persistent memory systems were designed. However, the heavy metadata and transaction cost degrade the system performance. Moreover, neither of them pays attention to wear-leveling strategy. In this article, we present a lightweight persistent memory system, LiwePMS, which allows a fast access to persistent data stored in SCM with wear-aware memory management. LiwePMS makes performance improvement by simplifying the metadata management and the consistency method. LiwePMS abstracts SCM as heap space with container-based dynamic address mapping. Also, LiwePMS implements efficient wear-aware dynamic memory allocator and lightweight transaction mechanism for data consistency in user-space library. The experiments showed that LiwePMS persists key-value records 1.5$ \times $ faster than Redis RDB mechanism. 
LiwePMS improves the performance of persistent region operation by more than 45\%, 63\%, and 1.1$ \times $ comparing with HEAPO, Mnemosyne, and NVML, respectively. Also, the wear-leveling policy of memory allocator outperforms that of NVMalloc from 35\% to 30\%, and the transaction method promotes the transaction performance to 1.8$ \times $ compared to NVML.", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Seo:2019:GEIb, author = "Jae-Sun Seo and Yu Cao and Xin Li and Paul Whatmough", title = "Guest Editors' Introduction: Hardware and Algorithms for Energy-Constrained On-Chip Machine Learning (Part 2)", journal = j-JETC, volume = "15", number = "4", pages = "31:1--31:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3359336", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3359336", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Schmuck:2019:HOD, author = "Manuel Schmuck and Luca Benini and Abbas Rahimi", title = "Hardware Optimizations of Dense Binary Hyperdimensional Computing: Rematerialization of Hypervectors, Binarized Bundling, and Combinational Associative Memory", journal = j-JETC, volume = "15", number = "4", pages = "32:1--32:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314326", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314326", abstract = "Brain-inspired hyperdimensional (HD) computing models neural activity patterns of the very size of the brain's 
circuits with points of a hyperdimensional space, that is, with hypervectors. Hypervectors are $D$-dimensional (pseudo)random vectors with independent and identically distributed (i.i.d.) components constituting ultra-wide holographic words: $ D = $10,000 bits, for instance. At its very core, HD computing manipulates a set of seed hypervectors to build composite hypervectors representing objects of interest. It demands memory optimizations with simple operations for an efficient hardware realization. In this article, we propose hardware techniques for optimizations of HD computing, in a synthesizable open-source VHDL library, to enable co-located implementation of both learning and classification tasks on only a small portion of Xilinx UltraScale FPGAs: (1) We propose simple logical operations to rematerialize the hypervectors on the fly rather than loading them from memory. These operations massively reduce the memory footprint by directly computing the composite hypervectors whose individual seed hypervectors do not need to be stored in memory. (2) Bundling a series of hypervectors over time requires a multibit counter per every hypervector component. We instead propose a binarized back-to-back bundling without requiring any counters. This truly enables on-chip learning with minimal resources as every hypervector component remains binary over the course of training to avoid otherwise multibit components. (3) For every classification event, an associative memory is in charge of finding the closest match between a set of learned hypervectors and a query hypervector by using a distance metric. This operator is proportional to hypervector dimension ($D$), and hence may take $ O(D) $ cycles per classification event. Accordingly, we significantly improve the throughput of classification by proposing associative memories that steadily reduce the latency of classification to the extreme of a single cycle. 
(4) We perform a design space exploration incorporating the proposed techniques on FPGAs for a wearable biosignal processing application as a case study. Our techniques achieve up to 2.39$ \times $ area saving, or 2,337$ \times $ throughput improvement. The Pareto optimal HD architecture is mapped on only 18,340 configurable logic blocks (CLBs) to learn and classify five hand gestures using four electromyography sensors.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Dinakarrao:2019:ATR, author = "Sai Manoj Pudukotai Dinakarrao and Arun Joseph and Anand Haridass and Muhammad Shafique and J{\"o}rg Henkel and Houman Homayoun", title = "Application and Thermal-reliability-aware Reinforcement Learning Based Multi-core Power Management", journal = j-JETC, volume = "15", number = "4", pages = "33:1--33:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3323055", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3323055", abstract = "Power management through dynamic voltage and frequency scaling (DVFS) is one of the most widely adopted techniques. However, it impacts application reliability (due to soft errors, circuit aging, and deadline misses). However, increased power density impacts the thermal reliability of the chip, sometimes leading to permanent failure. To balance both application- and thermal-reliability along with achieving power savings and maintaining performance, we propose application- and thermal-reliability-aware reinforcement learning-based multi-core power management in this work. The proposed power management scheme employs a reinforcement learner to consider the power savings and variations in the application and thermal reliability caused by DVFS. 
To overcome the computational overhead, the power management decisions are determined at the application-level rather than per-core or system-level granularity. Experimental evaluation of proposed multi-core power management on a microprocessor with up to 32 cores, running PARSEC applications, was done to demonstrate the applicability and efficiency of the proposed technique. Compared to the existing state-of-the-art techniques, the proposed technique enables an average energy savings of up to $ \approx $20\%, up to 4.926${}^\circ $C temperature reduction without degradation in the application- and thermal-reliability.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Vu:2019:CAP, author = "The H. Vu and Yuichi Okuyama and Abderazek {Ben Abdallah}", title = "Comprehensive Analytic Performance Assessment and {$K$}-means based Multicast Routing Algorithm and Architecture for {$3$D-NoC} of Spiking Neurons", journal = j-JETC, volume = "15", number = "4", pages = "34:1--34:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340963", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340963", abstract = "Spiking neural networks (SNNs) are artificial neural network models that more closely mimic biological neural networks. In addition to neuronal and synaptic state, SNNs incorporate the variant time scale into their computational model. Since each neuron in these networks is connected to thousands of others, high bandwidth is required. Moreover, since the spike times are used to encode information in SNN, very low communication latency is also needed. The 2D-NoC was used as a solution to provide a scalable interconnection fabric in large-scale parallel SNN systems. 
The 3D-ICs have also attracted a lot of attention as a potential solution to resolve the interconnect bottleneck. The combination of these two emerging technologies provides a new horizon for IC designs to satisfy the high requirements of low power and small footprint in emerging AI applications. In this work, we first present a comprehensive analytical model to analyze the performance of 3D mesh NoC over variants of different SNN topologies and communications protocols. Second, we present an architecture and a low-latency spike routing algorithm, named shortest path K-means based multicast (SP-KMCR), for three-dimensional NoC of spiking neurons (3DNoC-SNN). The proposed system was validated based on an RTL-level implementation, while area/power analysis was performed using 45nm CMOS technology.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tang:2019:MNT, author = "Yibin Tang and Ying Wang and Huawei Li and Xiaowei Li", title = "{MV-Net}: Toward Real-Time Deep Learning on Mobile {GPGPU} Systems", journal = j-JETC, volume = "15", number = "4", pages = "35:1--35:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358696", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358696", abstract = "Recently the development of deep learning has been propelling the sheer growth of vision and speech applications on lightweight embedded and mobile systems. 
However, the limitation of computation resource and power delivery capability in embedded platforms is recognized as a significant bottleneck that prevents the systems from providing real-time deep learning ability, since the inference of deep convolutional neural networks (CNNs) and recurrent neural networks (RNNs) involves large quantities of weights and operations. Particularly, how to provide quality-of-services (QoS)-guaranteed neural network inference ability in the multitask execution environment of multicore SoCs is even more complicated due to the existence of resource contention. In this article, we present a novel deep neural network architecture, MV-Net, which provides performance elasticity and contention-aware self-scheduling ability for QoS enhancement in mobile computing systems. When the constraints of QoS, output accuracy, and resource contention status of the system change, MV-Net can dynamically reconfigure the corresponding neural network propagation paths and thus achieves an effective tradeoff between neural network computational complexity and prediction accuracy via approximate computing. 
The experimental results show that (1) MV-Net significantly improves the performance flexibility of current CNN models and makes it possible to provide always-guaranteed QoS in a multitask environment, and (2) it satisfies the quality-of-results (QoR) requirement, outperforming the baseline implementation significantly, and improves the system energy efficiency at the same time.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Shea:2019:HSD, author = "Colin Shea and Tinoosh Mohsenin", title = "Heterogeneous Scheduling of Deep Neural Networks for Low-power Real-time Designs", journal = j-JETC, volume = "15", number = "4", pages = "36:1--36:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358699", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358699", abstract = "Deep neural networks have become the readiest answer to a range of application challenges including image recognition, stock analysis, natural language processing, and biomedical applications such as seizure detection. All while outperforming prior leading solutions that relied heavily on hand-engineered techniques. However, deployment of these neural networks often requires high-computational and memory-intensive solutions. These requirements make it challenging to deploy Deep Neural Networks (DNNs) in embedded, real-time low-power applications where classic architectures, GPUs and CPUs, still impose significant power burden. 
Systems-on-Chip (SoC) with Field-programmable Gate Arrays (FPGAs) can be used to improve performance and allow more fine-grain control of resources than CPUs or GPUs, but it is difficult to find the optimal balance between hardware and software to improve DNN efficiency. In the current research literature there have been few proposed solutions to address optimizing hardware and software deployments of DNNs in embedded low-power systems. To address the computation resource restriction and low-power needs for deploying these networks, we describe and implement a domain-specific metric model for optimizing task deployment on differing platforms, hardware and software. Next, we propose a DNN hardware accelerator called Scalable Low-power Accelerator for real-time deep neural Networks (SCALENet) that includes multithreaded software workers. Finally, we propose a heterogeneous aware scheduler that uses the DNN-specific metric models and the SCALENet accelerator to allocate a task to a resource based on solving a numerical cost for a series of domain objectives. To demonstrate the applicability of our contribution, we deploy nine modern deep network architectures, each containing a different number of parameters within the context of two different neural network applications: image processing and biomedical seizure detection. Utilizing the metric modeling techniques integrated into the heterogeneous aware scheduler and the SCALENet accelerator, we demonstrate the ability to meet computational requirements, adapt to multiple architectures, and lower power by providing an optimized task to resource allocation. Our heterogeneous aware scheduler improves power saving by decreasing power consumption by 10\% of the total system power, does not affect the accuracy of the networks, and still meets the real-time deadlines. 
We demonstrate the ability to achieve parity with or exceed the energy efficiency of NVIDIA GPUs when evaluated against Jetson TK1 with embedded GPU SoC and with a $4 \times $ power savings in a power envelope of 2.0W. When compared to existing FPGA-based accelerators, SCALENet's accelerator and heterogeneous aware scheduler achieves a $4 \times $ improvement in energy efficiency.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Blott:2019:QBN, author = "Michaela Blott and Lisa Halder and Miriam Leeser and Linda Doyle", title = "{QuTiBench}: Benchmarking Neural Networks on Heterogeneous Hardware", journal = j-JETC, volume = "15", number = "4", pages = "37:1--37:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358700", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358700", abstract = "Neural Networks have become one of the most successful universal machine-learning algorithms. They play a key role in enabling machine vision and speech recognition and are increasingly adopted in other application domains. Their computational complexity is enormous and comes along with equally challenging memory requirements in regards to capacity and access bandwidth, which limits deployment in particular within energy constrained, embedded environments. To address these implementation challenges, a broad spectrum of new customized and heterogeneous hardware architectures have emerged, often accompanied with co-designed algorithms to extract maximum benefit out of the hardware. Furthermore, numerous optimization techniques are being explored for neural networks to reduce compute and memory requirements while maintaining accuracy. 
This results in an abundance of algorithmic and architectural choices, some of which fit specific use cases better than others. For system-level designers, there is currently no good way to compare the variety of hardware, algorithm, and optimization options. While there are many benchmarking efforts in this field, they cover only subsections of the embedded design space. None of the existing benchmarks support essential algorithmic optimizations such as quantization, an important technique to stay on chip, or specialized heterogeneous hardware architectures. We propose a novel benchmark suite, QuTiBench, that addresses this need. QuTiBench is a novel multi-tiered benchmarking methodology (Ti) that supports algorithmic optimizations such as quantization (Qu) and helps system developers understand the benefits and limitations of these novel compute architectures in regard to specific neural networks and will help drive future innovation. We invite the community to contribute to QuTiBench to support the full spectrum of choices in implementing machine-learning systems.", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Howladar:2019:HPH, author = "Pampa Howladar and Pranab Roy and Hafizur Rahaman", title = "A High-performance Homogeneous Droplet Routing Technique for {MEDA}-based Biochips", journal = j-JETC, volume = "15", number = "4", pages = "38:1--38:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3327965", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3327965", abstract = "Recent advancement of microelectrode-dot-array (MEDA)-based architecture for digital microfluidic biochips has enabled a major enhancement in microfluidic operations for traditional 
lab-on-chip devices. One critical issue for MEDA-based biochips is the transportation of droplets. MEDA allows dynamic routing for droplets of different size. In this article, we propose a high-performance droplet routing technique for MEDA-based digital microfluidic biochips. First, we propose the basic concept of droplet movement strategy in MEDA-based design together with a definition of strictly shielded zones within the layout in MEDA architecture. Next, we propose transportation schemes of droplets for MEDA architecture under different blockage or crossover conditions and estimate route distances for each net in offline. Finally, a priority-based routing strategy combining various transportation schemes stated earlier has been proposed. Concurrent movement of each droplet is scheduled in a time-multiplexed manner. This poses critical challenges for parallel routing of individual droplets with optimal sharing of cells formulating a routing problem with higher complexity. The final compaction solution satisfies the timing constraint and improves fault tolerance. Simulations are carried out on standard benchmark circuits, namely, Benchmark suite I and Benchmark suite III. 
Experimental results show satisfactory improvements and prove a high degree of robustness for our proposed algorithm.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Li:2019:TBH, author = "Bing Li and Mengjie Mao and Xiaoxiao Liu and Tao Liu and Zihao Liu and Wujie Wen and Yiran Chen and Hai (Helen) Li", title = "Thread Batching for High-performance Energy-efficient {GPU} Memory Design", journal = j-JETC, volume = "15", number = "4", pages = "39:1--39:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3330152", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3330152", abstract = "Massive multi-threading in GPU imposes tremendous pressure on memory subsystems. Due to rapid growth in thread-level parallelism of GPU and slowly improved peak memory bandwidth, memory becomes a bottleneck of GPU's performance and energy efficiency. In this article, we propose an integrated architectural scheme to optimize the memory accesses and therefore boost the performance and energy efficiency of GPU. First, we propose a thread batch enabled memory partitioning (TEMP) to improve GPU memory access parallelism. In particular, TEMP groups multiple thread blocks that share the same set of pages into a thread batch and applies a page coloring mechanism to bound each stream multiprocessor (SM) to the dedicated memory banks. After that, TEMP dispatches the thread batch to an SM to ensure high-parallel memory-access streaming from the different thread blocks. 
Second, a thread batch-aware scheduling (TBAS) scheme is introduced to improve the GPU memory access locality and to reduce the contention on memory controllers and interconnection networks. Experimental results show that the integration of TEMP and TBAS can achieve up to 10.3\% performance improvement and 11.3\% DRAM energy reduction across diverse GPU applications. We also evaluate the performance interference of the mixed CPU+GPU workloads when they are run on a heterogeneous system that employs our proposed schemes. Our results show that a simple solution can effectively ensure the efficient execution of both GPU and CPU applications.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Karri:2020:E, author = "Ramesh Karri", title = "Editorial", journal = j-JETC, volume = "16", number = "1", pages = "1:1--1:1", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378024", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378024", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kang:2020:CCL, author = "Wang Kang and Bi Wu and Xing Chen and Daoqian Zhu and Zhaohao Wang and Xichao Zhang and Yan Zhou and Youguang Zhang and Weisheng Zhao", title = "A Comparative Cross-layer Study on Racetrack Memories: Domain Wall vs {Skyrmion}", journal = j-JETC, volume = "16", number = "1", pages = "2:1--2:17", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3333336", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3333336", 
abstract = "Racetrack memory (RM), a new storage scheme in which information flows along a nanotrack, has been considered as a potential candidate for future high-density storage device instead of hard disk drive (HDD). The first RM technology, which was proposed \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Smith:2020:HDQ, author = "Kaitlin N. Smith and Mitchell A. Thornton", title = "Higher Dimension Quantum Entanglement Generators", journal = j-JETC, volume = "16", number = "1", pages = "3:1--3:21", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3345501", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3345501", abstract = "Quantum information processing and communication techniques rely heavily upon entangled quantum states, and this dependence motivates the development of methods and systems to generate entanglement. Much research has been dedicated to state preparation \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bashir:2020:PSR, author = "Janibul Bashir and Smruti Ranjan Sarangi", title = "Predict, Share, and Recycle Your Way to Low-power Nanophotonic Networks", journal = j-JETC, volume = "16", number = "1", pages = "4:1--4:26", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3356585", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3356585", abstract = "High static power consumption is widely regarded as one of the largest bottlenecks in creating scalable optical NoCs. 
The standard techniques to reduce static power are based on sharing optical channels and modulating the laser. We show in this article \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sakalis:2020:EPA, author = "Christos Sakalis and Alexandra Jimborean and Stefanos Kaxiras and Magnus Sj{\"a}lander", title = "Evaluating the Potential Applications of Quaternary Logic for Approximate Computing", journal = j-JETC, volume = "16", number = "1", pages = "5:1--5:25", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359620", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359620", abstract = "There exist extensive ongoing research efforts on emerging atomic-scale technologies that have the potential to become an alternative to today's complementary metal--oxide--semiconductor technologies. 
A common feature among the investigated technologies \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Park:2020:LSC, author = "Jungmin Park and Fahim Rahman and Apostol Vassilev and Domenic Forte and Mark Tehranipoor", title = "Leveraging Side-Channel Information for Disassembly and Security", journal = j-JETC, volume = "16", number = "1", pages = "6:1--6:21", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359621", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359621", abstract = "With the rise of Internet of Things (IoT), devices such as smartphones, embedded medical devices, smart home appliances, as well as traditional computing platforms such as personal computers and servers have been increasingly targeted with a variety of \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mondal:2020:EED, author = "Ankit Mondal and Ankur Srivastava", title = "Energy-efficient Design of {MTJ}-based Neural Networks with Stochastic Computing", journal = j-JETC, volume = "16", number = "1", pages = "7:1--7:27", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359622", ISSN = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359622", abstract = "Hardware implementations of Artificial Neural Networks (ANNs) using conventional binary arithmetic units are computationally expensive, energy-intensive, and have large area overheads. 
Stochastic Computing (SC) is an emerging paradigm that replaces \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Tann:2020:REE, author = "Hokchhay Tann and Heng Zhao and Sherief Reda", title = "A Resource-Efficient Embedded Iris Recognition System Using Fully Convolutional Networks", journal = j-JETC, volume = "16", number = "1", pages = "8:1--8:23", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3357796", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3357796", abstract = "Applications of fully convolutional networks (FCN) in iris segmentation have shown promising advances. For mobile and embedded systems, a significant challenge is that the proposed FCN architectures are extremely computationally demanding. In this \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Cui:2020:LSN, author = "Weilong Cui and Georgios Tzimpragos and Yu Tao and Joseph McMahan and Deeksha Dangwal and Nestan Tsiskaridze and George Michelogiannakis and Dilip P. 
Vasudevan and Timothy Sherwood", title = "Language Support for Navigating Architecture Design in Closed Form", journal = j-JETC, volume = "16", number = "1", pages = "9:1--9:28", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3360047", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3360047", abstract = "As computer architecture continues to expand beyond software-agnostic microarchitecture to specialized and heterogeneous logic or even radically different emerging computing models (e.g., quantum cores, DNA storage units), detailed cycle-level \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Islam:2020:FPM, author = "Mahzabeen Islam and Shashank Adavally and Marko Scrbak and Krishna Kavi", title = "On-the-fly Page Migration and Address Reconciliation for Heterogeneous Memory Systems", journal = j-JETC, volume = "16", number = "1", pages = "10:1--10:27", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3364179", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3364179", abstract = "For efficient placement of data in flat-address heterogeneous memory systems consisting of fast (e.g., 3D-DRAM) and slow memories (e.g., NVM), we present a hardware-based page migration technique. 
Unlike epoch-based approaches that migrate heavily \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Torres:2020:NZE, author = "Frank Sill Torres and Philipp Niemann and Robert Wille and Rolf Drechsler", title = "Near Zero-Energy Computation Using Quantum-Dot Cellular Automata", journal = j-JETC, volume = "16", number = "1", pages = "11:1--11:16", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365394", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 15 07:14:53 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365394", abstract = "Near zero-energy computing describes the concept of executing logic operations below the ($ k_B T \ln 2 $) energy limit. Landauer discussed that it is impossible to break this limit as long as the computations are performed in the conventional, non-reversible \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Qian:2020:MBC, author = "Fengyu Qian and Yanping Gong and Lei Wang", title = "A Memristor-Based Compressive Sampling Encoder with Dynamic Rate Control for Low-Power Video Streaming", journal = j-JETC, volume = "16", number = "2", pages = "12:1--12:16", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365836", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365836", abstract = "Image sensors are widely used in various applications. 
With the increasing requirement for high resolutions and frame rates, power consumption has become a critical issue, which limits the use of image sensors in mobile devices and IoT applications. \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Nguyen:2020:CMC, author = "Hoang Anh Du Nguyen and Jintao Yu and Muath Abu Lebdeh and Mottaqiallah Taouil and Said Hamdioui and Francky Catthoor", title = "A Classification of Memory-Centric Computing", journal = j-JETC, volume = "16", number = "2", pages = "13:1--13:26", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365837", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365837", abstract = "Technological and architectural improvements have been constantly required to sustain the demand of faster and cheaper computers. However, CMOS down-scaling is suffering from three technology walls: leakage wall, reliability wall, and cost wall. 
On top \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Crites:2020:DPM, author = "Brian Crites and Karen Kong and Philip Brisk", title = "Directed Placement for {mVLSI} Devices", journal = j-JETC, volume = "16", number = "2", pages = "14:1--14:26", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369585", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369585", abstract = "Continuous-flow microfluidic devices based on integrated channel networks are becoming increasingly prevalent in research in the biological sciences. At present, these devices are physically laid out by hand by domain experts who understand both the \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{He:2020:SBN, author = "Zhezhi He and Li Yang and Shaahin Angizi and Adnan Siraj Rakin and Deliang Fan", title = "Sparse {BD-Net}: a Multiplication-less {DNN} with Sparse Binarized Depth-wise Separable Convolution", journal = j-JETC, volume = "16", number = "2", pages = "15:1--15:24", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369391", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369391", abstract = "In this work, we propose a multiplication-less binarized depthwise-separable convolution neural network, called BD-Net. 
BD-Net is designed to use binarized depthwise separable convolution block as the drop-in replacement of conventional spatial-\ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wang:2020:CDF, author = "Lei Wang and Yu Deng and Rui Gong and Wei Shi and Li Luo and Yongwen Wang", title = "{CSMO--DSE}: Fast and Precise Application-driven {DSE} Guided by Criticality and Sensitivity Analysis", journal = j-JETC, volume = "16", number = "2", pages = "16:1--16:22", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371406", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371406", abstract = "Determining the optimal microarchitecture configuration of a processor at the early stages of design is undeniably a challenge. Due to many parameters at the microarchitecture level, finding the proper combination of these parameters to arrive at a \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Guo:2020:PNO, author = "Zimu Guo and Sreeja Chowdhury and Mark M. 
Tehranipoor and Domenic Forte", title = "Permutation Network De-obfuscation: a Delay-based Attack and Countermeasure Investigation", journal = j-JETC, volume = "16", number = "2", pages = "17:1--17:25", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371407", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371407", abstract = "Permutation-based obfuscation has been proposed to protect hardware against cloning, overproduction, reverse engineering, and unauthorized operation. To prevent key extraction from memory, the key used by the obfuscation is usually stored in volatile \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Taha:2020:AMM, author = "Mohammad M. A. Taha and Christof Teuscher", title = "Approximate Memristive In-Memory {Hamming} Distance Circuit", journal = j-JETC, volume = "16", number = "2", pages = "18:1--18:14", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371391", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371391", abstract = "Hamming Distance (HD) is a popular similarity measure that is used widely in pattern matching applications, DNA sequencing, and binary error-correcting codes. 
In this article, we extend our previous work to prove that our HD circuit is scalable, \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rasheed:2020:CAP, author = "Farhan Rasheed and Michael Hefenbrock and Rajendra Bishnoi and Michael Beigl and Jasmin Aghassi-Hagmann and Mehdi B. Tahoori", title = "Crossover-aware Placement and Routing for Inkjet Printed Circuits", journal = j-JETC, volume = "16", number = "2", pages = "19:1--19:22", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375461", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375461", abstract = "Printed Electronics technology is a key-enabler for smart sensors, soft robotics, and wearables. The inkjet printed electrolyte-gated field effect transistor (EGFET) technology is a promising candidate for such applications due to its low-power \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2020:RRB, author = "Bing Li and Janardhan Rao Doppa and Partha Pratim Pande and Krishnendu Chakrabarty and Joe X. 
Qiu and Hai (Helen) Li", title = "{$3$D-ReG}: a {$3$D} {ReRAM}-based Heterogeneous Architecture for Training Deep Neural Networks", journal = j-JETC, volume = "16", number = "2", pages = "20:1--20:24", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375699", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375699", abstract = "Deep neural network (DNN) models are being expanded to a broader range of applications. The computational capability of traditional hardware platforms cannot accommodate the growth of model complexity. Among recent technologies to accelerate DNN, \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ascia:2020:EDR, author = "Giuseppe Ascia and Vincenzo Catania and Salvatore Monteleone and Maurizio Palesi and Davide Patti and John Jose and Valerio Mario Salerno", title = "Exploiting Data Resilience in Wireless Network-on-chip Architectures", journal = j-JETC, volume = "16", number = "2", pages = "21:1--21:27", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3379448", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3379448", abstract = "The emerging wireless Network-on-Chip (WiNoC) architectures are a viable solution for addressing the scalability limitations of manycore architectures in which multi-hop long-range communications strongly impact both the performance and energy figures \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", 
journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mintz:2020:QLE, author = "Tiffany M. Mintz and Alexander J. McCaskey and Eugene F. Dumitrescu and Shirley V. Moore and Sarah Powers and Pavel Lougovski", title = "{QCOR}: a Language Extension Specification for the Heterogeneous Quantum-Classical Model of Computation", journal = j-JETC, volume = "16", number = "2", pages = "22:1--22:17", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3380964", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380964", abstract = "Quantum computing (QC) is an emerging computational paradigm that leverages the laws of quantum mechanics to perform elementary logic operations. Existing programming models for QC were designed with fault-tolerant hardware in mind, envisioning stand-\ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Beckmann:2020:TSB, author = "Karsten Beckmann and Wilkie Olin-Ammentorp and Gangotree Chakma and Sherif Amer and Garrett S. Rose and Chris Hobbs and Joseph {Van Nostrand} and Martin Rodgers and Nathaniel C. 
Cady", title = "Towards Synaptic Behavior of Nanoscale {ReRAM} Devices for Neuromorphic Computing Applications", journal = j-JETC, volume = "16", number = "2", pages = "23:1--23:18", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381859", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue May 5 13:48:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381859", abstract = "Resistive Random Access Memory (ReRAM), a form of non-volatile memory, has been proposed as a Flash memory replacement. In addition, novel circuit architectures have been proposed that rely on newly discovered or predicted behavior of ReRAM. One such \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2020:ISI, author = "Helen Li and Wei Zhang and Swarup Bhunia and Wujie Wen", title = "Introduction to the Special Issue on New Trends in Nanoelectronic Device, Circuit, and Architecture Design, {Part 1}", journal = j-JETC, volume = "16", number = "3", pages = "24:1--24:3", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3392080", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3392080", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhang:2020:MPR, author = "Fan Zhang and Miao Hu", title = "Mitigate Parasitic Resistance in Resistive Crossbar-based Convolutional Neural Networks", journal = j-JETC, volume = "16", number = "3", pages = "25:1--25:20", month = jul, year = "2020", CODEN = "????", DOI = 
"https://doi.org/10.1145/3371277", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371277", abstract = "Traditional computing hardware often encounters on-chip memory bottleneck on large-scale Convolution Neural Networks (CNN) applications. With its unique in-memory computing feature, resistive crossbar-based computing attracts researchers' attention as a \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhang:2020:LOO, author = "Xinyi Zhang and Clay Patterson and Yongpan Liu and Chengmo Yang and Chun Jason Xue and Jingtong Hu", title = "Low Overhead Online Data Flow Tracking for Intermittently Powered Non-Volatile {FPGAs}", journal = j-JETC, volume = "16", number = "3", pages = "26:1--26:20", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371392", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371392", abstract = "Energy harvesting is an attractive way to power future Internet of Things (IoT) devices since it can eliminate the need for battery or power cables. However, harvested energy is intrinsically unstable. 
While Field-programmable Gate Array (FPGAs) have \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2020:LNL, author = "Sumin Li and Linpeng Huang", title = "{LosPem}: a Novel Log-Structured Framework for Persistent Memory", journal = j-JETC, volume = "16", number = "3", pages = "27:1--27:17", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3379932", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3379932", abstract = "New and emerging types of Persistent Memory (PM) technologies boost the opportunity to improve the performance of storage systems. PM can unify the main memory and secondary storage by incorporating it into legacy computer systems through the memory \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sayyaparaju:2020:DAC, author = "Sagarvarma Sayyaparaju and Md Musabbir Adnan and Sherif Amer and Garrett S. 
Rose", title = "Device-aware Circuit Design for Robust Memristive Neuromorphic Systems with {STDP}-based Learning", journal = j-JETC, volume = "16", number = "3", pages = "28:1--28:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3380969", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380969", abstract = "In the past decade, complementary metal oxide semiconductor-memristor hybrid neuromorphic systems have gained importance owing to the advantages of memristors such as nano-scale size, non-volatility, and low-power operation. However, they are often \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ni:2020:WBE, author = "Jiacheng Ni and Keren Liu and Bi Wu and Weisheng Zhao and Yuanqing Cheng and Xiaolong Zhang and Ying Wang", title = "Write Back Energy Optimization for {STT-MRAM}-based Last-level Cache with Data Pattern Characterization", journal = j-JETC, volume = "16", number = "3", pages = "29:1--29:18", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381860", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381860", abstract = "Traditional memory technologies face severe challenges in meeting the ever-increasing power and memory bandwidth requirements for high-performance computing and big-data analyses. 
Several emerging memory technologies are promising as the replacements of \ldots{}", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Delgado-Lozano:2020:PDR, author = "I. M. Delgado-Lozano and E. Tena-S{\'a}nchez and J. N{\'u}{\~n}ez and A. J. Acosta", title = "Projection of Dual-Rail {DPA} Countermeasures in Future {FinFET} and Emerging {TFET} Technologies", journal = j-JETC, volume = "16", number = "3", pages = "30:1--30:16", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381857", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381857", abstract = "The design of near future cryptocircuits will require greater performance characteristics in order to be implemented in devices with very limited resources for secure applications. 
Considering the security against differential power side-channel attacks \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Das:2020:GRM, author = "Subrata Das and Debesh Kumar Das and Soumya Pandit", title = "A Global Routing Method for Graphene Nanoribbons Based Circuits and Interconnects", journal = j-JETC, volume = "16", number = "3", pages = "31:1--31:28", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3384214", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3384214", abstract = "With extreme miniaturization of traditional CMOS devices in deep sub-micron design levels, the delay of a circuit, as well as power dissipation and area are dominated by interconnections between logic blocks. 
Interconnect today is causing major problems \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Nishio:2020:ESI, author = "Shin Nishio and Yulu Pan and Takahiko Satoh and Hideharu Amano and Rodney {Van Meter}", title = "Extracting Success from {IBM}'s $ 20$-Qubit Machines Using Error-Aware Compilation", journal = j-JETC, volume = "16", number = "3", pages = "32:1--32:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386162", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/super.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386162", abstract = "NISQ (Noisy, Intermediate-Scale Quantum) computing requires error mitigation to achieve meaningful computation. Our compilation tool development focuses on the fact that the error rates of individual qubits are not equal, with a goal of maximizing the success probability of real-world subroutines such as an adder circuit. We begin by establishing a metric for choosing among possible paths and circuit alternatives for executing gates between variables placed far apart within the processor, and test our approach on two IBM 20-qubit systems named Tokyo and Poughkeepsie. We find that a single-number metric describing the fidelity of individual gates is a useful but imperfect guide.\par Our compiler uses this subsystem and maps complete circuits onto the machine using a beam search-based heuristic that will scale as processor and program sizes grow. To evaluate the whole compilation process, we compiled and executed adder circuits, then calculated the Kullback Leibler divergence (KL-divergence, a measure of the distance between two probability distributions). 
For a circuit within the capabilities of the hardware, our compilation increases estimated success probability and reduces KL-divergence relative to an error-oblivious placement.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kim:2020:PSA, author = "J. Hyun Kim and Young Je Moon and Hyunsub Song and Jay H. Park and Sam H. Noh", title = "On Providing {OS} Support to Allow Transparent Use of Traditional Programming Models for Persistent Memory", journal = j-JETC, volume = "16", number = "3", pages = "33:1--33:24", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3388637", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3388637", abstract = "The advent of persistent memory (PM) into our everyday computing environment is now imminent. New programming models and algorithms based on these models are being developed for such systems. However, current models require programs to be rewritten with \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kumar:2020:DAL, author = "S. 
Dinesh Kumar and Himanshu Thapliyal", title = "Design of Adiabatic Logic-Based Energy-Efficient and Reliable {PUF} for {IoT} Devices", journal = j-JETC, volume = "16", number = "3", pages = "34:1--34:18", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3390771", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sun Jul 19 08:36:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3390771", abstract = "Internet of Things (IoT) devices have stringent constraints on power and energy consumption. Adiabatic logic has been proposed as a novel computing platform to design energy-efficient IoT devices. Physically Unclonable Functions (PUFs) is a promising \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhang:2020:GEA, author = "Wei Zhang and Helen Li and Wujie Wen and Swarup Bhunia", title = "Guest Editorial: {ACM JETC} Special Issue on New Trends in Nanolectronic Device, Circuit, and Architecture Design: {Part 2}", journal = j-JETC, volume = "16", number = "4", pages = "35:1--35:3", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412343", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3412343", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kim:2020:HSC, author = "Youngseok Kim and Seyoung Kim and Chun-Chen Yeh and Vijay Narayanan and Jungwook Choi", title = "Hardware and Software Co-optimization for the Initialization Failure of the {ReRAM}-based Cross-bar Array", journal = j-JETC, 
volume = "16", number = "4", pages = "36:1--36:19", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3393669", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3393669", abstract = "Recent advances in deep neural network demand more than millions of parameters to handle and mandate the high-performance computing resources with improved efficiency. The cross-bar array architecture has been considered as one of the promising deep \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wang:2020:HSS, author = "Xueyan Wang and Jienlei Yang and Yinglin Zhao and Xiaotao Jia and Gang Qu and Weisheng Zhao", title = "Hardware Security in Spin-based Computing-in-memory: Analysis, Exploits, and Mitigation Techniques", journal = j-JETC, volume = "16", number = "4", pages = "37:1--37:18", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397513", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3397513", abstract = "Computing-in-memory (CIM) is proposed to alleviate the processor-memory data transfer bottleneck in traditional von Neumann architectures, and spintronics-based magnetic memory has demonstrated many facilitation in implementing CIM paradigm. 
Since \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Samavatian:2020:RAR, author = "Mohammad Hossein Samavatian and Anys Bacha and Li Zhou and Radu Teodorescu", title = "{RNNFast}: an Accelerator for Recurrent Neural Networks Using Domain-Wall Memory", journal = j-JETC, volume = "16", number = "4", pages = "38:1--38:27", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399670", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3399670", abstract = "Recurrent Neural Networks (RNNs) are an important class of neural networks designed to retain and incorporate context into current decisions. RNNs are particularly well suited for machine learning problems in which context is important, such as speech \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kang:2020:AAS, author = "Ziyang Kang and Lei Wang and Shasha Guo and Rui Gong and Shiming Li and Yu Deng and Weixia Xu", title = "{ASIE}: an Asynchronous {SNN} Inference Engine for {AER} Events Processing", journal = j-JETC, volume = "16", number = "4", pages = "39:1--39:22", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404992", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3404992", abstract = "Neuromorphic computing based on spiking neural network (SNN) shows good energy-efficiency. 
However, it is inefficient for SNN to perform the convolution based on frame. It may contain a lot of redundant information in the frame. The output of Dynamic \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Cambou:2020:CAS, author = "Bertrand Cambou and David H{\'e}ly and Sareh Assiri", title = "Cryptography with Analog Scheme Using Memristors", journal = j-JETC, volume = "16", number = "4", pages = "40:1--40:30", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412439", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3412439", abstract = "Networks of low-power Internet of Things do not have always access to enough computing power to support mainstream cryptographic schemes; such schemes also consume computing power that can be exposed to side channel attacks. This article describes a method, that we call ``cryptography with analog scheme using memristors,'' leveraging the physical properties of memristors, which are active elements suitable for the design of components such as artificial neurons. The proposed devices encrypt messages by segmenting them into blocks of bits, each modulating the injected currents into randomly selected memristor cells, resulting into sets of resistance values turned into cipher texts. Through hash-protected handshakes, identical addresses are independently generated by both communicating devices, to concurrently point at the same set of cells in the arrays, and their images. 
These block ciphers, for example, 1 KB long, can only be decrypted with the same memristor array driven by analog circuitry or its image, rather than digital key-based schemes. The proposed methods generate cipher text, and decrypt them, with approximately one femto joule per bit, which is below observable level through differential power analysis. The article explains how the use of different cells for each message to encrypt, driven under different conditions, has the potential to mitigate mainstream attacks. It provides a detailed characterization of memristors to evaluate the feasibility of the approach and discusses some hardware and architectures to implement the scheme.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Arka:2020:MCP, author = "Aqeeb Iqbal Arka and Srinivasan Gopal and Janardhan Rao Doppa and Deukhyoun Heo and Partha Pratim Pande", title = "Making a Case for Partially Connected {$3$D} {NoC}: {NFIC} versus {TSV}", journal = j-JETC, volume = "16", number = "4", pages = "41:1--41:17", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394919", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3394919", abstract = "3D Network-on-Chip (3D NoC) enables design of high-performance and energy-efficient manycore computing platforms. Two of the commonly used vertical interconnection technologies are: through silicon via (TSV) and near-field inductive coupling (NFIC). \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mehta:2020:BHE, author = "Dhwani Mehta and Hangwei Lu and Olivia P. 
Paradis and Mukhil Azhagan M. S. and M. Tanjidur Rahman and Yousef Iskander and Praveen Chawla and Damon L. Woodard and Mark Tehranipoor and Navid Asadizanjani", title = "The Big Hack Explained: Detection and Prevention of {PCB} Supply Chain Implants", journal = j-JETC, volume = "16", number = "4", pages = "42:1--42:25", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3401980", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3401980", abstract = "Over the past two decades, globalized outsourcing in the semiconductor supply chain has lowered manufacturing costs and shortened the time-to-market for original equipment manufacturers (OEMs). However, such outsourcing has rendered the printed circuit \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sayed:2020:ASM, author = "Nour Sayed and Rajendra Bishnoi and Mehdi B. Tahoori", title = "Approximate Spintronic Memories", journal = j-JETC, volume = "16", number = "4", pages = "43:1--43:22", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404980", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3404980", abstract = "Various applications, such as multimedia, machine learning, and signal processing, have a significant intrinsic error resilience. 
This makes them preferable for approximate computing as they have the ability to tolerate computations and data errors \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gonzalez-Guerrero:2020:TNM, author = "Patricia Gonzalez-Guerrero and Tommy {Tracy II} and Xinfei Guo and Rahul Sreekumar and Marzieh Lenjani and Kevin Skadron and Mircea R. Stan", title = "Towards on-node Machine Learning for Ultra-low-power Sensors Using Asynchronous {$ \Sigma \Delta $} Streams", journal = j-JETC, volume = "16", number = "4", pages = "44:1--44:20", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404975", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Oct 9 07:37:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3404975", abstract = "We propose a novel architecture to enable low-power, complex on-node data processing, for the next generation of sensors for the internet of things (IoT), smartdust, or edge intelligence. 
Our architecture combines near-analog-memory-computing (NAM) and \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ayub:2021:PPE, author = "Muhammad Kamran Ayub and Muhammad Abdullah Hanif and Osman Hasan and Muhammad Shafique", title = "{PEAL}: Probabilistic Error Analysis Methodology for Low-power Approximate Adders", journal = j-JETC, volume = "17", number = "1", pages = "1:1--1:37", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3405430", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3405430", abstract = "Approximate computing has emerged as an efficient design approach for applications with inherent error resilience. Low-power approximate adders (LPAAs), for instance, IMPACT and InXA, are being advocated as building blocks for approximate computing \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Pasandi:2021:DBG, author = "Ghasem Pasandi and Massoud Pedram", title = "Depth-bounded Graph Partitioning Algorithm and Dual Clocking Method for Realization of Superconducting {SFQ} Circuits", journal = j-JETC, volume = "17", number = "1", pages = "2:1--2:22", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3412389", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3412389", abstract = "Superconducting Single Flux Quantum (SFQ) logic with switching delay of 1ps and switching energy of 10$^{-19}$ J is a 
potential emerging candidate for replacing Complementary Metal Oxide Semiconductor (CMOS) to achieve very high speed and ultra energy \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Alam:2021:NNB, author = "Manaar Alam and Arnab Bag and Debapriya Basu Roy and Dirmanto Jap and Jakub Breier and Shivam Bhasin and Debdeep Mukhopadhyay", title = "Neural Network-based Inherently Fault-tolerant Hardware Cryptographic Primitives without Explicit Redundancy Checks", journal = j-JETC, volume = "17", number = "1", pages = "3:1--3:30", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409594", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3409594", abstract = "Fault injection-based cryptanalysis is one of the most powerful practical threats to modern cryptographic primitives. 
Popular countermeasures to such fault-based attacks generally use some form of redundant computation to detect and react/correct the \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Biswas:2021:NCI, author = "Arnab Kumar Biswas", title = "Network-on-Chip Intellectual Property Protection Using Circular Path-based Fingerprinting", journal = j-JETC, volume = "17", number = "1", pages = "4:1--4:22", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3410024", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3410024", abstract = "Intellectual property (IP) reuse is a well-known technique in chip design industry. But this technique also exposes a security vulnerability called IP stealing attack. 
Network-on-Chip (NoC) is an on-chip scalable communication medium and is used as an \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Jha:2021:DED, author = "Nandan Kumar Jha and Sparsh Mittal and Binod Kumar and Govardhan Mattela", title = "{DeepPeep}: Exploiting Design Ramifications to Decipher the Architecture of Compact {DNNs}", journal = j-JETC, volume = "17", number = "1", pages = "5:1--5:25", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3414552", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3414552", abstract = "The remarkable predictive performance of deep neural networks (DNNs) has led to their adoption in service domains of unprecedented scale and scope. 
However, the widespread adoption and growing commercialization of DNNs have underscored the importance of \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wang:2021:HTA, author = "Xinmu Wang and Tamzidul Hoque and Abhishek Basak and Robert Karam and Wei Hu and Maoyuan Qin and Dejun Mu and Swarup Bhunia", title = "Hardware {Trojan} Attack in Embedded Memory", journal = j-JETC, volume = "17", number = "1", pages = "6:1--6:28", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3422353", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3422353", abstract = "Static Random Access Memory (SRAM) is a core technology for building computing hardware, including cache memory, register files and field programmable gate array devices. Hence, SRAM reliability is essential to guarantee dependable computing. While \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Lu:2021:LNT, author = "Yunfeng Lu and Huaxi Gu and Xiaoshan Yu and Krishnendu Chakrabarty", title = "{Lotus}: a New Topology for Large-scale Distributed Machine Learning", journal = j-JETC, volume = "17", number = "1", pages = "7:1--7:21", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3415749", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3415749", abstract = "Machine learning is at the heart of many services provided by data centers. 
To improve the performance of machine learning, several parameter (gradient) synchronization methods have been proposed in the literature. These synchronization algorithms have \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bashir:2021:GPE, author = "Janibul Bashir and Smruti R. Sarangi", title = "{GPUOPT}: Power-efficient Photonic Network-on-Chip for a Scalable {GPU}", journal = j-JETC, volume = "17", number = "1", pages = "8:1--8:26", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3416850", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3416850", abstract = "On-chip photonics is a disruptive technology, and such NoCs are superior to traditional electrical NoCs in terms of latency, power, and bandwidth. Hence, researchers have proposed a wide variety of optical networks for multicore processors. The high \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sayed:2021:DBP, author = "Nour Sayed and Longfei Mao and Mehdi B. 
Tahoori", title = "Dynamic Behavior Predictions for Fast and Efficient Hybrid {STT-MRAM} Caches", journal = j-JETC, volume = "17", number = "1", pages = "9:1--9:21", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423135", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3423135", abstract = "Spin Transfer Torque Magnetic Random Access Memory (STT-MRAM) is a promising candidate as a universal on-chip memory technology due to its non-volatility, high density, and scalability. However, high write energy and latency are its major shortcomings, \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Beirendonck:2021:SCR, author = "Michiel {Van Beirendonck} and Jan-Pieter D'Anvers and Angshuman Karmakar and Josep Balasch and Ingrid Verbauwhede", title = "A Side-Channel-Resistant Implementation of {SABER}", journal = j-JETC, volume = "17", number = "2", pages = "10:1--10:26", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429983", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3429983", abstract = "The candidates for the NIST Post-Quantum Cryptography standardization have undergone extensive studies on efficiency and theoretical security, but research on their side-channel security is largely lacking.
This remains a considerable obstacle for their \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Alasad:2021:RSH, author = "Qutaiba Alasad and Jie Lin and Jiann-Shuin Yuan and Deliang Fan and Amro Awad", title = "Resilient and Secure Hardware Devices Using {ASL}", journal = j-JETC, volume = "17", number = "2", pages = "11:1--11:26", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429982", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3429982", abstract = "Due to the globalization of Integrated Circuit (IC) design in the semiconductor industry and the outsourcing of chip manufacturing, Third-Party Intellectual Properties (3PIPs) become vulnerable to IP piracy, reverse engineering, counterfeit IC, and \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mukhopadhyay:2021:PES, author = "Anand Kumar Mukhopadhyay and Atul Sharma and Indrajit Chakrabarti and Arindam Basu and Mrigank Sharad", title = "Power-efficient Spike Sorting Scheme Using Analog Spiking Neural Network Classifier", journal = j-JETC, volume = "17", number = "2", pages = "12:1--12:29", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3432814", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3432814", abstract = "The method to map the neural signals to the neuron from which it originates is spike sorting. 
A low-power spike sorting system is presented for a neural implant device. The spike sorter constitutes a two-step trainer module that is shared by the signal \ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chatterjee:2021:PMM, author = "Anwesha Chatterjee and Shouvik Musavvir and Ryan Gary Kim and Janardhan Rao Doppa and Partha Pratim Pande", title = "Power Management of Monolithic {$3$D} Manycore Chips with Inter-tier Process Variations", journal = j-JETC, volume = "17", number = "2", pages = "13:1--13:19", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3430765", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3430765", abstract = "Voltage/frequency island (VFI)-based power management is a popular methodology for designing energy-efficient manycore architectures without incurring significant performance overhead. 
However, monolithic 3D (M3D) integration has emerged as an enabling \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Alam:2021:VCS, author = "Manaar Alam and Sarani Bhattacharya and Debdeep Mukhopadhyay", title = "Victims Can Be Saviors: a Machine Learning--based Detection for Micro-Architectural Side-Channel Attacks", journal = j-JETC, volume = "17", number = "2", pages = "14:1--14:31", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439189", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3439189", abstract = "Micro-architectural side-channel attacks are major threats to the most mathematically sophisticated encryption algorithms. In spite of the fact that there exist several defense techniques, the overhead of implementing the countermeasures remains a \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wu:2021:DRS, author = "Chia-Cheng Wu and Yi-Hsiang Hu and Chia-Chun Lin and Yung-Chih Chen and Juinn-Dar Huang and Chun-Yao Wang", title = "Diagnosis for Reconfigurable Single-Electron Transistor Arrays with a More Generalized Defect Model", journal = j-JETC, volume = "17", number = "2", pages = "15:1--15:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3444751", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3444751", abstract = "Single-Electron Transistor (SET) is considered as a promising
candidate of low-power devices for replacement or co-existence with Complementary Metal-Oxide-Semiconductor (CMOS) transistors/circuits. In this work, we propose a diagnosis approach for SET \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Adavally:2021:DAP, author = "Shashank Adavally and Mahzabeen Islam and Krishna Kavi", title = "Dynamically Adapting Page Migration Policies Based on Applications' Memory Access Behaviors", journal = j-JETC, volume = "17", number = "2", pages = "16:1--16:24", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3444750", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3444750", abstract = "There have been numerous studies on heterogeneous memory systems comprised of faster DRAM (e.g., 3D stacked HBM or HMC) and slower non-volatile memories (e.g., PCM, STT-RAM). 
However, most of these studies focused on static policies for managing data \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Tran:2021:CCC, author = "Dat Tran and Christof Teuscher", title = "Computational Capacity of Complex Memcapacitive Networks", journal = j-JETC, volume = "17", number = "2", pages = "17:1--17:25", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445795", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3445795", abstract = "Emerging memcapacitive nanoscale devices have the potential to perform computations in new ways. In this article, we systematically study, to the best of our knowledge for the first time, the computational capacity of complex memcapacitive networks, \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chen:2021:ISIa, author = "Yiran Chen and Qinru Qiu and Yingyan Lin", title = "Introduction of Special Issue on Hardware and Algorithms for Efficient Machine Learning-Part 1", journal = j-JETC, volume = "17", number = "2", pages = "18:1--18:2", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3449045", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3449045", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Hosseini:2021:BPN, author = 
"Morteza Hosseini and Tinoosh Mohsenin", title = "Binary Precision Neural Network Manycore Accelerator", journal = j-JETC, volume = "17", number = "2", pages = "19:1--19:27", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423136", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3423136", abstract = "This article presents a low-power, programmable, domain-specific manycore accelerator, Binarized neural Network Manycore Accelerator (BiNMAC), which adopts and efficiently executes binary precision weight/activation neural network models. Such networks \ldots{}", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhang:2021:FLI, author = "Nathan Zhang and Kevin Canini and Sean Silva and Maya Gupta", title = "Fast Linear Interpolation", journal = j-JETC, volume = "17", number = "2", pages = "20:1--20:15", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423184", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3423184", abstract = "We present fast implementations of linear interpolation operators for piecewise linear functions and multi-dimensional look-up tables. These operators are common for efficient transformations in image processing and are the core operations needed for \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Asseman:2021:ADN, author = "Alexis Asseman and Nicolas Antoine and Ahmet S. 
Ozcan", title = "Accelerating Deep Neuroevolution on Distributed {FPGAs} for Reinforcement Learning Problems", journal = j-JETC, volume = "17", number = "2", pages = "21:1--21:17", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425500", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3425500", abstract = "Reinforcement learning, augmented by the representational power of deep neural networks, has shown promising results on high-dimensional problems, such as game playing and robotic control. However, the sequential nature of these problems poses a \ldots{}", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Das:2021:CNM, author = "Palash Das and Hemangee K. Kapoor", title = "{CLU}: a Near-Memory Accelerator Exploiting the Parallelism in Convolutional Neural Networks", journal = j-JETC, volume = "17", number = "2", pages = "22:1--22:25", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3427472", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3427472", abstract = "Convolutional/Deep Neural Networks (CNNs/DNNs) are rapidly growing workloads for the emerging AI-based systems. 
The gap between the processing speed and the memory-access latency in multi-core systems affects the performance and energy efficiency of the \ldots{}", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Khatwani:2021:FME, author = "Mohit Khatwani and Hasib-Al Rashid and Hirenkumar Paneliya and Mark Horton and Nicholas Waytowich and W. David Hairston and Tinoosh Mohsenin", title = "A Flexible Multichannel {EEG} Artifact Identification Processor using Depthwise-Separable Convolutional Neural Networks", journal = j-JETC, volume = "17", number = "2", pages = "23:1--23:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3427471", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3427471", abstract = "This article presents an energy-efficient and flexible multichannel Electroencephalogram (EEG) artifact identification network and its hardware using depthwise and separable convolutional neural networks. 
EEG signals are recordings of the brain \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Eliahu:2021:MME, author = "Adi Eliahu and Ronny Ronen and Pierre-Emmanuel Gaillardon and Shahar Kvatinsky", title = "{multiPULPly}: a Multiplication Engine for Accelerating Neural Networks on Ultra-low-power Architectures", journal = j-JETC, volume = "17", number = "2", pages = "24:1--24:27", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3432815", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3432815", abstract = "Computationally intensive neural network applications often need to run on resource-limited low-power devices. Numerous hardware accelerators have been developed to speed up the performance of neural network applications and reduce power consumption; however, most focus on data centers and full-fledged systems. Acceleration in ultra-low-power systems has been only partially addressed. In this article, we present multiPULPly, an accelerator that integrates memristive technologies within standard low-power CMOS technology, to accelerate multiplication in neural network inference on ultra-low-power systems. This accelerator was designated for PULP, an open-source microcontroller system that uses low-power RISC-V processors. Memristors were integrated into the accelerator to enable power consumption only when the memory is active, to continue the task with no context-restoring overhead, and to enable highly parallel analog multiplication. To reduce the energy consumption, we propose novel dataflows that handle common multiplication scenarios and are tailored for our architecture. 
The accelerator was tested on FPGA and achieved a peak energy efficiency of 19.5 TOPS/W, outperforming state-of-the-art accelerators by $ 1.5 \times $ to $ 4.5 \times $.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Biookaghazadeh:2021:TMF, author = "Saman Biookaghazadeh and Pravin Kumar Ravi and Ming Zhao", title = "Toward Multi-{FPGA} Acceleration of the Neural Networks", journal = j-JETC, volume = "17", number = "2", pages = "25:1--25:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3432816", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Apr 30 06:39:29 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3432816", abstract = "High-throughput and low-latency Convolutional Neural Network (CNN) inference is increasingly important for many cloud- and edge-computing applications. FPGA-based acceleration of CNN inference has demonstrated various benefits compared to other high- 
\ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kulkarni:2021:AAI, author = "Sourabh Kulkarni and Sachin Bhat and Csaba Andras Moritz", title = "Architecting for Artificial Intelligence with Emerging Nanotechnology", journal = j-JETC, volume = "17", number = "3", pages = "26:1--26:33", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445977", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3445977", abstract = "Artificial Intelligence is becoming ubiquitous in products and services that we use daily. Although the domain of AI has seen substantial improvements over recent years, its effectiveness is limited by the capabilities of current computing technology. \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kim:2021:SDB, author = "Heewoo Kim and Aporva Amarnath and Javad Bagherzadeh and Nishil Talati and Ronald G. Dreslinski", title = "A Survey Describing Beyond {Si} Transistors and Exploring Their Implications for Future Processors", journal = j-JETC, volume = "17", number = "3", pages = "27:1--27:44", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453143", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3453143", abstract = "The advancement of Silicon CMOS technology has led information technology innovation for decades. 
However, scaling transistors down according to Moore's law is almost reaching its limitations. To improve system performance, cost, and energy efficiency, \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Madhavan:2021:TSM, author = "Advait Madhavan and Matthew W. Daniels and Mark D. Stiles", title = "Temporal State Machines: Using Temporal Memory to Stitch Time-based Graph Computations", journal = j-JETC, volume = "17", number = "3", pages = "28:1--28:27", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451214", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3451214", abstract = "Race logic, an arrival-time-coded logic family, has demonstrated energy and performance improvements for applications ranging from dynamic programming to machine learning. 
However, the various ad hoc mappings of algorithms into hardware rely on researcher \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Forte:2021:ISI, author = "Domenic Forte and Debdeep Mukhopadhyay and Ilia Polian and Yunsi Fei and Rosario Cammarota", title = "Introduction to the Special Issue on Emerging Challenges and Solutions in Hardware Security", journal = j-JETC, volume = "17", number = "3", pages = "29:1--29:4", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464326", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3464326", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sisejkovic:2021:CSL, author = "Dominik Sisejkovic and Farhad Merchant and Lennart M. Reimann and Harshit Srivastava and Ahmed Hallawa and Rainer Leupers", title = "Challenging the Security of Logic Locking Schemes in the Era of Deep Learning: a Neuroevolutionary Approach", journal = j-JETC, volume = "17", number = "3", pages = "30:1--30:26", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3431389", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3431389", abstract = "Logic locking is a prominent technique to protect the integrity of hardware designs throughout the integrated circuit design and fabrication flow. 
However, in recent years, the security of locking schemes has been thoroughly challenged by the introduction \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhou:2021:AMH, author = "Jun Zhou and Mengquan Li and Pengxing Guo and Weichen Liu", title = "Attack Mitigation of Hardware {Trojans} for Thermal Sensing via Micro-ring Resonator in Optical {NoCs}", journal = j-JETC, volume = "17", number = "3", pages = "31:1--31:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3433676", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3433676", abstract = "As an emerging role in new-generation on-chip communication, optical networks-on-chip (ONoCs) provide ultra-high bandwidth, low latency, and low power dissipation for data transfers. 
However, the thermo-optic effects of the photonic devices have a great \ldots{}", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Tsiokanos:2021:DPD, author = "Ioannis Tsiokanos and Jack Miskelly and Chongyan Gu and Maire O'Neill and Georgios Karakonstantis", title = "{DTA-PUF}: Dynamic Timing-aware Physical Unclonable Function for Resource-constrained Devices", journal = j-JETC, volume = "17", number = "3", pages = "32:1--32:24", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434281", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3434281", abstract = "In recent years, physical unclonable functions (PUFs) have gained a lot of attention as mechanisms for hardware-rooted device authentication. 
While the majority of the previously proposed PUFs derive entropy using dedicated circuitry, software PUFs \ldots{}", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Dhar:2021:HTH, author = "Tapobrata Dhar and Surajit Kumar Roy and Chandan Giri", title = "Hardware {Trojan} Horse Detection through Improved Switching of Dormant Nets", journal = j-JETC, volume = "17", number = "3", pages = "33:1--33:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439951", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3439951", abstract = "Covert Hardware Trojan Horses (HTH) introduced by malicious attackers during the fabless manufacturing process of integrated circuits (IC) have the potential to cause malignant functions within the circuit. This article employs a Design-for-Security \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{A:2021:IQF, author = "Asha K. A. 
and Li En Hsu and Abhishek Patyal and Hung-Ming Chen", title = "Improving the Quality of {FPGA} {RO-PUF} by Principal Component Analysis {(PCA)}", journal = j-JETC, volume = "17", number = "3", pages = "34:1--34:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442444", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3442444", abstract = "Ring Oscillator Physical Unclonable Functions (RO-PUFs) exploit the inherent manufacturing process variations, such as systematic and stochastic variations, to generate secret PUF responses that are unique to the device. Stochastic variations are random, \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Camacho-Ruiz:2021:TOH, author = "Eros Camacho-Ruiz and Santiago S{\'a}nchez-Solano and Piedad Brox and Macarena C. Mart{\'\i}nez-Rodr{\'\i}guez", title = "Timing-Optimized Hardware Implementation to Accelerate Polynomial Multiplication in the {NTRU} Algorithm", journal = j-JETC, volume = "17", number = "3", pages = "35:1--35:16", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445979", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3445979", abstract = "Post-quantum cryptographic algorithms have emerged to secure communication channels between electronic devices faced with the advent of quantum computers. 
The performance of post-quantum cryptographic algorithms on embedded systems has to be evaluated to \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Hazari:2021:MLV, author = "Noor Ahmad Hazari and Ahmed Oun and Mohammed Niamat", title = "Machine Learning Vulnerability Analysis of {FPGA}-based Ring Oscillator {PUFs} and Counter Measures", journal = j-JETC, volume = "17", number = "3", pages = "36:1--36:20", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445978", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3445978", abstract = "Physical Unclonable Functions (PUFs) exploit the manufacturing process variations inherent in silicon-based chips to generate unique secret keys. 
Although PUFs are supposed to be unclonable or unbreakable, researchers have found that they are vulnerable \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Liu:2021:RAR, author = "Yuntao Liu and Michael Zuzak and Yang Xie and Abhishek Chakraborty and Ankur Srivastava", title = "Robust and Attack Resilient Logic Locking with a High Application-Level Impact", journal = j-JETC, volume = "17", number = "3", pages = "37:1--37:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446215", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446215", abstract = "Logic locking is a hardware security technique aimed at protecting intellectual property against security threats in the IC supply chain, especially those posed by untrusted fabrication facilities. Such techniques incorporate additional locking circuitry \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Laurent:2021:BGB, author = "J. Laurent and C. Deleuze and F. Pebay-Peyroula and V. Beroulle", title = "Bridging the Gap between {RTL} and Software Fault Injection", journal = j-JETC, volume = "17", number = "3", pages = "38:1--38:24", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446214", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446214", abstract = "Protecting programs against hardware fault injection requires accurate software fault models. 
However, typical models, such as the instruction skip, do not take into account the microarchitecture specificities of a processor. We propose in this article an \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rahman:2021:CGO, author = "M. Tanjidur Rahman and Nusrat Farzana Dipu and Dhwani Mehta and Shahin Tajik and Mark Tehranipoor and Navid Asadizanjani", title = "{CONCEALING-Gate}: Optical Contactless Probing Resilient Design", journal = j-JETC, volume = "17", number = "3", pages = "39:1--39:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446998", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446998", abstract = "Optical probing, though developed as silicon debugging tools from the chip backside, has shown its capability of extracting secret data, such as cryptographic keys and user identifications, from modern system-on-chip devices. 
Existing optical probing \ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rioja:2021:USC, author = "Unai Rioja and Servio Paguada and Lejla Batina and Igor Armendariz", title = "The Uncertainty of Side-channel Analysis: a Way to Leverage from Heuristics", journal = j-JETC, volume = "17", number = "3", pages = "40:1--40:27", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446997", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446997", abstract = "Performing a comprehensive side-channel analysis evaluation of small embedded devices is a process known for its variability and complexity. In real-world experimental setups, the results are largely influenced by a huge amount of parameters, some of \ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Robissout:2021:IDL, author = "Damien Robissout and Lilian Bossuet and Amaury Habrard and Vincent Grosso", title = "Improving Deep Learning Networks for Profiled Side-channel Analysis Using Performance Improvement Techniques", journal = j-JETC, volume = "17", number = "3", pages = "41:1--41:30", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453162", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3453162", abstract = "The use of deep learning techniques to perform side-channel analysis attracted the attention of many researchers as they obtained good 
performances with them. Unfortunately, the understanding of the neural networks used to perform side-channel attacks is \ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Biernacki:2021:SDS, author = "Lauren Biernacki and Mark Gallagher and Zhixing Xu and Misiker Tadesse Aga and Austin Harris and Shijia Wei and Mohit Tiwari and Baris Kasikci and Sharad Malik and Todd Austin", title = "Software-driven Security Attacks: From Vulnerability Sources to Durable Hardware Defenses", journal = j-JETC, volume = "17", number = "3", pages = "42:1--42:38", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3456299", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3456299", abstract = "There is an increasing body of work in the area of hardware defenses for software-driven security attacks. A significant challenge in developing these defenses is that the space of security vulnerabilities and exploits is large and not fully understood. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mexis:2021:LAH, author = "Nico Mexis and Nikolaos Athanasios Anagnostopoulos and Shuai Chen and Jan Bambach and Tolga Arul and Stefan Katzenbeisser", title = "A Lightweight Architecture for Hardware-Based Security in the Emerging Era of Systems of Systems", journal = j-JETC, volume = "17", number = "3", pages = "43:1--43:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3458824", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3458824", abstract = "In recent years, a new generation of the Internet of Things (IoT 2.0) is emerging, based on artificial intelligence, the blockchain technology, machine learning, and the constant consolidation of pre-existing systems and subsystems into larger systems. 
In \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Paul:2021:SSI, author = "Shubhra Deb Paul and Swarup Bhunia", title = "{SILVerIn}: Systematic Integrity Verification of Printed Circuit Board Using {JTAG} Infrastructure", journal = j-JETC, volume = "17", number = "3", pages = "44:1--44:28", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3460232", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Aug 14 05:29:37 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3460232", abstract = "A printed circuit board (PCB) provides necessary mechanical support to an electronic system and acts as a platform for connecting electronic components. Counterfeiting and in-field tampering of PCBs have become significant security concerns in the \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chen:2021:ISIb, author = "Yiran Chen and Qinru Qiu and Yingyan Lin", title = "Introduction to the Special Issue on Hardware and Algorithms for Efficient Machine Learning --- Part 2", journal = j-JETC, volume = "17", number = "4", pages = "45:1--45:2", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464917", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3464917", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Thakker:2021:CRK, author = "Urmish Thakker and Igor 
Fedorov and Chu Zhou and Dibakar Gope and Matthew Mattina and Ganesh Dasika and Jesse Beu", title = "Compressing {RNNs} to Kilobyte Budget for {IoT} Devices Using {Kronecker} Products", journal = j-JETC, volume = "17", number = "4", pages = "46:1--46:18", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440016", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3440016", abstract = "Micro-controllers (MCUs) make up most of the processors in the world with widespread applicability from automobile to medical devices. The Internet of Things promises to enable these resource-constrained MCUs with machine learning algorithms to provide \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gan:2021:CED, author = "Victor M. Gan and Yibin Liang and Lianjun Li and Lingjia Liu and Yang Yi", title = "A Cost-Efficient Digital {ESN} Architecture on {FPGA} for {OFDM} Symbol Detection", journal = j-JETC, volume = "17", number = "4", pages = "47:1--47:15", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440017", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3440017", abstract = "The echo state network (ESN) is a recently developed machine-learning paradigm whose processing capabilities rely on the dynamical behavior of recurrent neural networks. 
Its performance outperforms traditional recurrent neural networks in nonlinear system \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Guo:2021:RMS, author = "Chuliang Guo and Li Zhang and Xian Zhou and Grace Li Zhang and Bing Li and Weikang Qian and Xunzhao Yin and Cheng Zhuo", title = "A Reconfigurable Multiplier for Signed Multiplications with Asymmetric Bit-Widths", journal = j-JETC, volume = "17", number = "4", pages = "48:1--48:16", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446213", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446213", abstract = "Multiplications have been commonly conducted in quantized CNNs, filters, and reconfigurable cores, and so on, which are widely deployed in mobile and embedded applications. Most multipliers are designed to perform multiplications with symmetric bit-widths, 
\ldots{}", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Dang:2021:BCP, author = "Dharanidhar Dang and Sai Vineel Reddy Chittamuru and Sudeep Pasricha and Rabi Mahapatra and Debashis Sahoo", title = "{BPLight}-{CNN}: a Photonics-Based Backpropagation Accelerator for Deep Learning", journal = j-JETC, volume = "17", number = "4", pages = "49:1--49:26", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446212", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3446212", abstract = "Training deep learning networks involves continuous weight updates across the various layers of the deep network while using a backpropagation (BP) algorithm. This results in expensive computation overheads during training. 
Consequently, most deep \ldots{}", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wang:2021:GBA, author = "He Wang and Nicoleta Cucu Laurenciu and Yande Jiang and Sorin Cotofana", title = "Graphene-Based Artificial Synapses with Tunable Plasticity", journal = j-JETC, volume = "17", number = "4", pages = "50:1--50:21", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447778", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3447778", abstract = "Design and implementation of artificial neuromorphic systems able to provide brain akin computation and/or bio-compatible interfacing ability are crucial for understanding the human brain's complex functionality and unleashing brain-inspired computation's \ldots{}", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Yang:2021:DRA, author = "Qing Yang and Jiachen Mao and Zuoguan Wang and Hai ``Helen'' Li", title = "Dynamic Regularization on Activation Sparsity for Neural Network Efficiency Improvement", journal = j-JETC, volume = "17", number = "4", pages = "51:1--51:16", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447776", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3447776", abstract = "When deploying deep neural networks in embedded systems, it is crucial to decrease the model size and computational complexity for improving the execution speed and efficiency. 
In addition to conventional compression techniques, e.g., weight pruning and \ldots{}", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Dey:2021:SCB, author = "Sumon Dey and Lee Baker and Joshua Schabel and Weifu Li and Paul D. Franzon", title = "A Scalable Cluster-based Hierarchical Hardware Accelerator for a Cortically Inspired Algorithm", journal = j-JETC, volume = "17", number = "4", pages = "52:1--52:29", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447777", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3447777", abstract = "This article describes a scalable, configurable and cluster-based hierarchical hardware accelerator through custom hardware architecture for Sparsey, a cortical learning algorithm. Sparsey is inspired by the operation of the human cortex and uses a Sparse \ldots{}", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Meyer:2021:IBH, author = "Bruno Henrique Meyer and Aurora Trinidad Ramirez Pozo and Wagner M. 
Nunan Zola", title = "Improving {Barnes--Hut} {$t$-SNE} Algorithm in Modern {GPU} Architectures with Random Forest {KNN} and Simulated Wide-Warp", journal = j-JETC, volume = "17", number = "4", pages = "53:1--53:26", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447779", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3447779", abstract = "The $t$-Distributed Stochastic Neighbor Embedding (t-SNE) is a widely used technique for dimensionality reduction but is limited by its scalability when applied to large datasets. Recently, BH-tSNE was proposed; this is a successful approximation that transforms a step of the original algorithm into an N-Body simulation problem that can be solved by a modified Barnes-Hut algorithm. However, this improvement still has limitations to process large data volumes (millions of records). Late studies, such as $t$-SNE-CUDA, have used GPUs to implement highly parallel BH-tSNE. In this research we have developed a new GPU BH-tSNE implementation that produces the embedding of multidimensional data points into three-dimensional space. We examine scalability issues in two of the most expensive steps of GPU BH-tSNE by using efficient memory access strategies, recent acceleration techniques, and a new approach to compute the KNN graph structure used in BH-tSNE with GPU. Our design allows up to 460\% faster execution when compared to the $t$-SNE-CUDA implementation. 
Although our SIMD acceleration techniques were used in a modern GPU setup, we have also verified a potential for applications in the context of multi-core processors.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chen:2021:QDN, author = "Wentao Chen and Hailong Qiu and Jian Zhuang and Chutong Zhang and Yu Hu and Qing Lu and Tianchen Wang and Yiyu Shi and Meiping Huang and Xiaowei Xu", title = "Quantization of Deep Neural Networks for Accurate Edge Computing", journal = j-JETC, volume = "17", number = "4", pages = "54:1--54:11", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451211", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3451211", abstract = "Deep neural networks have demonstrated their great potential in recent years, exceeding the performance of human experts in a wide range of applications. 
Due to their large sizes, however, compression techniques such as weight quantization and pruning are \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Wang:2021:QWY,
  author =       "Serena Wang and Maya Gupta and Seungil You",
  title =        "Quit When You Can: Efficient Evaluation of Ensembles by Optimized Ordering",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "55:1--55:20",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451209",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451209",
  abstract =     "Given a classifier ensemble and a dataset, many examples may be confidently and accurately classified after only a subset of the base models in the ensemble is evaluated. Dynamically deciding to classify early can reduce both mean latency and CPU without \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Adnan:2021:DRM,
  author =       "Md Musabbir Adnan and Sagarvarma Sayyaparaju and Samuel D. Brown and Mst Shamim Ara Shawkat and Catherine D. Schuman and Garrett S.
Rose",
  title =        "Design of a Robust Memristive Spiking Neuromorphic System with Unsupervised Learning in Hardware",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "56:1--56:26",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451210",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451210",
  abstract =     "Spiking neural networks (SNN) offer a power efficient, biologically plausible learning paradigm by encoding information into spikes. The discovery of the memristor has accelerated the progress of spiking neuromorphic systems, as the intrinsic plasticity \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Masadeh:2021:QAA,
  author =       "Mahmoud Masadeh and Yassmeen Elderhalli and Osman Hasan and Sofiene Tahar",
  title =        "A Quality-assured Approximate Hardware Accelerators-based on Machine Learning and Dynamic Partial Reconfiguration",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "57:1--57:19",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3462329",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462329",
  abstract =     "Machine learning is widely used these days to extract meaningful information out of the Zettabytes of sensors data collected daily.
All applications require analyzing and understanding the data to identify trends, e.g., surveillance, exhibit some error \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Xu:2021:MCC,
  author =       "Xiaowei Xu and Jiawei Zhang and Jinglan Liu and Yukun Ding and Tianchen Wang and Hailong Qiu and Haiyun Yuan and Jian Zhuang and Wen Xie and Yuhao Dong and Qianjun Jia and Meiping Huang and Yiyu Shi",
  title =        "Multi-Cycle-Consistent Adversarial Networks for Edge Denoising of Computed Tomography Images",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "58:1--58:16",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3462328",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462328",
  abstract =     "As one of the most commonly ordered imaging tests, the computed tomography (CT) scan comes with inevitable radiation exposure that increases cancer risk to patients.
However, CT image quality is directly related to radiation dose, and thus it is desirable \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Gong:2021:SES,
  author =       "Shijun Gong and Jiajun Li and Wenyan Lu and Guihai Yan and Xiaowei Li",
  title =        "{ShuntFlowPlus}: an Efficient and Scalable Dataflow Accelerator Architecture for Stream Applications",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "59:1--59:24",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3453164",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453164",
  abstract =     "Streaming processing is an important and growing class of applications for analyzing continuous streams in real time.
In such applications, sliding-window aggregation (SWAG) is a widely used approach, and general-purpose processors cannot efficiently \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Xie:2021:AIB,
  author =       "Wen Xie and Zeyang Yao and Erchao Ji and Hailong Qiu and Zewen Chen and Huiming Guo and Jian Zhuang and Qianjun Jia and Meiping Huang",
  title =        "Artificial Intelligence-based Computed Tomography Processing Framework for Surgical Telementoring of Congenital Heart Disease",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "60:1--60:24",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3457613",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3457613",
  abstract =     "Congenital heart disease (CHD) is the most common birth defect, accounting for one-third of all congenital birth defects. As with complicated intracardiac structural abnormalities, CHD is usually treated with surgical repair, and computed tomography (CT) \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Sunny:2021:SSP,
  author =       "Febin P.
Sunny and Ebadollah Taheri and Mahdi Nikdast and Sudeep Pasricha",
  title =        "A Survey on Silicon Photonics for Deep Learning",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "61:1--61:57",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3459009",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3459009",
  abstract =     "Deep learning has led to unprecedented successes in solving some very difficult problems in domains such as computer vision, natural language processing, and general pattern recognition. These achievements are the culmination of decades-long research into \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Botero:2021:HTA,
  author =       "Ulbert J. Botero and Ronald Wilson and Hangwei Lu and Mir Tanjidur Rahman and Mukhil A. Mallaiyan and Fatemeh Ganji and Navid Asadizanjani and Mark M. Tehranipoor and Damon L. Woodard and Domenic Forte",
  title =        "Hardware Trust and Assurance through Reverse Engineering: a Tutorial and Outlook from Image Analysis and Machine Learning Perspectives",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "62:1--62:53",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3464959",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3464959",
  abstract =     "In the context of hardware trust and assurance, reverse engineering has been often considered as an illegal action.
Generally speaking, reverse engineering aims to retrieve information from a product, i.e., integrated circuits (ICs) and printed circuit \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "62",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Song:2021:DRM,
  author =       "Shihao Song and Jui Hanamshet and Adarsha Balaji and Anup Das and Jeffrey L. Krichmar and Nikil D. Dutt and Nagarajan Kandasamy and Francky Catthoor",
  title =        "Dynamic Reliability Management in Neuromorphic Computing",
  journal =      j-JETC,
  volume =       "17",
  number =       "4",
  pages =        "63:1--63:27",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3462330",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Sep 14 06:51:04 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462330",
  abstract =     "Neuromorphic computing systems execute machine learning tasks designed with spiking neural networks. These systems are embracing non-volatile memory to implement high-density and low-energy synaptic storage. Elevated voltages and currents needed to \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "63",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Mohanty:2022:ISI,
  author =       "Saraju P. Mohanty and Jim Plusquellic and Garrett S. Rose and Wei Zhang and Maria K.
Michael",
  title =        "Introduction to the Special Issue on Hardware-Assisted Security for Emerging {Internet of Things}",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "1:1--1:3",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3475952",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3475952",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Roessler:2022:SEL,
  author =       "Nick Roessler and Andr{\'e} DeHon",
  title =        "{SCALPEL}: Exploring the Limits of Tag-enforced Compartmentalization",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "2:1--2:28",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3461673",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3461673",
  abstract =     "We present Secure Compartments Automatically Learned and Protected by Execution using Lightweight metadata (SCALPEL), a tool for automatically deriving compartmentalization policies and lowering them to a tagged architecture for hardware-accelerated \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Falas:2022:MEE,
  author =       "Solon Falas and Charalambos Konstantinou and Maria K.
Michael",
  title =        "A Modular End-to-End Framework for Secure Firmware Updates on Embedded Systems",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:19",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3460234",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460234",
  abstract =     "Firmware refers to device read-only resident code which includes microcode and macro-instruction-level routines. For Internet-of-Things (IoT) devices without an operating system, firmware includes all the necessary instructions on how such embedded \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Danial:2022:EXD,
  author =       "Josef Danial and Debayan Das and Anupam Golder and Santosh Ghosh and Arijit Raychowdhury and Shreyas Sen",
  title =        "{EM-X-DL}: Efficient Cross-device Deep Learning Side-channel Attack With Noisy {EM} Signatures",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "4:1--4:17",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3465380",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3465380",
  abstract =     "This work presents a Cross-device Deep-Learning based Electromagnetic (EM-X-DL) side-channel analysis (SCA) on AES-128, in the presence of a significantly lower signal-to-noise ratio (SNR) compared to previous works.
Using a novel algorithm to \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Thompson:2022:IPS,
  author =       "David Thompson and Haibo Wang",
  title =        "Integrated Power Signature Generation Circuit for {IoT} Abnormality Detection",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "5:1--5:13",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3460476",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460476",
  abstract =     "This work presents a methodology to monitor the power signature of IoT devices for detecting operation abnormality. It does not require bulky measurement equipment thanks to the proposed power signature generation circuit which can be integrated into LDO \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Elkanishy:2022:LOH,
  author =       "Abdelrahman Elkanishy and Paul M. Furth and Derrick T. Rivera and Abdel-Hameed A. Badawy",
  title =        "Low-overhead Hardware Supervision for Securing an {IoT} {Bluetooth}-enabled Device: Monitoring Radio Frequency and Supply Voltage",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "6:1--6:28",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3468064",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3468064",
  abstract =     "Over the past decade, the number of Internet of Things (IoT) devices increased tremendously.
In particular, the Internet of Medical Things (IoMT) and the Industrial Internet of Things (IIoT) expanded dramatically. Resource restrictions on IoT devices and \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Fragkos:2022:EPP,
  author =       "Georgios Fragkos and Cyrus Minwalla and Eirini Eleni Tsiropoulou and Jim Plusquellic",
  title =        "Enhancing Privacy in {PUF-Cash} through Multiple Trusted Third Parties and Reinforcement Learning",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:26",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441139",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441139",
  abstract =     "Electronic cash (e-Cash) is a digital alternative to physical currency such as coins and bank notes. Suitably constructed, e-Cash has the ability to offer an anonymous offline experience much akin to cash, and in direct contrast to traditional forms of \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Labrado:2022:FVS,
  author =       "Carson Labrado and Himanshu Thapliyal and Saraju P.
Mohanty",
  title =        "Fortifying Vehicular Security through Low Overhead Physically Unclonable Functions",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:18",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442443",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442443",
  abstract =     "Within vehicles, the Controller Area Network (CAN) allows efficient communication between the electronic control units (ECUs) responsible for controlling the various subsystems. The CAN protocol was not designed to include much support for secure \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Mahalat:2022:PBS,
  author =       "Mahabub Hasan Mahalat and Dipankar Karmakar and Anindan Mondal and Bibhash Sen",
  title =        "{PUF} based Secure and Lightweight Authentication and Key-Sharing Scheme for Wireless Sensor Network",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "9:1--9:23",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3466682",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3466682",
  abstract =     "The deployment of wireless sensor networks (WSN) in an untended environment and the openness of the wireless channel bring various security threats to WSN.
The resource limitations of the sensor nodes make the conventional security systems less attractive \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Wille:2022:ISI,
  author =       "Robert Wille and Rolf Drechsler",
  title =        "Introduction to the Special Issue on Design Automation for Quantum Computing",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "10:1--10:2",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3485041",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3485041",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Simoni:2022:TCM,
  author =       "Mario Simoni and Giovanni Amedeo Cirillo and Giovanna Turvani and Mariagrazia Graziano and Maurizio Zamboni",
  title =        "Towards Compact Modeling of Noisy Quantum Computers: a Molecular-Spin-Qubit Case of Study",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "11:1--11:26",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474223",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474223",
  abstract =     "Classical simulation of Noisy Intermediate Scale Quantum computers is a crucial task for testing the expected performance of real hardware.
The standard approach, based on solving Schr{\"o}dinger and Lindblad equations, is demanding when scaling the number of \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{ChagasRibeiroDaRosa:2022:KQP,
  author =       "Evandro {Chagas Ribeiro Da Rosa} and Rafael {De Santiago}",
  title =        "Ket Quantum Programming",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "12:1--12:25",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474224",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474224",
  abstract =     "Quantum programming languages (QPL) fill the gap between quantum mechanics and classical programming constructions, simplifying the development of quantum applications. However, most QPL addresses the inherent quantum programming problem, neglecting \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Khammassi:2022:OPQ,
  author =       "N. Khammassi and I. Ashraf and J. V. Someren and R. Nane and A. M. Krol and M. A. Rol and L. Lao and K. Bertels and C. G.
Almudever",
  title =        "\pkg{OpenQL}: a Portable Quantum Programming Framework for Quantum Accelerators",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "13:1--13:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474222",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474222",
  abstract =     "With the potential of quantum algorithms to solve intractable classical problems, quantum computing is rapidly evolving, and more algorithms are being developed and optimized. Expressing these quantum algorithms using a high-level language and making them \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Acharya:2022:TPO,
  author =       "Nikita Acharya and Miroslav Urbanek and Wibe A. {De Jong} and Samah Mohamed Saeed",
  title =        "Test Points for Online Monitoring of Quantum Circuits",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "14:1--14:19",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3477928",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477928",
  abstract =     "Noisy Intermediate-Scale Quantum (NISQ) computers consisting of tens of inherently noisy quantum bits (qubits) suffer from reliability problems. Qubits and their gates are susceptible to various types of errors.
Due to limited numbers of qubits and high \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Han:2022:PBC,
  author =       "Jianhui Han and Xiang Fei and Zhaolin Li and Youhui Zhang",
  title =        "Polyhedral-Based Compilation Framework for In-Memory Neural Network Accelerators",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "15:1--15:23",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3469847",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3469847",
  abstract =     "Memristor-based processing-in-memory architecture is a promising solution to the memory bottleneck in the neural network (NN) processing. A major challenge for the programmability of such architectures is the automatic compilation of high-level NN \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{El-Derhalli:2022:TAO,
  author =       "Hassnaa El-Derhalli and L{\'e}a Constans and S{\'e}bastien {Le Beux} and Alfredo {De Rossi} and Fabrice Raineri and Sofi{\`e}ne Tahar",
  title =        "Towards All-optical Stochastic Computing Using Photonic Crystal Nanocavities",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "16:1--16:25",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3484871",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484871",
  abstract =     "Stochastic computing allows a drastic reduction in hardware complexity using serial processing of bit
streams. While the induced high computing latency can be overcome using integrated optics technology, the design of realistic optical stochastic \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Li:2022:HIH,
  author =       "Weifu Li and Paul Franzon and Sumon Dey and Joshua Schabel",
  title =        "Hardware Implementation of Hierarchical Temporal Memory Algorithm",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "17:1--17:23",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479430",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479430",
  abstract =     "Hierarchical temporal memory (HTM) is an un-supervised machine learning algorithm that can learn both spatial and temporal information of input. It has been successfully applied to multiple areas.
In this paper, we propose a multi-level hierarchical ASIC \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Choudhury:2022:HPE,
  author =       "Dwaipayan Choudhury and Aravind Sukumaran Rajam and Ananth Kalyanaraman and Partha Pratim Pande",
  title =        "High-Performance and Energy-Efficient {$3$D} Manycore {GPU} Architecture for Accelerating Graph Analytics",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "18:1--18:19",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3482880",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3482880",
  abstract =     "Recent advances in GPU-based manycore accelerators provide the opportunity to efficiently process large-scale graphs on chip.
However, real world graphs have a diverse range of topology and connectivity patterns (e.g., degree distributions) that make the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Thuries:2022:ISI,
  author =       "S{\'e}bastien Thuries and Aida Todri-Sanial",
  title =        "Introduction to the Special Issue on Monolithic {$3$D}: Technology, Design and Computing Systems Applications Perspectives",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "19:1--19:3",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487869",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487869",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Lee:2022:RMC,
  author =       "Edward Lee and Daehyun Kim and Jinwoo Kim and Sung Kyu Lim and Saibal Mukhopadhyay",
  title =        "A {ReRAM} Memory Compiler for Monolithic {$3$D} Integrated Circuits in a Carbon Nanotube Process",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "20:1--20:20",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3466681",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3466681",
  abstract =     "We present a ReRAM memory compiler for monolithic 3D (M3D) integrated circuits (IC). We develop ReRAM architectures for M3D ICs using 1T-1R bit cells and single and multiple tiers of transistors for access and peripheral circuits.
The compiler includes an \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Zhu:2022:DAT,
  author =       "Lingjun Zhu and Arjun Chaudhuri and Sanmitra Banerjee and Gauthaman Murali and Pruek Vanna-Iampikul and Krishnendu Chakrabarty and Sung Kyu Lim",
  title =        "Design Automation and Test Solutions for Monolithic {$3$D} {ICs}",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "21:1--21:49",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473462",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473462",
  abstract =     "Monolithic 3D (M3D) is an emerging heterogeneous integration technology that overcomes the limitations of the conventional through-silicon-via (TSV) and provides significant performance uplift and power reduction.
However, the ultra-dense 3D interconnects \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Chaudhuri:2022:BST,
  author =       "Arjun Chaudhuri and Sanmitra Banerjee and Jinwoo Kim and Heechun Park and Bon Woong Ku and Sukeshwar Kannan and Krishnendu Chakrabarty and Sung Kyu Lim",
  title =        "Built-in Self-Test and Fault Localization for Inter-Layer Vias in Monolithic {$3$D} {ICs}",
  journal =      j-JETC,
  volume =       "18",
  number =       "1",
  pages =        "22:1--22:37",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3464430",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Wed Mar 2 09:33:14 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3464430",
  abstract =     "Monolithic 3D (M3D) integration provides massive vertical integration through the use of nanoscale inter-layer vias (ILVs). However, high integration density and aggressive scaling of the inter-layer dielectric make ILVs especially prone to defects. We \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Journal on Emerging Technologies in Computing Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Bagherzadeh:2022:HSR,
  author =       "Javad Bagherzadeh and Aporva Amarnath and Jielun Tan and Subhankar Pal and Ronald G.
Dreslinski", title = "A Holistic Solution for Reliability of {$3$D} Parallel Systems", journal = j-JETC, volume = "18", number = "1", pages = "23:1--23:27", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488900", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Mar 2 09:33:14 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3488900", abstract = "Monolithic 3D technology is emerging as a promising solution that can bring massive opportunities, but the gains can be hindered due to the reliability issues exaggerated by high temperature. Conventional reliability solutions focus on one specific \ldots{}", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Shi:2022:GEA, author = "Yiyu Shi and Yongpan Liu and Jianxu Chen and Steve Jiang", title = "Guest Editorial: {ACM JETC} Special Issue on Hardware-Aware Learning for Medical Applications", journal = j-JETC, volume = "18", number = "2", pages = "24:1--24:3", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3503262", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3503262", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wang:2022:OUN, author = "Weijia Wang and Bill Lin", title = "Optimizing {$3$D} {U-Net}-based Brain Tumor Segmentation with Integer-arithmetic Deep Learning Accelerators", journal = j-JETC, volume = "18", number = "2", pages = "25:1--25:16", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3495210", ISSN 
= "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3495210", abstract = "While gliomas have become the most common cancerous brain tumors, manual diagnoses from 3D MRIs are time-consuming and possibly inconsistent when conducted by different radiotherapists, which leads to the pressing demand for automatic segmentation of \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mishra:2022:ICG, author = "Suraj Mishra and Danny Z. Chen and X. Sharon Hu", title = "Image Complexity Guided Network Compression for Biomedical Image Segmentation", journal = j-JETC, volume = "18", number = "2", pages = "26:1--26:23", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3471190", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3471190", abstract = "Compression is a standard procedure for making convolutional neural networks (CNNs) adhere to some specific computing resource constraints. 
However, searching for a compressed architecture typically involves a series of time-consuming training/validation \ldots{}", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chen:2022:AUA, author = "Yufei Chen and Tingtao Li and Qinming Zhang and Wei Mao and Nan Guan and Mei Tian and Hao Yu and Cheng Zhuo", title = "{ANT-UNet}: Accurate and Noise-Tolerant Segmentation for Pathology Image Processing", journal = j-JETC, volume = "18", number = "2", pages = "27:1--27:17", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451213", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3451213", abstract = "Pathology image segmentation is an essential step in early detection and diagnosis for various diseases. Due to its complex nature, precise segmentation is not a trivial task. Recently, deep learning has been proved as an effective option for pathology \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2022:QDQ, author = "Dawei Li and Yang Zhou and Shaopin Chen and Xiaowei Xu", title = "A Quasi-digital {QPSK} Modulator Design for Biomedical Devices", journal = j-JETC, volume = "18", number = "2", pages = "28:1--28:16", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465379", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3465379", abstract = "For the biomedical transceiver, the data transmission is often asymmetric. 
At the downlink, the transceiver only needs to receive a simple command to control the operation of the external device, and the receiving data rate is low, about hundreds of Kb/s \ldots{}", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zhang:2022:RRN, author = "Yongan Zhang and Anton Banta and Yonggan Fu and Mathews M. John and Allison Post and Mehdi Razavi and Joseph Cavallaro and Behnaam Aazhang and Yingyan Lin", title = "{RT-RCG}: Neural Network and Accelerator Search Towards Effective and Real-time {ECG} Reconstruction from Intracardiac Electrograms", journal = j-JETC, volume = "18", number = "2", pages = "29:1--29:25", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465372", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3465372", abstract = "There exists a gap in terms of the signals provided by pacemakers (i.e., intracardiac electrogram (EGM)) and the signals doctors use (i.e., 12-lead electrocardiogram (ECG)) to diagnose abnormal rhythms. 
Therefore, the former, even if remotely transmitted, \ldots{}", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kulkarni:2022:HAS, author = "Sourabh Kulkarni and Mario Michael Krell and Seth Nabarro and Csaba Andras Moritz", title = "Hardware-accelerated Simulation-based Inference of Stochastic Epidemiology Models for {COVID-19}", journal = j-JETC, volume = "18", number = "2", pages = "30:1--30:24", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3471188", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3471188", abstract = "Epidemiology models are central to understanding and controlling large-scale pandemics. Several epidemiology models require simulation-based inference such as Approximate Bayesian Computation (ABC) to fit their parameters to observations. 
ABC inference is \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Hamdioui:2022:GEC, author = "Said Hamdioui and Elena-Ioana Vatajelu and Alberto Bosio", title = "Guest Editorial: Computation-In-Memory {(CIM)}: from Device to Applications", journal = j-JETC, volume = "18", number = "2", pages = "31:1--31:3", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3503263", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3503263", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mayahinia:2022:VCO, author = "Mahta Mayahinia and Abhairaj Singh and Christopher Bengel and Stefan Wiefels and Muath A. Lebdeh and Stephan Menzel and Dirk J. Wouters and Anteneh Gebregiorgis and Rajendra Bishnoi and Rajiv Joshi and Said Hamdioui", title = "A Voltage-Controlled, Oscillation-Based {ADC} Design for Computation-in-Memory Architectures Using Emerging {ReRAMs}", journal = j-JETC, volume = "18", number = "2", pages = "32:1--32:25", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451212", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3451212", abstract = "Conventional von Neumann architectures cannot successfully meet the demands of emerging computation and data-intensive applications. These shortcomings can be improved by embracing new architectural paradigms using emerging technologies. 
In particular, \ldots{}", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wan:2022:ARA, author = "Zhe Wan and Tianyi Wang and Yiming Zhou and Subramanian S. Iyer and Vwani P. Roychowdhury", title = "Accuracy and Resiliency of Analog Compute-in-Memory Inference Engines", journal = j-JETC, volume = "18", number = "2", pages = "33:1--33:23", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502721", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3502721", abstract = "Recently, analog compute-in-memory (CIM) architectures based on emerging analog non-volatile memory (NVM) technologies have been explored for deep neural networks (DNNs) to improve scalability, speed, and energy efficiency. Such architectures, however, \ldots{}", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Krishnan:2022:ICI, author = "Gokul Krishnan and Sumit K. Mandal and Chaitali Chakrabarti and Jae-Sun Seo and Umit Y. 
Ogras and Yu Cao", title = "Impact of On-chip Interconnect on In-memory Acceleration of Deep Neural Networks", journal = j-JETC, volume = "18", number = "2", pages = "34:1--34:22", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3460233", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3460233", abstract = "With the widespread use of Deep Neural Networks (DNNs), machine learning algorithms have evolved in two diverse directions---one with ever-increasing connection density for better accuracy and the other with more compact sizing for energy efficiency. The \ldots{}", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Luo:2022:ACT, author = "Yandong Luo and Panni Wang and Shimeng Yu", title = "Accelerating On-Chip Training with Ferroelectric-Based Hybrid Precision Synapse", journal = j-JETC, volume = "18", number = "2", pages = "35:1--35:20", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473461", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3473461", abstract = "In this article, we propose a hardware accelerator design using ferroelectric transistor (FeFET)-based hybrid precision synapse (HPS) for deep neural network (DNN) on-chip training.
The drain erase scheme for FeFET programming is incorporated for both \ldots{}", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Jones:2022:SNA, author = "Alexander Jones and Aaron Ruen and Rashmi Jha", title = "A Spiking Neuromorphic Architecture Using {Gated-RRAM} for Associative Memory", journal = j-JETC, volume = "18", number = "2", pages = "36:1--36:22", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461667", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3461667", abstract = "This work reports a spiking neuromorphic architecture for associative memory simulated in a SPICE environment using recently reported gated-RRAM (resistive random-access memory) devices as synapses alongside neurons based on complementary metal-oxide \ldots{}", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gupta:2022:CCS, author = "Saransh Gupta and Mohsen Imani and Joonseop Sim and Andrew Huang and Fan Wu and Jaeyoung Kang and Yeseong Kim and Tajana Simuni{\'c} Rosing", title = "{COSMO}: Computing with Stochastic Numbers in Memory", journal = j-JETC, volume = "18", number = "2", pages = "37:1--37:25", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3484731", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3484731", abstract = "Stochastic computing (SC) reduces the complexity of computation by representing numbers with long streams of 
independent bits. However, increasing performance in SC comes with either an increase in area or a loss in accuracy. Processing in memory (PIM) \ldots{}", acknowledgement = ack-nhfb, articleno = "37", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ku:2022:UDR, author = "Bon Woong Ku and Catherine D. Schuman and Md Musabbir Adnan and Tiffany M. Mintz and Raphael Pooser and Kathleen E. Hamilton and Garrett S. Rose and Sung Kyu Lim", title = "Unsupervised Digit Recognition Using Cosine Similarity In A Neuromemristive Competitive Learning System", journal = j-JETC, volume = "18", number = "2", pages = "38:1--38:20", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473036", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3473036", abstract = "This work addresses how to naturally adopt the $l^2$-norm cosine similarity in the neuromemristive system and studies the unsupervised learning performance on handwritten digit image recognition.
Proposed architecture is a two-layer fully connected neural \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{deLima:2022:SAD, author = "Jo{\~a}o Paulo Cardoso de Lima and Marcelo Brandalero and Michael H{\"u}bner and Luigi Carro", title = "{STAP}: an Architecture and Design Tool for Automata Processing on Memristor {TCAMs}", journal = j-JETC, volume = "18", number = "2", pages = "39:1--39:22", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3450769", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3450769", abstract = "Accelerating finite-state automata benefits several emerging application domains that are built on pattern matching. In-memory architectures, such as the Automata Processor (AP), are efficient to speed them up, at least for outperforming traditional von-\ldots{}", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kooli:2022:TTI, author = "Maha Kooli and Antoine Heraud and Henri-Pierre Charles and Bastien Giraud and Roman Gauchi and Mona Ezzadeen and Kevin Mambu and Valentin Egloff and Jean-Philippe Noel", title = "Towards a Truly Integrated Vector Processing Unit for Memory-bound Applications Based on a Cost-competitive Computational {SRAM} Design Solution", journal = j-JETC, volume = "18", number = "2", pages = "40:1--40:26", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485823", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL =
"https://dl.acm.org/doi/10.1145/3485823", abstract = "This article presents Computational SRAM (C-SRAM) solution combining In- and Near-Memory Computing approaches. It allows performing arithmetic, logic, and complex memory operations inside or next to the memory without transferring data over the system bus, \ldots{}", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Froehlich:2022:PCG, author = "Saman Froehlich and Saeideh Shirinzadeh and Rolf Drechsler", title = "Parallel Computing of Graph-based Functions in {ReRAM}", journal = j-JETC, volume = "18", number = "2", pages = "41:1--41:24", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3453163", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3453163", abstract = "Resistive Random Access Memory (ReRAM) is an emerging non-volatile memory technology. Besides its low power consumption and its high scalability, its inherent computation capabilities make ReRAM especially interesting for future computer architectures.
\ldots{}", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zaman:2022:EDS, author = "Md Adnan Zaman and Rajeev Joshi and Srinivas Katkoori", title = "Early Design Space Exploration Framework for Memristive Crossbar Arrays", journal = j-JETC, volume = "18", number = "2", pages = "42:1--42:26", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461644", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3461644", abstract = "For memristive crossbar arrays, currently, no high-level design validation and early space exploration tools exist in the literature. Such tools are essential to quickly verify the design functionality as well as compare design alternatives in terms of \ldots{}", acknowledgement = ack-nhfb, articleno = "42", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ronen:2022:BMP, author = "Ronny Ronen and Adi Eliahu and Orian Leitersdorf and Natan Peled and Kunal Korgaonkar and Anupam Chattopadhyay and Ben Perach and Shahar Kvatinsky", title = "The {Bitlet} Model: a Parameterized Analytical Model to Compare {PIM} and {CPU} Systems", journal = j-JETC, volume = "18", number = "2", pages = "43:1--43:29", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465371", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon May 2 14:16:07 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3465371", abstract = "Currently, data-intensive applications are gaining popularity. 
Together with this trend, processing-in-memory (PIM)-based systems are being given more attention and have become more relevant. This article describes an analytical modeling tool called \ldots{}", acknowledgement = ack-nhfb, articleno = "43", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zahedi:2022:MTA, author = "Mahdi Zahedi and Muath Abu Lebdeh and Christopher Bengel and Dirk Wouters and Stephan Menzel and Manuel {Le Gallo} and Abu Sebastian and Stephan Wong and Said Hamdioui", title = "{MNEMOSENE}: Tile Architecture and Simulator for Memristor-based Computation-in-memory", journal = j-JETC, volume = "18", number = "3", pages = "44:1--44:24", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485824", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3485824", abstract = "In recent years, we are witnessing a trend toward in-memory computing for future generations of computers that differs from traditional von-Neumann architecture in which there is a clear distinction between computing and memory units. Considering that \ldots{}", acknowledgement = ack-nhfb, articleno = "44", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Karempudi:2022:PNC, author = "Venkata Sai Praneeth Karempudi and Febin Sunny and Ishan G.
Thakkar and Sai Vineel Reddy Chittamuru and Mahdi Nikdast and Sudeep Pasricha", title = "Photonic Networks-on-Chip Employing Multilevel Signaling: a Cross-Layer Comparative Study", journal = j-JETC, volume = "18", number = "3", pages = "45:1--45:36", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487365", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3487365", abstract = "Photonic network-on-chip (PNoC) architectures employ photonic links with dense wavelength-division multiplexing (DWDM) to enable high throughput on-chip transfers. Unfortunately, increasing the DWDM degree (i.e., using a larger number of wavelengths) to \ldots{}", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Lalouani:2022:CMA, author = "Wassila Lalouani and Mohamed Younis and Mohammad Ebrahimabadi and Naghmeh Karimi", title = "Countering Modeling Attacks in {PUF}-based {IoT} Security Solutions", journal = j-JETC, volume = "18", number = "3", pages = "46:1--46:28", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491221", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3491221", abstract = "Hardware fingerprinting has emerged as a viable option for safeguarding IoT devices from cyberattacks. 
Such a fingerprint is used to not only authenticate the interconnected devices but also to derive cryptographic keys for ensuring data integrity and \ldots{}", acknowledgement = ack-nhfb, articleno = "46", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Pundir:2022:ASV, author = "Nitin Pundir and Sohrab Aftabjahani and Rosario Cammarota and Mark Tehranipoor and Farimah Farahmandi", title = "Analyzing Security Vulnerabilities Induced by High-level Synthesis", journal = j-JETC, volume = "18", number = "3", pages = "47:1--47:22", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3492345", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3492345", abstract = "High-level synthesis (HLS) is essential to map the high-level language (HLL) description (e.g., in C/C++) of hardware design to the corresponding Register Transfer Level (RTL) to produce hardware-independent design specifications with reduced design \ldots{}", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rueckauer:2022:NAC, author = "Bodo Rueckauer and Connor Bybee and Ralf Goettsche and Yashwardhan Singh and Joyesh Mishra and Andreas Wild", title = "{NxTF}: an {API} and Compiler for Deep Spiking Neural Networks on {Intel Loihi}", journal = j-JETC, volume = "18", number = "3", pages = "48:1--48:22", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501770", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3501770", 
abstract = "Spiking Neural Networks (SNNs) is a promising paradigm for efficient event-driven processing of spatio-temporally sparse data streams. Spiking Neural Networks (SNNs) have inspired the design of and can take advantage of the emerging class of neuromorphic \ldots{}", acknowledgement = ack-nhfb, articleno = "48", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Karkar:2022:TPE, author = "Ammar Karkar and Nizar Dahir and Terrence Mak and Kin-Fai Tong", title = "Thermal and Performance Efficient On-Chip Surface-Wave Communication for Many-Core Systems in Dark Silicon Era", journal = j-JETC, volume = "18", number = "3", pages = "49:1--49:18", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501771", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3501771", abstract = "Due to the exceedingly high integration density of VLSI circuits and the resulting high power density, thermal integrity became a major challenge. One way to tackle this problem is Dark silicon. 
Dark silicon is the amount of circuitry in a chip that is \ldots{}", acknowledgement = ack-nhfb, articleno = "49", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Barbareschi:2022:GAB, author = "Mario Barbareschi and Salvatore Barone and Alberto Bosio and Jie Han and Marcello Traiola", title = "A Genetic-algorithm-based Approach to the Design of {DCT} Hardware Accelerators", journal = j-JETC, volume = "18", number = "3", pages = "50:1--50:25", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501772", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3501772", abstract = "As modern applications demand an unprecedented level of computational resources, traditional computing system design paradigms are no longer adequate to guarantee significant performance enhancement at an affordable cost. 
Approximate Computing (AxC) has \ldots{}", acknowledgement = ack-nhfb, articleno = "50", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Alam:2022:NLL, author = "Manaar Alam and Sayandeep Saha and Debdeep Mukhopadhyay and Sandip Kundu", title = "{NN-Lock}: a Lightweight Authorization to Prevent {IP} Threats of Deep Learning Models", journal = j-JETC, volume = "18", number = "3", pages = "51:1--51:19", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3505634", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3505634", abstract = "The prevalent usage and unparalleled recent success of Deep Neural Network (DNN) applications have raised the concern of protecting their Intellectual Property (IP) rights in different business models to prevent the theft of trade secrets. In this article,
\ldots{}", acknowledgement = ack-nhfb, articleno = "51", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Fieback:2022:DFM, author = "Moritz Fieback and Guilherme Cardoso Medeiros and Lizhou Wu and Hassen Aziza and Rajendra Bishnoi and Mottaqiallah Taouil and Said Hamdioui", title = "Defects, Fault Modeling, and Test Development Framework for {RRAMs}", journal = j-JETC, volume = "18", number = "3", pages = "52:1--52:26", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510851", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3510851", abstract = "Resistive RAM (RRAM) is a promising technology to replace traditional technologies such as Flash, because of its low energy consumption, CMOS compatibility, and high density. Many companies are prototyping this technology to validate its potential. \ldots{}", acknowledgement = ack-nhfb, articleno = "52", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Volkel:2022:DDI, author = "Kevin Volkel and Kyle J. Tomek and Albert J. Keung and James M. Tuck", title = "{DINOS}: {Data INspired Oligo Synthesis} for {DNA} Data Storage", journal = j-JETC, volume = "18", number = "3", pages = "53:1--53:35", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510853", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3510853", abstract = "As interest in DNA-based information storage grows, the costs of synthesis have been identified as a key bottleneck. 
A potential direction is to tune synthesis for data. Data strands tend to be composed of a small set of recurring code word sequences, and \ldots{}", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ince:2022:DFB, author = "Mehmet Ince and Bora Bilgic and Sule Ozev", title = "Digital Fault-based Built-in Self-test and Evaluation of Low Dropout Voltage Regulators", journal = j-JETC, volume = "18", number = "3", pages = "54:1--54:20", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510852", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3510852", abstract = "With increasing pressure to obtain near-zero defect rates, there is a need to explore built-in self-test and other non-traditional test techniques for embedded mixed-signal components, such as PLLs, power converters, and data converters. 
This article \ldots{}", acknowledgement = ack-nhfb, articleno = "54", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Jin:2022:GET, author = "Yier Jin and Tsung-Yi Ho and Stjepan Picek and Siddharth Garg", title = "Guest Editorial: Trustworthy {AI}", journal = j-JETC, volume = "18", number = "3", pages = "55:1--55:3", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3534957", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3534957", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Dubey:2022:GML, author = "Anuj Dubey and Rosario Cammarota and Vikram Suresh and Aydin Aysu", title = "Guarding Machine Learning Hardware Against Physical Side-channel Attacks", journal = j-JETC, volume = "18", number = "3", pages = "56:1--56:31", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465377", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3465377", abstract = "Machine learning (ML) models can be trade secrets due to their development cost. Hence, they need protection against malicious forms of reverse engineering (e.g., in IP piracy). 
With a growing shift of ML to the edge devices, in part for performance and \ldots{}", acknowledgement = ack-nhfb, articleno = "56", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Olney:2022:DNT, author = "Brooks Olney and Robert Karam", title = "Diverse, Neural {Trojan} Resilient Ecosystem of Neural Network {IP}", journal = j-JETC, volume = "18", number = "3", pages = "57:1--57:23", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3471189", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3471189", abstract = "Adversarial machine learning is a prominent research area aimed towards exposing and mitigating security vulnerabilities in AI/ML algorithms and their implementations. Data poisoning and neural Trojans enable an attacker to drastically change the behavior \ldots{}", acknowledgement = ack-nhfb, articleno = "57", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2022:PBA, author = "Ge Li and Mohit Tiwari and Michael Orshansky", title = "Power-based Attacks on Spatial {DNN} Accelerators", journal = j-JETC, volume = "18", number = "3", pages = "58:1--58:18", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491219", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3491219", abstract = "With proliferation of DNN-based applications, the confidentiality of DNN model is an important commercial goal. 
Spatial accelerators, which parallelize matrix/vector operations, are utilized for enhancing energy efficiency of DNN computation. Recently, \ldots{}", acknowledgement = ack-nhfb, articleno = "58", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Huang:2022:GBB, author = "Po-Hao Huang and Honggang Yu and Max Panoff and Ting-Chi Wang", title = "Generation of Black-box Audio Adversarial Examples Based on Gradient Approximation and Autoencoders", journal = j-JETC, volume = "18", number = "3", pages = "59:1--59:19", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491220", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3491220", abstract = "Deep Neural Network (DNN) is gaining popularity thanks to its ability to attain high accuracy and performance in various security-crucial scenarios. 
However, recent research shows that DNN-based Automatic Speech Recognition (ASR) systems are vulnerable to \ldots{}", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Huang:2022:DAA, author = "Tianjin Huang and Vlado Menkovski and Yulong Pei and Yuhao Wang and Mykola Pechenizkiy", title = "Direction-aggregated Attack for Transferable Adversarial Examples", journal = j-JETC, volume = "18", number = "3", pages = "60:1--60:22", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501769", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3501769", abstract = "Deep neural networks are vulnerable to adversarial examples that are crafted by imposing imperceptible changes to the inputs. However, these adversarial examples are most successful in white-box settings where the model and its parameters are available. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Liu:2022:DAA, author = "Yuan Liu and Jinxin Dong and Pingqiang Zhou", title = "Defending against Adversarial Attacks in Deep Learning with Robust Auxiliary Classifiers Utilizing Bit-plane Slicing", journal = j-JETC, volume = "18", number = "3", pages = "61:1--61:17", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510855", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3510855", abstract = "Deep Neural Networks (DNNs) have been widely used in variety of fields with great success. However, recent research indicates that DNNs are susceptible to adversarial attacks, which can easily fool the well-trained DNN-based classifiers without being \ldots{}", acknowledgement = ack-nhfb, articleno = "61", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Panoff:2022:RCA, author = "Max Panoff and Honggang Yu and Haoqi Shan and Yier Jin", title = "A Review and Comparison of {AI}-enhanced Side Channel Analysis", journal = j-JETC, volume = "18", number = "3", pages = "62:1--62:20", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517810", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Aug 10 06:36:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517810", abstract = "Side Channel Analysis (SCA) presents a clear threat to privacy and security in modern computing systems. The vast majority of communications are secured through cryptographic algorithms. 
These algorithms are often provably-secure from a cryptographical \ldots{}", acknowledgement = ack-nhfb, articleno = "62", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", }

%%% Guest editorial opening volume 18, number 4 (October 2022).
%%% The compound family name ``Al Faruque'' is brace-protected so that
%%% BibTeX treats it as one surname instead of taking ``Faruque'' alone
%%% as the family name and folding the first word into the given names.
@Article{Chen:2022:GES,
  author =       "Vanessa Chen and Mohammad {Al Faruque} and Fadi Kurdahi",
  title =        "Guest Editorial: Secure Radio-Frequency {(RF)-Analog}
                 Electronics and Electromagnetics",
  journal =      j-JETC,
  volume =       "18",
  number =       "4",
  pages =        "63:1--63:??",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564261",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Sat Oct 29 07:30:43 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564261",
  acknowledgement = ack-nhfb,
  articleno =    "63",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Nooraiepour:2022:TVM, author = "Alireza Nooraiepour and Shaghayegh Vosoughitabar and Chung-Tse Michael Wu and Waheed U. Bajwa and Narayan B. Mandayam", title = "Time-varying Metamaterial-enabled Directional Modulation Schemes for Physical Layer Security in Wireless Communication Links", journal = j-JETC, volume = "18", number = "4", pages = "64:1--64:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3513088", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3513088", abstract = "Novel transmission schemes, enabled by recent advances in the fields of metamaterial (MTM), leaky-wave antenna (LWA) and directional modulation (DM), are proposed for enhancing the physical layer (PHY) security.
MTM-LWAs, which offer compact, integrated, \ldots{}", acknowledgement = ack-nhfb, articleno = "64", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mosavirik:2022:SVE, author = "Tahoura Mosavirik and Fatemeh Ganji and Patrick Schaumont and Shahin Tajik", title = "\pkg{ScatterVerif}: Verification of Electronic Boards Using Reflection Response of Power Distribution Network", journal = j-JETC, volume = "18", number = "4", pages = "65:1--65:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3513087", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3513087", abstract = "The globalization of electronic systems' fabrication has made some of our most critical systems vulnerable to supply chain attacks. Implanting spy chips on the printed circuit boards (PCBs) or replacing genuine components with counterfeit/recycled ones \ldots{}", acknowledgement = ack-nhfb, articleno = "65", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Riaz:2022:SAP, author = "Arslan Riaz and Dylan Nash and Jonathan Ngo and Chiraag Juvekar and Phillip Nadeau and Tao Yu and Rabia Tugce Yazicigil", title = "Security Assessment of Phase-Based Ranging Systems in a Multipath Environment", journal = j-JETC, volume = "18", number = "4", pages = "66:1--66:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517809", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517809", abstract = "Phase-based ranging has been widely deployed in proximity detection 
scenarios including security-critical applications due to their low implementation complexity on existing transceivers. In this work, the security of multi-carrier phase-based ranging \ldots{}", acknowledgement = ack-nhfb, articleno = "66", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", }

%%% In the abstract below, the closing parenthesis now follows the math
%%% immediately; a stray blank before it was removed.
@Article{Ashok:2022:HTD,
  author =       "Maitreyi Ashok and Matthew J. Turner and Ronald L.
                 Walsworth and Edlyn V. Levine and Anantha P.
                 Chandrakasan",
  title =        "Hardware {Trojan} Detection Using Unsupervised Deep
                 Learning on Quantum Diamond Microscope Magnetic Field
                 Images",
  journal =      j-JETC,
  volume =       "18",
  number =       "4",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3531010",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Sat Oct 29 07:30:43 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3531010",
  abstract =     "This article presents a method for hardware trojan
                 detection in integrated circuits. Unsupervised deep
                 learning is used to classify wide field-of-view
                 (4 $ \times $ 4 mm$^2$), high spatial resolution
                 magnetic field images taken using a Quantum Diamond
                 Microscope (QDM). \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "67",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Lee:2022:SAS,
  author =       "Jeong-Jun Lee and Wenrui Zhang and Yuan Xie and Peng Li",
  title =        "{SaARSP}: an Architecture for Systolic-Array
                 Acceleration of Recurrent Spiking Neural Networks",
  journal =      j-JETC,
  volume =       "18",
  number =       "4",
  pages =        "68:1--68:??",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3510854",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Sat Oct 29 07:30:43 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510854",
  abstract =     "Spiking neural networks (SNNs) are brain-inspired
                 event-driven models of computation with promising
                 ultra-low energy dissipation. Rich network dynamics
                 emergent in recurrent spiking neural networks (R-SNNs)
                 can form temporally based memory, offering great
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Alam:2022:SMM, author = "Mohsen Riahi Alam and M. Hassan Najafi and Nima Taherinejad", title = "Sorting in Memristive Memory", journal = j-JETC, volume = "18", number = "4", pages = "69:1--69:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517181", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517181", abstract = "Sorting data is needed in many application domains. Traditionally, the data is read from memory and sent to a general-purpose processor or application-specific hardware for sorting. The sorted data is then written back to the memory.
Reading/writing data \ldots{}", acknowledgement = ack-nhfb, articleno = "69", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Maity:2022:CEB, author = "Dilip Kumar Maity and Surajit Kumar Roy and Chandan Giri", title = "A Cost-Effective Built-In Self-Test Mechanism for Post-Manufacturing {TSV} Defects in {$3$D} {ICs}", journal = j-JETC, volume = "18", number = "4", pages = "70:1--70:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517808", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517808", abstract = "Three-Dimensional Integrated Circuit (3D IC) based on Through-Silicon-Via (TSV) has brought a drastic change in IC technology. Since TSVs connect different layers of 3D stacks, their proper functioning is an essential prerequisite for system operation. 
\ldots{}", acknowledgement = ack-nhfb, articleno = "70", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Xu:2022:ASP, author = "Kangwei Xu and Dongrong Zhang and Qiang Ren and Yuanqing Cheng and Patrick Girard", title = "All-spin {PUF}: an Area-efficient and Reliable {PUF} Design with Signature Improvement for Spin-transfer Torque Magnetic Cell-based All-spin Circuits", journal = j-JETC, volume = "18", number = "4", pages = "71:1--71:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517811", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517811", abstract = "Recently, spin-transfer torque magnetic cell (STT-mCell) has emerged as a promising spintronic device to be used in Computing-in-Memory (CIM) systems. However, it is challenging to guarantee the hardware security of STT-mCell-based all-spin circuits. In \ldots{}", acknowledgement = ack-nhfb, articleno = "71", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Anandakumar:2022:DAF, author = "N. Nalla Anandakumar and Mohammad S. 
Hashmi and Somitra Kumar Sanadhya", title = "Design and Analysis of {FPGA}-based {PUFs} with Enhanced Performance for Hardware-oriented Security", journal = j-JETC, volume = "18", number = "4", pages = "72:1--72:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517813", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517813", abstract = "This article presents a thorough analysis of two distinct Physically Unclonable Functions (PUF), namely RO-PUF (Ring oscillator-based PUF) and RS-LPUF (RS Latch-based PUF), prototyped on FPGA. It is shown that the implemented PUFs possess significantly \ldots{}", acknowledgement = ack-nhfb, articleno = "72", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ruffini:2022:NFB, author = "Simone Ruffini and Luca Caronti and Kasim Sinan Yildirim and Davide Brunelli", title = "{NORM}: an {FPGA}-based Non-volatile Memory Emulation Framework for Intermittent Computing", journal = j-JETC, volume = "18", number = "4", pages = "73:1--73:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517812", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3517812", abstract = "Today's intermittent computing systems operate by relying only on harvested energy accumulated in their tiny energy reservoirs, typically capacitors. 
An intermittent device dies due to a power failure when there is no energy in its capacitor and boots \ldots{}", acknowledgement = ack-nhfb, articleno = "73", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", }

%%% NOTE(review): the abstract's parenthesised abbreviation for
%%% ``conventional CMOS'' was spelled ``C-COMS''; it is corrected to
%%% ``C-CMOS'' here.  Confirm against the publisher's abstract if a
%%% verbatim transcription is required.
@Article{Mehrabani:2022:NHE,
  author =       "Yavar Safaei Mehrabani and Samaneh Goldani Gigasari
                 and Mohammad Mirzaei and Hamidreza Uoosefian",
  title =        "A Novel Highly-Efficient Inexact Full Adder Cell for
                 Motion and Edge Detection Systems of Image Processing
                 in {CNFET} Technology",
  journal =      j-JETC,
  volume =       "18",
  number =       "4",
  pages =        "74:1--74:??",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3524061",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Sat Oct 29 07:30:43 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3524061",
  abstract =     "In this paper, a novel and highly efficient inexact
                 Full Adder cell by exploiting two logic styles
                 including conventional CMOS (C-CMOS) and pass
                 transistor logic (PTL) are presented. The so-called
                 carbon nanotube field-effect transistor (CNFET)
                 technology \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "74",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}

@Article{Kim:2022:EPS, author = "Yulhwa Kim and Hyungjun Kim and Jae-Joon Kim", title = "Extreme Partial-Sum Quantization for Analog Computing-In-Memory Neural Network Accelerators", journal = j-JETC, volume = "18", number = "4", pages = "75:1--75:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3528104", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3528104", abstract = "In Analog Computing-in-Memory (CIM) neural network accelerators, analog-to-digital converters (ADCs) are required to convert the analog partial sums generated from a CIM array to digital values.
The overhead from ADCs substantially degrades the energy \ldots{}", acknowledgement = ack-nhfb, articleno = "75", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Han:2022:PSO, author = "Haiyang Han and Theoni Alexoudi and Chris Vagionas and Nikos Pleros and Nikos Hardavellas", title = "A Practical Shared Optical Cache With Hybrid {MWSR\slash R-SWMR NoC} for Multicore Processors", journal = j-JETC, volume = "18", number = "4", pages = "76:1--76:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3531012", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3531012", abstract = "Conventional electronic memory hierarchies are intrinsically limited in their ability to overcome the memory wall due to scaling constraints. 
Optical caches and interconnects can mitigate these constraints, and enable processors to reach performance and \ldots{}", acknowledgement = ack-nhfb, articleno = "76", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Zaeemi:2022:HLM, author = "Meysam Zaeemi and Siamak Mohammadi", title = "High-level Modeling and Verification Platform for Elastic Circuits with Process Variation Considerations", journal = j-JETC, volume = "18", number = "4", pages = "77:1--77:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3534971", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3534971", abstract = "In addition to the advantages of asynchronous circuits, compatibility with synchronous EDA tools is another strength point of synchronous elastic circuits. Synchronous elastic circuits face some challenges, such as process variations that can compromise \ldots{}", acknowledgement = ack-nhfb, articleno = "77", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{K:2022:CHA, author = "Amal Thomas K. 
and Soumyajit Poddar and Hemanta Kumar Mondal", title = "A {CNN} Hardware Accelerator Using Triangle-based Convolution", journal = j-JETC, volume = "18", number = "4", pages = "78:1--78:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3544975", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3544975", abstract = "Convolutional neural networks (CNNs) have gained a massive impression in the fields of computer vision and especially in the embedded applications because of their high accuracy and performance. However, high computational complexity and power consumption \ldots{}", acknowledgement = ack-nhfb, articleno = "78", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gebregiorgis:2022:SMC, author = "Anteneh Gebregiorgis and Hoang Anh Du Nguyen and Jintao Yu and Rajendra Bishnoi and Mottaqiallah Taouil and Francky Catthoor and Said Hamdioui", title = "A Survey on Memory-centric Computer Architectures", journal = j-JETC, volume = "18", number = "4", pages = "79:1--79:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3544974", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3544974", abstract = "Faster and cheaper computers have been constantly demanding technological and architectural improvements. However, current technology is suffering from three technology walls: leakage wall, reliability wall, and cost wall. 
Meanwhile, existing architecture \ldots{}", acknowledgement = ack-nhfb, articleno = "79", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Wu:2022:ZHT, author = "You Wu and Lin Li", title = "\pkg{Zallocator}: a High Throughput Write-Optimized Persistent Allocator for Non-Volatile Memory", journal = j-JETC, volume = "18", number = "4", pages = "80:1--80:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3549528", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3549528", abstract = "Non-volatile main memory (NVRAM) is likely to break the bottleneck caused by data transferring between main memory and extern storage, and fundamentally change the way applications do data persistence. We can build persistent data structures directly on \ldots{}", acknowledgement = ack-nhfb, articleno = "80", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Peng:2022:DNN, author = "Jiaxin Peng and Yousra Alkabani and Krunal Puri and Xiaoxuan Ma and Volker Sorger and Tarek El-Ghazawi", title = "A Deep Neural Network Accelerator using Residue Arithmetic in a Hybrid Optoelectronic System", journal = j-JETC, volume = "18", number = "4", pages = "81:1--81:??", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3550273", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Oct 29 07:30:43 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3550273", abstract = "The acceleration of Deep Neural Networks (DNNs) has attracted much attention in research. 
Many critical real-time applications benefit from DNN accelerators but are limited by their compute-intensive nature. This work introduces an accelerator for \ldots{}", acknowledgement = ack-nhfb, articleno = "81", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ardesi:2023:TMF, author = "Yuri Ardesi and Umberto Garlando and Fabrizio Riente and Giuliana Beretta and Gianluca Piccinini and Mariagrazia Graziano", title = "Taming Molecular Field-Coupling for Nanocomputing Design", journal = j-JETC, volume = "19", number = "1", pages = "1:1--1:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3552520", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3552520", abstract = "Molecular Field-Coupling Nanocomputing (FCN) is one of the most promising technologies for overcoming Complementary Metal Oxide Semiconductor (CMOS) scaling issues. 
It encodes the information in the charge distribution of nanometric molecules and \ldots{}", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Samant:2023:NAL, author = "Prerana Samant and Naveen Kumar Macha and Mostafizur Rahman", title = "A Neoteric Approach for Logic with Embedded Memory Leveraging Crosstalk Computing", journal = j-JETC, volume = "19", number = "1", pages = "2:1--2:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3569917", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3569917", abstract = "One of the essential elements of computing is the memory element. Flip-flops form an integral part of a System-on-Chip (SoC) and consume most of the area on the die. 
To meet the high-speed performance demands by the data-intensive applications such as \ldots{}", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Forero:2023:BOD, author = "Freddy Forero and Victor Champac and Michel Renovell", title = "B-open Defect: a Novel Defect Model in {FinFET} Technology", journal = j-JETC, volume = "19", number = "1", pages = "3:1--3:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564244", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3564244", abstract = "This article proposes an electrical analysis of a new defect mechanism, to be named as b-open defect, which may occur in nanometer technologies due to the use of the Self-Aligned Double Patterning (SADP) technique. 
In metal lines making use of the SADP \ldots{}", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Farahmandi:2023:ISI, author = "Farimah Farahmandi and Ankur Srivastava and Giorgio {Di Natale} and Mark Tehranipoor", title = "Introduction to the Special Issue on {CAD} for Security: Pre-silicon Security Sign-off Solutions Through Design Cycle", journal = j-JETC, volume = "19", number = "1", pages = "4:1--4:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584317", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3584317", abstract = "This introduction welcomes all readers to this ACM JETC special issue on CAD for Security: Pre-silicon Security Sign-off Solutions Through Design Cycle. 
The articles published in this special issue reflect how computer-aided design (CAD) tools are \ldots{}", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Canto:2023:RCK, author = "Alvaro Cintas Canto and Mehran Mozaffari Kermani and Reza Azarderakhsh", title = "Reliable Constructions for the Key Generator of Code-based Post-quantum Cryptosystems on {FPGA}", journal = j-JETC, volume = "19", number = "1", pages = "5:1--5:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544921", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3544921", abstract = "Advances in quantum computing have urged the need for cryptographic algorithms that are low-power, low-energy, and secure against attacks that can be potentially enabled. For this post-quantum age, different solutions have been studied. 
Code-based \ldots{}", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Erata:2023:SAT, author = "Ferhat Erata and Shuwen Deng and Faisal Zaghloul and Wenjie Xiong and Onur Demir and Jakub Szefer", title = "Survey of Approaches and Techniques for Security Verification of Computer Systems", journal = j-JETC, volume = "19", number = "1", pages = "6:1--6:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564785", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3564785", abstract = "This article surveys the landscape of security verification approaches and techniques for computer systems at various levels: from a software-application level all the way to the physical hardware level. 
Different existing projects are compared, based on \ldots{}", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Javaheripi:2023:AAH, author = "Mojan Javaheripi and Jung-Woo Chang and Farinaz Koushanfar", title = "{AccHashtag}: Accelerated Hashing for Detecting Fault-Injection Attacks on Embedded Neural Networks", journal = j-JETC, volume = "19", number = "1", pages = "7:1--7:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555808", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3555808", abstract = "We propose AccHashtag, the first framework for high-accuracy detection of fault-injection attacks on Deep Neural Networks (DNNs) with provable bounds on detection performance. Recent literature in fault-injection attacks shows the severe DNN accuracy \ldots{}", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Witharana:2023:AGS, author = "Hasini Witharana and Aruna Jayasena and Andrew Whigham and Prabhat Mishra", title = "Automated Generation of Security Assertions for {RTL} Models", journal = j-JETC, volume = "19", number = "1", pages = "8:1--8:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565801", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3565801", abstract = "System-on-Chip (SoC) security is vital in designing trustworthy systems. 
Detecting and fixing a vulnerability in the early stages is easier and cost-effective. Assertion-based verification is widely used for functional validation of Register-Transfer \ldots{}", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Monta:2023:SCS, author = "Kazuki Monta and Lang Lin and Jimin Wen and Harsh Shrivastav and Calvin Chow and Hua Chen and Joao Geada and Sreeja Chowdhury and Nitin Pundir and Norman Chang and Makoto Nagata", title = "Silicon-correlated Simulation Methodology of {EM} Side-channel Leakage Analysis", journal = j-JETC, volume = "19", number = "1", pages = "9:1--9:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568957", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Mar 18 10:19:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3568957", abstract = "Cryptography hardware is vulnerable to side-channel (SC) attacks on power supply current flow and electromagnetic (EM) emission. 
This article proposes simulation-based power and EM side-channel leakage analysis (SCLA) techniques on a cryptographic \ldots{}", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ray:2023:SCI, author = "Dipojjwal Ray and Yogendra Sao and Santosh Biswas and Sk Subidh Ali", title = "On Securing Cryptographic {ICs} against Scan-based Attacks: a {Hamming} Weight Distribution Perspective", journal = j-JETC, volume = "19", number = "2", pages = "10:1--10:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577215", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3577215", abstract = "Scan chain-based Design for Testability is the industry standard in use for testing manufacturing defects in the semiconductor industry to ensure the structural and functional correctness of chips. Fault coverage is significantly enhanced due to the \ldots{}", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Talukder:2023:NTD, author = "B. M. S. 
Bahar Talukder and Farah Ferdaus and Md Tauhidur Rahman", title = "A Noninvasive Technique to Detect Authentic\slash Counterfeit {SRAM} Chips", journal = j-JETC, volume = "19", number = "2", pages = "11:1--11:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597024", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3597024", abstract = "Many commercially available memory chips are fabricated worldwide in untrusted facilities. Therefore, a counterfeit memory chip can easily enter into the supply chain in different formats. Deploying these counterfeit memory chips into an electronic system \ldots{}", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ram:2023:ETA, author = "Saswat Kumar Ram and Sauvagya Ranjan Sahoo and Banee Bandana Das and Kamalakanta Mahapatra and Saraju P. Mohanty", title = "Eternal-thing 2.0: Analog-{Trojan}-resilient Ripple-less Solar Harvesting System for Sustainable {IoT}", journal = j-JETC, volume = "19", number = "2", pages = "12:1--12:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3575800", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3575800", abstract = "Recently, harvesting natural energy is gaining more attention than other conventional approaches for sustainable IoT. System on chip power requirement for the internet of things (IoT) and generating higher voltages on chip is a massive challenge for on-
\ldots{}", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chen:2023:AED, author = "Xiangru Chen and Maneesh Merugu and Jiaqi Zhang and Sandip Ray", title = "{AroMa}: Evaluating Deep Learning Systems for Stealthy Integrity Attacks on Multi-tenant Accelerators", journal = j-JETC, volume = "19", number = "2", pages = "13:1--13:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579033", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3579033", abstract = "Multi-tenant applications have been proliferating in recent years, supported by the emergence of computing-as-service paradigms. Unfortunately, multi-tenancy induces new security vulnerabilities due to spatial or temporal co-location of applications with \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Yang:2023:HOE, author = "Mingdai Yang and Qiuwen Lou and Ramin Rajaei and Mohammad Reza Jokar and Junyi Qiu and Yuming Liu and Aditi Udupa and Frederic T. Chong and John M. Dallesasse and Milton Feng and Lynford L. Goddard and X. 
Sharon Hu and Yanjing Li", title = "A Hybrid Optical-Electrical Analog Deep Learning Accelerator Using Incoherent Optical Signals", journal = j-JETC, volume = "19", number = "2", pages = "14:1--14:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584183", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3584183", abstract = "Optical deep learning (DL) accelerators have attracted significant interests due to their latency and power advantages. In this article, we focus on incoherent optical designs. A significant challenge is that there is no known solution to perform single- \ldots{}", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ma:2023:MMT, author = "Yu Ma and Linfeng Zheng and Pingqiang Zhou", title = "A Mapping Method Tolerating {SAF} and Variation for Memristor Crossbar Array Based Neural Network Inference on Edge Devices", journal = j-JETC, volume = "19", number = "2", pages = "15:1--15:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3585518", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3585518", abstract = "There is an increasing demand for running neural network inference on edge devices. Memristor crossbar array (MCA) based accelerators can be used to accelerate neural networks on edge devices.
However, reliability issues in memristors, such as stuck-at \ldots{}", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Huang:2023:LRG, author = "Siyuan Huang and Brian D. Hoskins and Matthew W. Daniels and Mark D. Stiles and Gina C. Adam", title = "Low-Rank Gradient Descent for Memory-Efficient Training of Deep In-Memory Arrays", journal = j-JETC, volume = "19", number = "2", pages = "16:1--16:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577214", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3577214", abstract = "The movement of large quantities of data during the training of a deep neural network presents immense challenges for machine learning workloads, especially those based on future functional memories deployed to store network models. As the size of network \ldots{}", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Jessurun:2023:FNS, author = "Nathan Jessurun and Olivia P. Dizon-Paradis and Jacob Harrison and Shajib Ghosh and Mark M. Tehranipoor and Damon L. 
Woodard and Navid Asadizanjani", title = "{FPIC}: a Novel Semantic Dataset for Optical {PCB} Assurance", journal = j-JETC, volume = "19", number = "2", pages = "17:1--17:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588032", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3588032", abstract = "Outsourced PCB fabrication necessitates increased hardware assurance capabilities. Several assurance techniques based on AOI have been proposed that leverage PCB images acquired using digital cameras. We review state-of-the-art AOI techniques and observe \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Koylu:2023:SML, author = "Troya {\c{C}}a{\u{g}}{\i}l K{\"o}yl{\"u} and Cezar Rodolfo Wedig Reinbrecht and Anteneh Gebregiorgis and Said Hamdioui and Mottaqiallah Taouil", title = "A Survey on Machine Learning in Hardware Security", journal = j-JETC, volume = "19", number = "2", pages = "18:1--18:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589506", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Fri Jun 9 06:26:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3589506", abstract = "Hardware security is currently a very influential domain, where each year countless works are published concerning attacks against hardware and countermeasures.
A significant number of them use machine learning, which is proven to be very effective in \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ganguly:2023:GEI, author = "Amlan Ganguly and Salvatore Monteleone and Diana Goehringer and Cristinel Ababei", title = "{Guest Editors} Introduction: Special Issue on Network-on-Chip Architectures of the Future {(NoCArc)}", journal = j-JETC, volume = "19", number = "3", pages = "19:1--19:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609500", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3609500", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "19", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rout:2023:MSE, author = "Sidhartha Sankar Rout and Mitali Sinha and Sujay Deb", title = "{$2$DMAC}: a Sustainable and Efficient Medium Access Control Mechanism for Future Wireless {NoCs}", journal = j-JETC, volume = "19", number = "3", pages = "20:1--20:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570727", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3570727", abstract = "Wireless Network-on-Chip (WNoC) requires a Medium Access Control (MAC) mechanism for an interference-free sharing of the wireless channel. In traditional MAC, a token is circulated among the Wireless Interfaces (WIs) in a Round Robin manner. 
The WI with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "20", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ali:2023:CTB, author = "Usman Ali and Sheikh Abdul Rasheed Sahni and Omer Khan", title = "Characterization of Timing-based Software Side-channel Attacks and Mitigations on Network-on-Chip Hardware", journal = j-JETC, volume = "19", number = "3", pages = "21:1--21:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3585519", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3585519", abstract = "Modern network-on-chip (NoC) hardware is an emerging target for side-channel security attacks. A recent work implemented and characterized timing-based software side-channel attacks that target NoC hardware on a real multicore machine. This article \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "21", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Patooghy:2023:SNC, author = "Ahmad Patooghy and Mahdi Hasanzadeh and Amin Sarihi and Mostafa Abdelrehim and Abdel-Hameed A. 
Badawy", title = "Securing Network-on-chips Against Fault-injection and Crypto-analysis Attacks via Stochastic Anonymous Routing", journal = j-JETC, volume = "19", number = "3", pages = "22:1--22:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592798", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3592798", abstract = "Network-on-chip (NoC) is widely used as an efficient communication architecture in multi-core and many-core System-on-chips (SoCs). However, the shared communication resources in an NoC platform, e.g., channels, buffers, and routers, might be used to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "22", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Reza:2023:MLE, author = "Md Farhadur Reza", title = "Machine Learning Enabled Solutions for Design and Optimization Challenges in Networks-on-Chip based Multi\slash Many-Core Architectures", journal = j-JETC, volume = "19", number = "3", pages = "23:1--23:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3591470", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3591470", abstract = "Due to the advancement of transistor technology, a single chip processor can now have hundreds of cores. Network-on-Chip (NoC) has been the superior interconnect fabric for multi/many-core on-chip systems because of its scalability and parallelism. Due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "23", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Hasan:2023:EEV, author = "Md. Mahfuz {Al Hasan} and Mohammad Tahsin Mostafiz and Thomas An Le and Jake Julia and Nidish Vashistha and Shayan Taheri and Navid Asadizanjani", title = "{EVHA}: Explainable Vision System for Hardware Testing and Assurance --- an Overview", journal = j-JETC, volume = "19", number = "3", pages = "24:1--24:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3590772", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3590772", abstract = "Due to the ever-growing demands for electronic chips in different sectors, semiconductor companies have been mandated to offshore their manufacturing processes. This unwanted matter has made security and trustworthiness of their fabricated chips \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "24", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gaikwad:2023:HIA, author = "Pravin Gaikwad and Jonathan Cruz and Prabuddha Chakraborty and Swarup Bhunia and Tamzidul Hoque", title = "Hardware {IP} Assurance against {Trojan} Attacks with Machine Learning and Post-processing", journal = j-JETC, volume = "19", number = "3", pages = "25:1--25:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592795", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3592795", abstract = "System-on-chip (SoC) developers increasingly rely on pre-verified hardware intellectual property (IP) blocks often acquired from untrusted third-party vendors. These IPs might contain hidden malicious functionalities or hardware Trojans that may \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "25", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Densmore:2023:ISI, author = "Douglas Densmore and Nathan J. Hillson and Eric Klavins and Chris Myers and Jean Peccoud and Giovanni Stracquadanio", title = "Introduction to the Special Issue on {BioFoundries} and Cloud Laboratories", journal = j-JETC, volume = "19", number = "3", pages = "26:1--26:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609485", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3609485", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "26", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Winston:2023:VEF, author = "Caleb Winston and Max Willsey and Luis Ceze", title = "Virtualizing Existing Fluidic Programs", journal = j-JETC, volume = "19", number = "3", pages = "27:1--27:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558550", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3558550", abstract = "Fluidic automation, the practice of programmatically manipulating small fluids to execute laboratory protocols, has led to vastly increased productivity for biologists and chemists. Most fluidic programs, commonly referred to as protocols, are written \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "27", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bartley:2023:BOR, author = "Bryan Bartley and Jacob Beal and Miles Rogers and Daniel Bryce and Robert P. 
Goldman and Benjamin Keller and Peter Lee and Vanessa Biggers and Joshua Nowak and Mark Weston", title = "Building an Open Representation for Biological Protocols", journal = j-JETC, volume = "19", number = "3", pages = "28:1--28:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3604568", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:57 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3604568", abstract = "Laboratory protocols are critical to biological research and development, yet difficult to communicate and reproduce across projects, investigators, and organizations. While many attempts have been made to address this challenge, there is currently no \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "28", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Mondal:2023:TGT, author = "Anindan Mondal and Debasish Kalita and Archisman Ghosh and Suchismita Roy and Bibhash Sen", title = "Toward the Generation of Test Vectors for the Detection of Hardware {Trojan} Targeting Effective Switching Activity", journal = j-JETC, volume = "19", number = "4", pages = "29:1--29:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597497", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3597497", abstract = "Hardware Trojans (HTs) are small circuits intentionally designed by an adversary for harmful purposes. These types of circuits are extremely difficult to detect. An HT often requires some specific signals to activate, which are almost impossible to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. 
Emerg. Technol. Comput. Syst.", articleno = "29", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Demirkiran:2023:EPS, author = "Cansu Demirkiran and Furkan Eris and Gongyu Wang and Jonathan Elmhurst and Nick Moore and Nicholas C. Harris and Ayon Basumallik and Vijay Janapa Reddi and Ajay Joshi and Darius Bunandar", title = "An Electro-Photonic System for Accelerating Deep Neural Networks", journal = j-JETC, volume = "19", number = "4", pages = "30:1--30:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3606949", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3606949", abstract = "The number of parameters in deep neural networks (DNNs) is scaling at about 5$ \times $ the rate of Moore's Law. To sustain this growth, photonic computing is a promising avenue, as it enables higher throughput in dominant general matrix-matrix multiplication (\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "30", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kim:2023:ISI, author = "John Kim and Tushar Krishna", title = "Introduction to the Special Issue on Next-Generation On-Chip and Off-Chip Communication Architectures for Edge, Cloud and {HPC}", journal = j-JETC, volume = "19", number = "4", pages = "31:1--31:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3631144", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3631144", acknowledgement = ack-nhfb, ajournal = "ACM J.
Emerg. Technol. Comput. Syst.", articleno = "31", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Munoz-Martinez:2023:SST, author = "Francisco Mu{\~n}oz-Mart{\'\i}nez and Jos{\'e} L. Abell{\'a}n and Manuel E. Acacio and Tushar Krishna", title = "{STIFT}: a Spatio-Temporal Integrated Folding Tree for Efficient Reductions in Flexible {DNN} Accelerators", journal = j-JETC, volume = "19", number = "4", pages = "32:1--32:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3531011", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3531011", abstract = "Increasing deployment of Deep Neural Networks (DNNs) recently fueled interest in the development of specific accelerator architectures capable of meeting their stringent performance and energy consumption requirements. DNN accelerators can be organized \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "32", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bhat:2023:SFG, author = "Sachin Bhat and Mingyu Li and Sourabh Kulkarni and Csaba Andras Moritz", title = "{SkyBridge 2.0}: a Fine-grained Vertical {$3$D-IC} Technology for Future {ICs}", journal = j-JETC, volume = "19", number = "4", pages = "33:1--33:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3617501", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3617501", abstract = "Gate-all-around field effect transistors (FETs) are set to replace FinFETs to enable continued miniaturization of ICs in the deep nanometer regime. IMEC and IRDS roadmaps project that 3D integration of gate-all-around FETs is a key path for the IC \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "33", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Koblah:2023:FOD, author = "David Selasi Koblah and Ulbert J. Botero and Sean P. Costello and Olivia P. Dizon-Paradis and Fatemeh Ganji and Damon L. 
Woodard and Domenic Forte", title = "A Fast Object Detection-Based Framework for Via Modeling on {PCB} {X}-Ray {CT} Images", journal = j-JETC, volume = "19", number = "4", pages = "34:1--34:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3606948", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Dec 4 13:56:58 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3606948", abstract = "For successful printed circuit board (PCB) reverse engineering (RE), the resulting device must retain the physical characteristics and functionality of the original. Although the applications of RE are within the discretion of the executing party, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "34", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Okafor:2024:FSN, author = "Ikenna Okafor and Akshay Krishna Ramanathan and Nagadastagiri Reddy Challapalle and Zheyu Li and Vijaykrishnan Narayanan", title = "Fusing In-storage and Near-storage Acceleration of Convolutional Neural Networks", journal = j-JETC, volume = "20", number = "1", pages = "1:1--1:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3597496", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 3 11:03:44 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3597496", abstract = "Video analytics has a wide range of applications and has attracted much interest over the years. While it can be both computationally and energy-intensive, video analytics can greatly benefit from in/near memory compute. The practice of moving compute \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ahmed:2024:DTR, author = "Soyed Tuhin Ahmed and Mahta Mayahinia and Michael Hefenbrock and Christopher M{\"u}nch and Mehdi B. Tahoori", title = "Design-time Reference Current Generation for Robust Spintronic-based Neuromorphic Architecture", journal = j-JETC, volume = "20", number = "1", pages = "2:1--2:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3625556", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 3 11:03:44 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3625556", abstract = "Neural Networks (NN) can be efficiently accelerated in a neuromorphic fabric based on emerging resistive non-volatile memories (NVM), such as Spin Transfer Torque Magnetic RAM (STT-MRAM). Compared to other NVM technologies, STT-MRAM offers many benefits, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Roy:2024:SLA, author = "Sourav Roy and Dipnarayan Das and Bibhash Sen", title = "Secure and Lightweight Authentication Protocol Using {PUF} for the {IoT}-based Wireless Sensor Network", journal = j-JETC, volume = "20", number = "1", pages = "3:1--3:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624477", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 3 11:03:44 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3624477", abstract = "The wireless sensor network (WSN) has been gaining popularity for automation and performance improvement in different IoT-based applications. The resource-constrained nature and operating environment of IoT make the devices highly vulnerable to different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Shafi:2024:RUD, author = "Omais Shafi and Mohammad Khalid Pandit and Amarjeet Saini and Gayathri Ananthanarayanan and Rijurekha Sen", title = "Repercussions of Using {DNN} Compilers on Edge {GPUs} for Real Time and Safety Critical Systems: a Quantitative Audit", journal = j-JETC, volume = "20", number = "1", pages = "4:1--4:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3611016", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 3 11:03:44 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3611016", abstract = "Rapid advancements in edge devices have led to a large deployment of deep neural network (DNN) based workloads. To utilize the resources at the edge effectively, many DNN compilers are proposed that efficiently map the high level DNN models developed in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Nowshin:2024:TEE, author = "Fabiha Nowshin and Hongyu An and Yang Yi", title = "Towards Energy-Efficient Spiking Neural Networks: a Robust Hybrid {CMOS}-Memristive Accelerator", journal = j-JETC, volume = "20", number = "1", pages = "5:1--5:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3635165", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Feb 3 11:03:44 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3635165", abstract = "Spiking Neural Networks (SNNs) are energy-efficient artificial neural network models that can carry out data-intensive applications. Energy consumption, latency, and memory bottleneck are some of the major issues that arise in machine learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Karempudi:2024:AVD, author = "Venkata Sai Praneeth Karempudi and Janibul Bashir and Ishan G. 
Thakkar", title = "An Analysis of Various Design Pathways Towards Multi-Terabit Photonic On-Interposer Interconnects", journal = j-JETC, volume = "20", number = "2", pages = "6:1--6:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3635031", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 8 06:25:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3635031", abstract = "In the wake of dwindling Moore's Law, to address the rapidly increasing complexity and cost of fabricating large-scale, monolithic systems-on-chip (SoCs), the industry has adopted dis-aggregation as a solution, wherein a large monolithic SoC is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Gonzalez-Guerrero:2024:TPS, author = "Patricia Gonzalez-Guerrero and Kylie Huch and Nirmalendu Patra and Thom Popovici and George Michelogiannakis", title = "Toward Practical Superconducting Accelerators for Machine Learning Using {U-SFQ}", journal = j-JETC, volume = "20", number = "2", pages = "7:1--7:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653073", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jun 8 06:25:18 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3653073", abstract = "Most popular superconducting circuits operate on information carried by ps-wide, $\mu$V-tall, single flux quantum (SFQ) pulses. These circuits can operate at frequencies of hundreds of GHz with orders of magnitude lower switching energy than complementary- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput.
Syst.", articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Tirelli:2024:SBE, author = "Cristian Tirelli and Juan Sapriza and Rub{\'e}n Rodr{\'\i}guez {\'A}lvarez and Lorenzo Ferretti and Beno{\^\i}t Denkinger and Giovanni Ansaloni and Jos{\'e} Miranda Calero and David Atienza and Laura Pozzi", title = "{SAT}-Based Exact Modulo Scheduling Mapping for Resource-Constrained {CGRAs}", journal = j-JETC, volume = "20", number = "3", pages = "8:1--8:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663675", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 27 10:10:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3663675", abstract = "Coarse-Grain Reconfigurable Arrays (CGRAs) represent emerging low-power architectures designed to accelerate Compute-Intensive Loops (CILs). The effectiveness of CGRAs in providing acceleration relies on the quality of mapping: how efficiently the CIL is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bean:2024:PBD, author = "Benjamin Bean and Cyrus Minwalla and Eirini Eleni Tsiropoulou and Jim Plusquellic", title = "{PUF}-based Digital Money with Propagation-of-Provenance and Offline Transfers between Two Parties", journal = j-JETC, volume = "20", number = "3", pages = "9:1--9:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663676", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 27 10:10:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3663676", abstract = "Building on prior concepts of electronic money (eCash), we introduce a digital currency where a physical unclonable function (PUF) engenders devices with the twin properties of being verifiably enrolled as a member of a legitimate set of eCash devices and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Li:2024:SNS, author = "Ziwen Li and Yu Ma and Jindong Zhou and Pingqiang Zhou", title = "{Spiking-NeRF}: Spiking Neural Network for Energy-Efficient Neural Rendering", journal = j-JETC, volume = "20", number = "3", pages = "10:1--10:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3675808", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 27 10:10:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3675808", abstract = "Artificial Neural Networks (ANNs) have achieved remarkable performance in many artificial intelligence tasks. 
As the application scenarios become more sophisticated, the computation and energy consumption of ANNs are also constantly increasing, which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Amouzegar:2024:GCM, author = "Morteza Amouzegar and Morteza Rezaalipour and Masoud Dehyadegari", title = "Genetic Cache: a Machine Learning Approach to Designing {DRAM} Cache Controllers in {HBM} Systems", journal = j-JETC, volume = "20", number = "3", pages = "11:1--11:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676966", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Tue Aug 27 10:10:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3676966", abstract = "DRAM memory controller plays a critical role in maximizing the performance of high bandwidth memory by efficiently managing data transfers between the CPU and the memory modules. Thus, they are suitable for low-power data-intensive applications. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Weerasena:2024:BCC, author = "Hansika Weerasena and Prabhat Mishra", title = "Breaking On-Chip Communication Anonymity Using Flow Correlation Attacks", journal = j-JETC, volume = "20", number = "4", pages = "12:1--12:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677034", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Dec 7 08:49:06 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3677034", abstract = "Network-on-chip (NoC) is widely used to facilitate communication between components in sophisticated system-on-chip (SoC) designs. Security of the on-chip communication is crucial because exploiting any vulnerability in shared NoC would be a goldmine for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kole:2024:EEN, author = "Abhoy Kole and Kamalika Datta and Indranil Sengupta and Rolf Drechsler", title = "Exploiting the Extended Neighborhood of Hexagonal Qubit Architecture for Mapping Quantum Circuits", journal = j-JETC, volume = "20", number = "4", pages = "13:1--13:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3688391", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Dec 7 08:49:06 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3688391", abstract = "In this work mapping of quantum circuits to regular hexagonal grid with coupling degree of six has been investigated. 
Architectures involving superconducting qubits impose restrictions on 2-qubit gate operations to be carried out only between physically \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ouyang:2024:FBL, author = "Yiming Ouyang and Shuaijie Yuan and Jianhua Li and Huaguo Liang", title = "{F-Bypass}: a Low-Power Network-on-Chip Design Utilizing Bypass to Improve Network Connectivity", journal = j-JETC, volume = "20", number = "4", pages = "14:1--14:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695874", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Dec 7 08:49:06 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3695874", abstract = "With the development of transistor feature size to nanometer level, static power consumption has gradually become the main factor affecting the overall power consumption of network-on-chip (NoC). Power gating is an effective technology to reduce static \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rakka:2024:HSH, author = "Mariam Rakka and Walaa Amer and Hanning Chen and Mohsen Imani and Fadi Kurdahi", title = "{HDRLPIM}: a Simulator for Hyper-Dimensional Reinforcement Learning Based on Processing In-Memory", journal = j-JETC, volume = "20", number = "4", pages = "15:1--15:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695875", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Dec 7 08:49:06 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", URL = "https://dl.acm.org/doi/10.1145/3695875", abstract = "Processing In-Memory (PIM) is a data-centric computation paradigm that performs computations inside the memory, hence eliminating the memory wall problem in traditional computational paradigms used in Von-Neumann architectures. The associative processor, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "15", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Roy:2025:ACA, author = "Antika Roy and MD Mahfuz {Al Hasan} and Shajib Ghosh and Nitin Varshney and Jake Julia and Reza Forghani and Navid Asadizanjani", title = "Applications and Challenges of {AI} in {PCB} {X}-ray Inspection: a Comprehensive Study", journal = j-JETC, volume = "21", number = "1", pages = "1:1--1:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703457", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As printed circuit boards (PCBs) continue to evolve in complexity and miniaturization, the demand for robust and efficient inspection techniques has become paramount in ensuring the quality and reliability of electronic devices. The application of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Hashemi:2025:SIS, author = "Mona Hashemi and Siamak Mohammadi and Trevor E. Carlson", title = "{SRLL}: Improving Security and Reliability with User-Defined Constraint-Aware Logic Locking", journal = j-JETC, volume = "21", number = "1", pages = "2:1--2:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3709139", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "As chip fabrication costs rise, designers have shifted to a fabless and outsourced development model which opens up the possibility for IP piracy. 
To address these challenges, logic locking methods modify designs to limit functionality to authorized users \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Lenz:2025:HTD, author = "Jacob N. Lenz and Scott K. Perryman and Dmitro J. Martynowych and David A. Hopper and Sean M. Oliver", title = "Hardware {Trojan} Detection Potential and Limits with the Quantum Diamond Microscope", journal = j-JETC, volume = "21", number = "1", pages = "3:1--3:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711712", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The Quantum Diamond Microscope (QDM) is an instrument with a demonstrated capability to image electrical current in Integrated Circuits (ICs), which shows promise for detection of hardware Trojans. The anomalous current activity caused by hardware Trojans \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Chang:2025:EAT, author = "Shih-Han Chang and Ray-Hong Yen and Chien-Nan Liu", title = "Error-Aware Training for In-{RRAM} Computing Design Considering Non-Ideal Effects in {RRAM} Crossbar Array and Peripheral Circuits", journal = j-JETC, volume = "21", number = "2", pages = "4:1--4:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711830", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "In recent years, Computing In-Memory (CIM) technology has been proposed to solve the bottleneck of data movement in AI edge designs. In-RRAM Computing (IRC) is a popular architecture due to its low leakage current and high density. However, to integrate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Puri:2025:CAS, author = "Amit Puri and John Jose and Venkatesh Tamarapalli", title = "{CosMoS}: Architectural Support for Cost-Effective Data Movement in a Disaggregated Memory Systems", journal = j-JETC, volume = "21", number = "2", pages = "5:1--5:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725218", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Memory disaggregation has emerged as a strong alternative to traditional server systems for improved memory utilization and scalability. 
The compute nodes with a small local memory are connected to disaggregated memory pools through memory-semantic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Kole:2025:QDE, author = "Abhoy Kole and Mohammed Elkacem Djeridane and Lennart Weingarten and Kamalika Datta and Rolf Drechsler", title = "{qSAT}: Design of an Efficient Quantum Satisfiability Solver for Hardware Equivalence Checking", journal = j-JETC, volume = "21", number = "2", pages = "6:1--6:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729229", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Sat Jul 26 08:39:23 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "The use of Boolean Satisfiability (SAT) solver for hardware verification incurs exponential runtime in several instances. In this work, we have proposed an efficient quantum SAT (qSAT) solver for equivalence checking of Boolean circuits employing Grover's \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Bhatia:2025:DSD, author = "Swapnil P.
Bhatia and Miriam Ramliden and Vibha Rao and Grace Vezeau and Nina Katz-Christy and Seth Tolkamp and Matthew O'Leary and Hannah Jayne and Tyler Rockwood and Rithika Raj Kumar Pradeep and Chaitanya Joshi and Cat Ferrieri and Laurel Provencher and Gabriella Davis and Dasith Perera and Emily Greenwald and James Loomis and Phyllis Gitobu and David Kleiman and Sean Mihm and David Turek and Hyunjun Park", title = "Demonstration of a Scalable {DNA} Computing Platform: Writing and Selection", journal = j-JETC, volume = "21", number = "3", pages = "7:1--7:28", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744562", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Oct 2 14:07:28 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "High value data like historical or health records, seismic or satellite images, particle collision events or original recordings, requires a long-term storage medium that is resilient, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sunkavilli:2025:FSS, author = "Sandeep Sunkavilli and Mashrafi Alam Kajol and Qiaoyan Yu", title = "{S$^2$FAM}: Signal-slowdown-based Fault Attack Mitigation Method for Secure Multi-tenant {FPGA}", journal = j-JETC, volume = "21", number = "3", pages = "8:1--8:25", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3756013", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Oct 2 14:07:28 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Multi-tenant Field-programmable Gate Arrays (FPGAs) in cloud service are vulnerable to remotely exploitable attacks, among which power waster circuit (PWC)-based fault \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Lu:2025:TSC, author = "Sheng Lu and Liuting Shang and Sungyong Jung and Yichen Zhang and Qilian Liang and Chenyun Pan", title = "Technology\slash System Co-Optimization for {FPGA} Using Emerging Reconfigurable Logic Device", journal = j-JETC, volume = "21", number = "3", pages = "9:1--9:21", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3750730", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Oct 2 14:07:28 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Reconfigurable devices are gaining increasing attention as a viable alternative and supplementary solution to the traditional CMOS technology. In this article, we develop a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ghimire:2025:GFU, author = "Ashutosh Ghimire and Mohammed Alkurdi and Md Tauhidur Rahman and Saraju Mohanty and Fathi Amsaad", title = "A Golden-Free Unsupervised {ML}-Assisted Security Approach for Detection of {IC} Hardware {Trojans}", journal = j-JETC, volume = "21", number = "3", pages = "10:1--10:22", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748652", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Thu Oct 2 14:07:28 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Hardware Trojans are deliberate malicious hardware modifications inserted in semiconductor Integrated Circuits (ICs) for the purpose of stealing or leaking sensitive information, as well as disrupting critical systems upon activation. Emerging hardware security research highlights the criticality of employing AI for effective detection within the semiconductor IC supply chain. The efficient detection of these malicious Trojan circuits is of utmost significance, as it holds paramount importance in cultivating trust within the semiconductor IC supply chain. However, prevailing detection methodologies, predominantly reliant on Side-Channel Analysis (SCA), often necessitate the utilization of golden chips for validation. This article heralds a new era in hardware Trojan detection, harnessing the prowess of unsupervised machine learning in conjunction with SCA to eliminate the need for golden data. Employing unsupervised clustering, the methodology not only showcased a superior false-positive rate but also demonstrated a comparable accuracy level when compared to supervised counterparts. 
Notably, the proposed model exhibited an impressive accuracy rate of 93\%, particularly excelling in pinpointing diminutive Trojans triggered by concise events, surpassing the capabilities of preceding techniques. In conclusion, this research advances a paradigm in hardware Trojan detection, emphasizing its potential in enhancing the integrity of semiconductor IC supply chains.", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Rizvee:2025:PHB, author = "Md Mashfiq Rizvee and Fairuz Shadmani Shishir and Tanvir Hossain and Tamzidul Hoque and Domenic Forte and Sumaiya Shomaji", title = "A Persistent Hierarchical {Bloom} Filter-based Framework for Scalable Authentication and Tracking of {ICs}", journal = j-JETC, volume = "21", number = "4", pages = "11:1--11:22", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748650", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 20 07:25:20 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Due to the reliance on untrusted supply chain entities, tracking and authentication of Integrated Circuits (ICs) has become crucial to prevent the rapid proliferation of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "11", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Awal:2025:ILV, author = "Md Sadik Awal and Md Tauhidur Rahman", title = "Impedance Leakage Vulnerability and Its Utilization in Reverse-Engineering Embedded Software", journal = j-JETC, volume = "21", number = "4", pages = "12:1--12:20", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3764931", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 20 07:25:20 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Discovering new vulnerabilities and implementing security and privacy measures are important to protect systems and data against physical attacks. One such vulnerability is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "12", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Sun:2025:SSH, author = "Haodong Sun and Zhi Yang and Shuyuan Jin and Zhenlong Zhang", title = "{SHIFT}: Selective Hardware Information Flow Tracking Driven by Deterministic Constraints", journal = j-JETC, volume = "21", number = "4", pages = "13:1--13:16", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765906", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 20 07:25:20 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "Information flow tracking technology is commonly used in the security analysis of hardware design. This technology protects the confidentiality and integrity of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "13", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Deshmukh:2025:ATC, author = "Shreyas Deshmukh and Raghav Singhal and Shruti Landge and Vivek Saraswat and Anmol Biswas and Abhishek Kadam and Ajay K. Singh and Sreenivas Subramoney and Laxmeesha Somappa and Maryam Shojaei Baghini and Udayan Ganguly", title = "Analog and Temporary On-chip Memory for {ANN} Training and Inference", journal = j-JETC, volume = "21", number = "4", pages = "14:1--14:18", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765899", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Mon Oct 20 07:25:20 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", abstract = "On-chip training at the edge becomes a primary requisite for real-time and security-sensitive artificial neural network (ANN) applications. In-memory computation (IMC) techniques \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "14", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Pilato:2026:ISI, author = "Christian Pilato and Francesca Palumbo", title = "Introduction to the Special Issue on Designing Cyber-Physical Systems---From Concepts to Implementation", journal = j-JETC, volume = "22", number = "1", pages = "1:1--1:3", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3771084", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput.
Syst.", articleno = "1", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Khelifati:2026:CCS, author = "Adel Khelifati and Malika Boukala-Ioualalen and Ahmed Hammad", title = "Construction of Consistent {SysML} Models Applied to the {CPS}", journal = j-JETC, volume = "22", number = "1", pages = "2:1--2:23", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3702326", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "2", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Banerjee:2026:TCS, author = "Ayan Banerjee and Aranyak Maity and Imane Lamrani and Sandeep K. S. Gupta", title = "Towards Certified Safe Personalization in Learning-Enabled Human-in-the-loop Human-in-the-plant Systems", journal = j-JETC, volume = "22", number = "1", pages = "3:1--3:27", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3736766", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "3", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Ishak:2026:EEP, author = "Md Ishak and Mohammed Alawad", title = "Energy-Efficient Probabilistic {Bayesian} Neural Networks for Resource-Constrained Environments", journal = j-JETC, volume = "22", number = "1", pages = "4:1--4:21", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3748651", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "4", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Pandey:2026:CCD, author = "Suraj Kumar Pandey and Shivashankar B. Nair", title = "{ChaoticImmuneNet}: a Chaos-driven Immunity Inspired Neural Network Paradigm for Embodied Intelligence in Resource-Constrained Devices", journal = j-JETC, volume = "22", number = "1", pages = "5:1--5:22", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3764930", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "5", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Vega:2026:SPE, author = "Christopher Vega", title = "{SPLIT PUF}: Efficient {PUF} Implementation Using Underutilized {FPGA} Resource", journal = j-JETC, volume = "22", number = "1", pages = "6:1--6:18", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3725533", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "6", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Dobkin:2026:ESU, author = "Daniel Dobkin and Edut Katz and David Popovtzer and Itamar Levi", title = "{EMI} Shielding for Use in Side-Channel Security: Analysis, Simulation, and Measurements", journal = j-JETC, volume = "22", number = "1", pages = "7:1--7:22", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3748508", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "7", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Padhan:2026:DFD, author = "Sushree Padhan and Ashok Kumar Turuk", title = "Design of False Data Injection Attacks in a Cyber-Physical System Using {Gaussian} Distribution", journal = j-JETC, volume = "22", number = "1", pages = "8:1--8:25", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3764929", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "8", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Pawar:2026:EBS, author = "Manjula K. Pawar and Prakashgoud Patil and P. S. Hiremath", title = "Enhancing Blockchain Scalability using Off-Chain and Machine Learning Techniques", journal = j-JETC, volume = "22", number = "1", pages = "9:1--9:21", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3757743", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. 
Syst.", articleno = "9", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{deBrito:2026:RBR, author = "Maiquel de Brito", title = "{ROS-BDI} Robots: an Agent-Based Approach for Programming the Behaviour of Autonomous Robots", journal = j-JETC, volume = "22", number = "1", pages = "10:1--10:26", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3765618", ISSN = "1550-4832 (print), 1550-4840 (electronic)", ISSN-L = "1550-4832", bibdate = "Wed Jan 14 06:36:51 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib", acknowledgement = ack-nhfb, ajournal = "ACM J. Emerg. Technol. Comput. Syst.", articleno = "10", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", }